# -*- coding: utf-8 -*-
# Written by Korkakakis Nikos (korkakak@ceid.upatras.gr)
# url : http://blogs.pwmn.net/korkakak
#
# Small client for querying a YaCy intranet search engine and scraping its
# HTML result page.
#
# NOTE: the latin-1 byte-string handling in GetSomeResults assumes Python 2
# string semantics (encoded bytes are concatenated with plain strings).

__author__="Korkakakis Nikos"
__date__ ="$10 Μαϊ 2009 5:29:59 μμ$"

# Needed for the HTTP / network plumbing (browser object etc.)
import re, mechanize
from mechanize import Browser
import lxml.html
from lxml.html import fromstring

# Base URL of the search engine; the query text is appended to it verbatim.
SearchEngineUrl = "http://10.140.4.4:8001/yacysearch.html?query="

# Emitted on every import/run: this module has been superseded.
# (Single-argument parenthesised print behaves the same on Python 2.)
print("Warning : deprecated version")
print("please use the YacySearchIntranet.py Instead")


def initBrowser():
    """Create and return a mechanize Browser.

    Also builds an opener with a cookie jar and a made-up User-agent header
    and installs it as mechanize's module-wide default opener.
    """
    cookies = mechanize.CookieJar()
    opener = mechanize.build_opener(mechanize.HTTPCookieProcessor(cookies))
    opener.addheaders = [("User-agent", "Mozilla/5.0 (X11; U; Linux x86_64; el; rv:1.9.0.8) Gecko/2009041666 ibot Metallica/3.6.9")]
    mechanize.install_opener(opener)
    # NOTE(review): the Browser below is created independently of the opener
    # installed above -- confirm that the custom User-agent and cookie jar
    # actually apply to requests made through it.
    return Browser()


def sendQuery(txt):
    """Send the query `txt` to the search engine and return the response.

    Change SearchEngineUrl to target another search engine; for Google, for
    instance, use:
        SearchEngineUrl = "http://www.google.gr/search?q="

    `txt` is concatenated to the URL as-is; no URL-encoding is done here.
    """
    browser = initBrowser()
    return browser.open(SearchEngineUrl + txt)


def _fetchDocument(txt):
    """Query the engine for `txt` and return the parsed lxml HTML document."""
    # YaCy lags heavily on the first fetch of a result set, so the query is
    # issued twice and only the second response is parsed.
    sendQuery(txt).read()
    data = sendQuery(txt).read()
    return fromstring(data)


def Plurality(txt):
    """Return the text of the result page's "totalcount" element for `txt`.

    The value is returned as text, exactly as found in the page.
    """
    doc = _fetchDocument(txt)
    # Other search engines may need a different tag, or altogether different
    # logic, here: THIS WORKS FOR YACY ONLY!
    node = doc.get_element_by_id("totalcount")
    return node.text_content()


def GetSomeResults(txt):
    """Return a partially formatted listing of the search results for `txt`.

    Each result contributes its title (elements of class "linktitle") and its
    URL (elements of class "url"), one result per entry. If no titles or no
    URLs are found, an apology message is returned instead.
    """
    doc = _fetchDocument(txt)
    # Other search engines may need a different tag, or altogether different
    # logic, here: THIS WORKS FOR YACY ONLY!
    snippets = doc.find_class("linktitle")
    urls = doc.find_class("url")

    # Bail out when either list is empty. The original `a == 0 | b == 0`
    # parsed as the chained comparison `a == (0 | b) == 0`, which is true only
    # when BOTH are empty and let a missing URL list crash the loop below.
    if len(snippets) == 0 or len(urls) == 0:
        return "Sorry no result found for "+txt+" in the PWMN intranet search engine\n"

    entries = []
    # zip() pairs titles with URLs and stops at the shorter list, so a length
    # mismatch can no longer raise IndexError.
    for snippet, url in zip(snippets, urls):
        try:
            title = snippet.text_content().encode('latin-1')
        except UnicodeEncodeError:
            # Title cannot be represented in latin-1: show the query instead.
            title = txt
        # Terminate every entry with a newline so that one result's URL does
        # not run straight into the next result's title.
        entries.append(title + "\n\tURL: " + url.text_content() + "\n")

    return "Search Results for "+txt+" :\n" + "".join(entries)


def Versus(a,b):
    """Report which of the two search terms `a` and `b` is the more popular.

    Popularity is the total result count reported by the engine for each
    term. Returns a sentence naming the winner (or stating a tie) along with
    both counts.
    """
    ares = RemoveComafromString(Plurality(a))
    bres = RemoveComafromString(Plurality(b))
    if ares > bres:
        return a + " wins "+b +" for "+str(ares)+" to "+str(bres)
    if bres > ares:
        return b + " wins "+a +" for "+str(bres)+" to "+str(ares)
    return a + " and "+ b +" are equal at " +str(ares)


def RemoveComafromString(val):
    """Convert `val` to an int, ignoring any commas it contains.

    Works around the comma that appears in large counts (e.g. "1,234").

    Raises:
        ValueError: if `val` is not an integer even with commas removed.
            (The previous implementation recursed on the unchanged string
            until the interpreter's recursion limit was hit.)
    """
    try:
        return int(val)
    except ValueError:
        pass
    # Strip the commas once; int() raises ValueError if the rest is not numeric.
    return int(''.join(val.split(",")))


def searchString(txt="pwmn"):
    """Send the query `txt` to the search engine and discard the response."""
    sendQuery(txt)
    return


if __name__ == "__main__":
    print("Yacy Intranet Query")
    print("\t\tWritten by Korkakakis Nikos")
    print(Versus("grigoris","under"))
    print(Plurality("korki"))
    print(GetSomeResults("wiman"))

# Usage of this file:
# Preamble: everything here works for YACY ONLY! Many of the above functions
# may need a major rewrite in order to work with another engine. If you need
# something like that and you are interested, please email your bounty to
# korkakak@ceid.upatras.gr
#
# There are three interesting functions that make sense to use from
# third-party applications:
#
# a) Versus (String txtA, String txtB)
#    Returns a String that describes the difference in word references
#    between the two Strings.
# b) Plurality (String txt)
#    Returns a String that describes an integer. For large results, e.g. in
#    the order of thousands, the String may contain one or more commas;
#    RemoveComafromString strips them and converts the String to an integer.
# c) GetSomeResults (String txt)
#    Returns a partially formatted String with some results from the search
#    engine (at most 10, according to the original author), after checking
#    whether there are any results, whether the encoding of the results is
#    usable, etc.
# enjoy :)