# Copyright 2010, 2011 G24
#
# This file is part of gpy.
#
# gpy is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# gpy is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with gpy. If not, see <http://www.gnu.org/licenses/>.

import main
import re
import urllib
import json


class mod_main(main.clisten):
    """Web module."""

    def addcmds(self):
        self.name = "web"
        self.cmds = {"ddg": "ddg", "google": "google", "gtran": "gtran", "yt": "yt"}

    def ddg(self):
        '''www.duckduckgo.com Zero-click Info boxes; 'ddg query text'.'''
        try:
            request = self.conn.recvinfo["message.params"].replace(" ", "+")
            sock = urllib.urlopen("http://api.duckduckgo.com/?q=%s&o=json" % request)
            data = sock.read()
            sock.close()
            result = json.loads(data)
            # Prefer the abstract; fall back to the dictionary definition.
            if result["AbstractText"] != "":
                self.conn.reply("%s %s" % (result["AbstractURL"].encode('utf-8'),
                                           result["AbstractText"].encode('utf-8')[0:200]))
            elif result["Definition"] != "":
                self.conn.reply("%s %s" % (result["DefinitionURL"].encode('utf-8'),
                                           result["Definition"].encode('utf-8')))
        except Exception, e:
            self.conn.cancel(str(json.loads(data)))
            self.conn.cancel(str(e))

    def yt(self):
        '''www.youtube.com video info lookup; 'yt video_id'.'''
        try:
            request = self.conn.recvinfo["message.params"].replace(" ", "+").replace("http://www.youtube.com/watch?v=", "")
            sock = urllib.urlopen("http://gdata.youtube.com/feeds/api/videos/%s?v=2&alt=json" % request)
            data = sock.read()
            sock.close()
            entry = json.loads(data)["entry"]
            self.conn.reply("'%s' by %s - %s %s (+%s - %s = %s)" % (
                entry["title"]["$t"].encode('utf-8'),
                entry["author"][0]["name"]["$t"].encode('utf-8'),
                #entry["media$group"]["media$description"]["$t"].encode('utf-8').replace("\n", " "),
                entry["published"]["$t"].encode('utf-8').split('.')[0].replace("T", " "),
                entry["link"][0]["href"].encode('utf-8').replace("&feature=youtube_gdata", ""),
                entry["yt$rating"]["numLikes"].encode('utf-8'),
                entry["yt$rating"]["numDislikes"].encode('utf-8'),
                int(entry["yt$rating"]["numLikes"].encode('utf-8')) - int(entry["yt$rating"]["numDislikes"].encode('utf-8'))
            ))
        except Exception, e:
            #self.conn.cancel(str(json.loads(data)))
            self.conn.cancel(str(e))
            #self.conn.reply("Not found.")

    def google(self):
        '''www.google.com web search; 'google query_here'.'''
        try:
            request = self.conn.recvinfo["message.params"].replace(" ", "+")
            sock = urllib.urlopen("http://ajax.googleapis.com/ajax/services/search/web?v=0.1&safe=high&q=%s" % request)
            data = sock.read()
            sock.close()
            result = json.loads(data)["responseData"]["results"][0]
            self.conn.reply("%s - %s - %s" % (
                self.stripHTMLTags(result["titleNoFormatting"].encode('utf-8')),
                result["unescapedUrl"].encode('utf-8'),
                self.stripHTMLTags(result["content"].encode('utf-8'))))
        except Exception, e:
            self.conn.cancel(str(json.loads(data)))
            self.conn.cancel(str(e))

    def gtran(self):
        '''translate.google.com ; 'gtran fromlang tolang text'.'''
        try:
            params = self.conn.recvinfo['message.params']
            if params.count(' ') < 2:
                return
            froml = params.split(' ')[0]
            tol = params.split(' ')[1]
            txt = ' '.join(params.split(' ')[2:])
            sock = urllib.urlopen("http://ajax.googleapis.com/ajax/services/language/translate?v=1.0&q=%s&langpair=%s|%s" % (txt, froml, tol))
            data = sock.read()
            sock.close()
            #print json.loads(data)
            # Reply with the translation; fall back to the API's error details.
            try:
                self.conn.reply("%s" % json.loads(data)["responseData"]["translatedText"].encode('utf-8'))
            except:
                self.conn.reply("%s" % json.loads(data)["responseDetails"].encode('utf-8'))
        except Exception, e:
            self.conn.cancel(str(json.loads(data)))
            self.conn.cancel(str(e))

    def stripHTMLTags(self, html):
        """Strip HTML tags from any string and transform special entities."""
        text = html
        # apply rules in given order!
        rules = [
            {r'>\s+': u'>'},                              # remove spaces after a tag opens or closes
            {r'\s+': u' '},                               # collapse consecutive whitespace
            {r'\s*<br\s*/?>\s*': u'\n'},                  # newline after a <br>
            {r'</(div)\s*>\s*': u'\n'},                   # newline after </div>
            {r'</(p|h\d)\s*>\s*': u'\n\n'},               # blank line after </p> and headings
            {r'<head>.*<\s*(/head|body)[^>]*>': u''},     # remove <head> to </head>
            {r'<a\s+href="([^"]+)"[^>]*>.*</a>': r'\1'},  # show links instead of link texts
            {r'[ \t]*<[^<]*?/?>': u' '},                  # remove remaining tags
            {r'^\s+': u''}                                # remove spaces at the beginning
        ]
        for rule in rules:
            for (k, v) in rule.items():
                regex = re.compile(k)
                text = regex.sub(v, text)
        # replace special HTML entities
        special = {
            '&#39;': u'\'',
            '&nbsp;': ' ',
            '&amp;': '&',
            '&quot;': '"',
            '&lt;': '<',
            '&gt;': '>',
            '&middot;': '*'
        }
        for (k, v) in special.items():
            text = text.replace(k, v)
        return text