re
xpath
JSON parsing
use chrome driver
Demo 1 (the simplest one, using re):
#encoding=utf-8
import urllib
import re
def youdao(keyword):
    """Look up *keyword* on youdao.com, print the translation, and return it.

    The result page is downloaded, the first
    ``<div class="trans-container">`` block is extracted with a regular
    expression, its HTML tags are stripped, and the remaining text is
    printed between two rows of asterisks as wide as its longest line.

    Args:
        keyword: Word or phrase to look up. It is percent-encoded before
            being appended to the URL, so non-ASCII text and characters
            such as spaces or ``?`` do not corrupt the request.

    Returns:
        A newline, the keyword, ``' : '``, the translation text, and a
        trailing newline, concatenated into one string.

    Raises:
        IndexError: If the page contains no ``trans-container`` block.
    """
    from contextlib import closing
    try:  # Python 3 layout of urllib
        from urllib.parse import quote
        from urllib.request import urlopen
    except ImportError:  # Python 2 layout of urllib
        from urllib import quote, urlopen

    url = 'http://www.youdao.com/w/eng/' + quote(keyword)
    # closing() releases the connection even if read() fails.
    with closing(urlopen(url)) as response:
        page = response.read()
    if not isinstance(page, str):
        # Python 3 returns bytes; the regex below needs text.
        # NOTE(review): assumes the page is served as UTF-8 - confirm.
        page = page.decode('utf-8', 'replace')

    match = re.search(
        r'<div class="trans-container">(.*?)</div>', page, re.S | re.M)
    if match is None:
        # Same exception type the unchecked [0] lookup used to raise,
        # but with a message that names the cause.
        raise IndexError('no translation found for %r' % (keyword,))

    # Drop every remaining HTML tag, keeping only the text between them.
    return_string = re.sub('<(.*?)>', '', match.group(1).strip()).strip()

    num = max(map(len, return_string.split('\n')))
    border = '*' * num
    print(border)
    print(return_string)
    print(border)
    return '\n' + keyword + ' : ' + return_string + '\n'
# Demo run: look up one Chinese word and one English word.
for _word in ('你好', 'hello'):
    youdao(_word)
Demo 2:
import urllib
from lxml import etree
# XPath selecting the blog article container, and the page it is applied to.
xp = '//div[@id="blog_article"]'
url = "http://www.dioenglish.com/home.php?mod=space&uid=114322&do=blog&id=55535"
def get(url, xp):
    """Download the page at *url* and evaluate the XPath *xp* against it.

    Args:
        url: Address of the HTML page to fetch.
        xp: XPath expression to evaluate on the parsed page, e.g.
            ``'//div[@id="blog_article"]'`` or
            ``'//div[@id="blog_article"]/p/span/font/text()'``.

    Returns:
        If *xp* ends with ``()`` (a function call such as ``text()``), the
        raw result of the XPath evaluation. Otherwise, the full text
        content (``string(.)``) of the first match, encoded as UTF-8.

    Raises:
        IndexError: If *xp* does not end with ``()`` and matches nothing.
    """
    from contextlib import closing
    try:  # Python 3 layout of urllib
        from urllib.request import urlopen
    except ImportError:  # Python 2 layout of urllib
        from urllib import urlopen

    # closing() releases the connection even if read() fails.
    with closing(urlopen(url)) as response:
        t = response.read()
    sele = etree.HTML(t)

    if xp.endswith('()'):
        # Function-style expressions already yield plain values.
        return sele.xpath(xp)

    content = sele.xpath(xp)
    if not content:
        # Same exception type the unchecked [0] lookup used to raise,
        # but with a message that names the cause.
        raise IndexError('XPath %r matched nothing at %s' % (xp, url))
    # NOTE(review): assumes the first match is an element node (it must
    # support .xpath()) - confirm for expressions that select attributes.
    return content[0].xpath('string(.)').encode('utf-8')