Python爬虫视频教程零基础小白到scrapy爬虫高手-轻松入门
https://item.taobao.com/item.htm?spm=a1z38n.10677092.0.0.482434a6EmUbbW&id=564564604865
把中文翻译成英语时,靠爬虫抓取网页容易出错;使用官方提供的API接口更方便,也更不容易出错。
# -*- coding: utf-8 -*-
"""
Created on Tue Apr 19 09:05:56 2016
有道翻译爬虫(英译中)
@author: Administrator
"""
import requests,bs4,time
# Chinese input words fail for a reason that has not been identified, so the
# mixed-language list below is kept only as a disabled example:
# words_list=["python","job","hello world"," amoxicillin","阿莫西林","clarithromycin","克拉霉素"]
words_list = [
    "metformin hydrochloride",
    "amoxicillin",
    "clarithromycin",
    "Viagra",
    "sildenafil",
]
# Shared accumulator that the batch helpers in this file append to.
translation_list = list()
# Sample inputs used by the interactive examples in this file.
word = "python"
word2 = "n. 巨蟒;大蟒n. (法)皮东(人名)"
# Strip whitespace and part-of-speech tags (such as "n.") from a dictionary entry.
def word_format(word):
    """Return *word* with part-of-speech tags and all whitespace removed.

    Args:
        word: Raw text of a dictionary entry, e.g. ``'n. 巨蟒;大蟒'``.

    Returns:
        The text with tags such as ``n.``, ``adj.``, ``vt.`` or ``pron.``
        deleted and every whitespace character (spaces, newlines, tabs,
        non-breaking spaces) removed.
    """
    import re  # local import: the file's import line does not provide ``re``

    # The look-behind refuses a tag that directly follows an ASCII letter, so
    # "pron." is removed as a whole instead of being cut down to "pro", and an
    # English word that merely ends in "n." is left intact.  A tag glued to a
    # preceding CJK character ("大蟒n.") is still removed.
    without_tags = re.sub(
        r"(?<![A-Za-z])"
        r"(?:abbr|adj|adv|art|aux|conj|int|num|prep|pron|vi|vt|n|v)\.",
        "",
        word,
    )
    # split() with no argument breaks on any whitespace run, so joining the
    # pieces drops leading, trailing and interior whitespace in one pass.
    return "".join(without_tags.split())
'''
word2='n. 巨蟒;大蟒n. (法)皮东(人名)'
word_format(word2)
Out[90]: '巨蟒;大蟒(法)皮东(人名)'
'''
# Full version: fetch the complete translation text for a single word.
def Get_full_translation(word, timeout=10):
    """Return the whole translation block of *word* from dict.youdao.com.

    Args:
        word: The word or phrase to look up.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The text of the first ``.trans-container`` element on the result
        page, with newline characters removed.

    Raises:
        requests.RequestException: On a network failure, a timeout, or an
            HTTP error status.
        IndexError: If the page contains no ``.trans-container`` element.
    """
    from urllib.parse import quote  # local import: not provided by the file's imports

    # Escape the word so characters such as "/", "?" or "#" cannot change the
    # structure of the URL.
    url = "http://dict.youdao.com/w/%s/#keyfrom=dict.index" % quote(str(word), safe="")
    # Without a timeout a stalled connection would block forever.
    res = requests.get(url, timeout=timeout)
    res.raise_for_status()
    soup = bs4.BeautifulSoup(res.text, "lxml")
    elems = soup.select('.trans-container')
    if not elems:
        # Same exception type as indexing the empty result would raise, but
        # with a message that names the word.
        raise IndexError("no '.trans-container' element found for %r" % (word,))
    return elems[0].text.replace('\n', '')
# Batch version: collect the full translation of every word in a list.
def Get_all_full_translation(words_list):
    """Look up every word with ``Get_full_translation``.

    Each successful result is appended to the module-level
    ``translation_list``. A word whose lookup fails is reported on stdout
    and skipped, so the results are not guaranteed to line up index-by-index
    with *words_list*.

    Args:
        words_list: Iterable of words or phrases to translate.

    Returns:
        A new list holding the translations obtained during this call.
    """
    collected = []
    for word in words_list:
        try:
            translation = Get_full_translation(word)
        except Exception:
            # Best effort: one failing word must not stop the batch.
            # ``Exception`` (rather than a bare ``except``) lets Ctrl-C and
            # SystemExit still interrupt a long run.
            print("exception:", word)
        else:
            translation_list.append(translation)
            collected.append(translation)
    return collected
# Simple version: fetch only the first meaning of a single word.
def Get_simple_translation(word, timeout=10):
    """Return the first listed meaning of *word* from dict.youdao.com.

    Args:
        word: The word or phrase to look up.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The text of the first ``.trans-container`` element after cleaning
        with ``word_format``, cut at the first full-width semicolon
        (``;``).

    Raises:
        requests.RequestException: On a network failure, a timeout, or an
            HTTP error status.
        IndexError: If the page contains no ``.trans-container`` element.
    """
    from urllib.parse import quote  # local import: not provided by the file's imports

    # Escape the word so characters such as "/", "?" or "#" cannot change the
    # structure of the URL.
    url = "http://dict.youdao.com/w/%s/#keyfrom=dict.index" % quote(str(word), safe="")
    # Without a timeout a stalled connection would block forever.
    res = requests.get(url, timeout=timeout)
    res.raise_for_status()
    soup = bs4.BeautifulSoup(res.text, "lxml")
    elems = soup.select('.trans-container')
    if not elems:
        # Same exception type as indexing the empty result would raise, but
        # with a message that names the word.
        raise IndexError("no '.trans-container' element found for %r" % (word,))
    cleaned = word_format(elems[0].text)
    # Meanings are separated by full-width semicolons; keep only the first.
    return cleaned.split(";")[0]
'''
Get_simple_translation(word)
Out[108]: '巨蟒'
'''
# Batch version: collect the short translation of every word in a list.
def Get_all_simple_translation(words_list, delay=3):
    """Look up every word with ``Get_simple_translation``, pausing between requests.

    Each successful result is appended to the module-level
    ``translation_list``. A word whose lookup fails is reported on stdout
    and skipped, so the results are not guaranteed to line up index-by-index
    with *words_list*. A completion message is printed at the end.

    Args:
        words_list: Iterable of words or phrases to translate.
        delay: Seconds to sleep between consecutive requests; ``0`` disables
            the pause.

    Returns:
        A new list holding the translations obtained during this call.
    """
    collected = []
    for position, word in enumerate(words_list):
        # Pause before every request except the first, so the throttle also
        # applies after a failed lookup and no time is wasted after the last.
        if position and delay > 0:
            time.sleep(delay)
        try:
            translation = Get_simple_translation(word)
        except Exception:
            # Best effort: one failing word must not stop the batch.
            # ``Exception`` (rather than a bare ``except``) lets Ctrl-C and
            # SystemExit still interrupt a long run.
            print("exception:", word)
        else:
            translation_list.append(translation)
            collected.append(translation)
    print("congradulation!")
    return collected
'''
Get_all_simple_translation(words_list)
congradulation!
translation_list
Out[126]: ['盐酸二甲双胍', '阿莫西林', '克拉霉素', '万艾可', '西地那非']
'''