使用 Python 的 bs4 库爬取金山词霸的翻译内容,文末附有源码以及已打包程序的百度网盘地址
之前的翻译器不能用了,百度API翻译是个垃圾玩意 真不愧是百度(日常黑百度)
然后就重新用 bs4 库写了全新的版本,这种右下角小框框式的简洁翻译器,用起来比浏览器网页翻译好用多了
首先就是在平常查资料的时候浏览器是这样的
用虚拟机碰到不懂的单词是这样的
现在是这样的
 AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36",
}
def req_get(headers):
    """Prompt for a word and download its iciba.com lookup page.

    Args:
        headers: HTTP request headers handed straight to ``requests.get``.

    Returns:
        The response body as text.

    Raises:
        requests.exceptions.RequestException: if the request fails or
            does not complete within the timeout.
    """
    # Function-scope import so the block stays self-contained.
    from urllib.parse import quote

    word = input('请输入需要翻译的单词:').strip()
    # Percent-encode the whole word (safe='') so '/', '?' and '#' typed by
    # the user stay inside the path segment instead of reshaping the URL.
    url = 'http://www.iciba.com/' + quote(word, safe='')
    # Without a timeout a stalled connection would block the prompt forever.
    response = requests.get(url, headers=headers, timeout=10)
    return response.text
def regular(html):
    """Print the definition lines found in a lookup page.

    Takes the first ``<li class="clearfix">`` element, then its first
    ``<p>``, and prints every child of that paragraph on its own line with
    the literal ``<span>`` / ``</span>`` markers replaced by single spaces.

    Args:
        html: Page markup to parse.

    Raises:
        AttributeError: when no matching ``<li>`` exists (``find`` returned
            ``None``); ``translat`` catches this to try the sentence layout.
    """
    document = bs4.BeautifulSoup(html, 'html.parser')
    entry = document.find('li', class_='clearfix')
    paragraph = entry.find('p')
    for child in paragraph:
        rendered = str(child)
        rendered = rendered.replace('<span>', ' ')
        rendered = rendered.replace('</span>', ' ')
        print(rendered)
def regular_sentence(html):
    """Print the sentence translation found in a lookup page.

    The result is looked up by the exact inline ``style`` of its ``<div>``.
    Two variants are tried, differing only in font size (18px first, then
    15px). If neither is present a short notice is printed instead of the
    text "None".

    Args:
        html: Page markup to parse.
    """
    large_style = ("width: 580px; margin-top: 15px; font-size: 18px; "
                   "line-height: 24px; color: #333333;")
    small_style = ("width: 580px; margin-top: 15px; font-size: 15px; "
                   "line-height: 24px; color: #333333;")

    soup = bs4.BeautifulSoup(html, 'html.parser')

    # Explicit None checks replace the former bare ``except:``, which also
    # swallowed KeyboardInterrupt/SystemExit.
    large = soup.find('div', style=large_style)
    if large is not None:
        print(large.get_text())
        return

    small = soup.find('div', style=small_style)
    if small is None:
        # Neither layout matched: tell the user rather than printing "None".
        print('未找到翻译结果')
        return

    # Strip the wrapping <div ...> / </div> text but keep the inner markup,
    # exactly as the original output did.
    print(
        str(small)
        .replace('<div', ' ')
        .replace('style="' + small_style + '">', '')
        .replace('</div>', '')
    )
def translat():
    """Run the interactive lookup loop; never returns normally.

    Each round fetches a page via ``req_get(headers)`` and prints it with
    ``regular``. An ``AttributeError`` from ``regular`` means the word
    layout was absent, so ``regular_sentence`` is tried instead. Any other
    error prints ``error`` and moves on to the next prompt.

    Exceptions from ``req_get`` or ``regular_sentence`` propagate to the
    caller, as before.
    """
    while True:
        html = req_get(headers)
        try:
            regular(html)
        except AttributeError:
            regular_sentence(html)
        except Exception:
            # Previously a bare ``except:`` that recursed into translat(),
            # growing the stack on every failure and trapping Ctrl+C.
            # Looping again gives the same output without the recursion.
            print('error')
            continue
        print('----------------')
def main():
    """Start the translator, restarting it once after an unexpected error.

    Ctrl+C (``KeyboardInterrupt``) and end of input (``EOFError``) end the
    program quietly instead of triggering a restart. If the restarted run
    fails as well, the exception is re-raised.
    """
    max_attempts = 2  # first run plus a single restart, as before
    for attempt in range(1, max_attempts + 1):
        try:
            translat()
            return
        except (KeyboardInterrupt, EOFError):
            # The user asked to quit or stdin was closed: exit, don't restart.
            return
        except Exception:
            if attempt == max_attempts:
                raise


if __name__ == '__main__':
    main()
链接:https://pan.baidu.com/s/1qu6leR2LPrYmSVX8Y-ovcw
提取码:dkcq