# Default HTTP headers sent with the GET requests issued by get().
# The User-Agent is a generic desktop-browser string; replace it with the
# value reported by your own browser if you prefer.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/102.0.5005.63 Safari/537.36"
    ),
    "Host": "www.iplant.cn",
}
import re
import requests
from lxml import etree
import json
from docx import Document
from docx.oxml.ns import qn
def get(key: str, timeout: float = 10) -> tuple:
    """Fetch the classification line and description text for a plant.

    Three requests are made to www.iplant.cn in sequence:

    1. The advanced-search page for ``key``; the text of the
       ``div#sptitlel`` element is taken as the name for the next request.
    2. ``getfrps.ashx`` with that name; the ``p.pindent`` paragraphs supply
       the description and the ``frpsspno`` value is extracted from the
       raw response text.
    3. ``getfrpsclass.ashx`` (POST) with that ``frpsspno``; the
       ``frpsclasstxt`` field of the JSON reply supplies the
       classification markup.

    Args:
        key: Search term to look up.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        A ``(class_left, text2)`` tuple of strings. ``class_left`` is every
        text node of the classification markup joined with ``-``;
        ``text2`` is every text node of the description paragraphs joined
        with blank lines.

    Raises:
        LookupError: If the search page has no title element, the detail
            response has no ``frpsspno`` value, or the classification
            markup is empty. (A missing ``frpsclasstxt`` key raises
            ``KeyError``, which is a ``LookupError`` subclass.)
        requests.RequestException: On network failure, timeout, or an HTTP
            error status.
    """
    # --- 1. Search page -------------------------------------------------
    # params= lets requests encode the query string instead of gluing raw
    # user input onto the URL.
    response = requests.get(
        "http://www.iplant.cn/frps/advsearch",
        params={"key": key},
        headers=headers,
        timeout=timeout,
    )
    response.raise_for_status()
    response.encoding = "utf-8"
    search_page = etree.HTML(response.text)
    titles = (
        search_page.xpath('//div[@id="sptitlel"]/text()')
        if search_page is not None
        else []
    )
    if not titles:
        raise LookupError(f"no result title found for {key!r}")
    name = titles[0]

    # --- 2. Detail record -----------------------------------------------
    # Form-encoding turns the spaces in the name into "+", which is what
    # the endpoint was previously sent via a manual split/join.
    response1 = requests.get(
        "http://www.iplant.cn/ashx/getfrps.ashx",
        params={"key": name},
        headers=headers,
        timeout=timeout,
    )
    response1.raise_for_status()
    response1.encoding = "utf-8"
    detail_page = etree.HTML(response1.text)
    pindents_lst = (
        detail_page.xpath("/html/body/p[@class='pindent']")
        if detail_page is not None
        else []
    )
    fragments = []
    # The first matching paragraph is deliberately skipped.
    # NOTE(review): presumably it is a heading/synonym line - confirm.
    for pindent in pindents_lst[1:]:
        fragments.extend(pindent.xpath(".//text()"))
    text2 = '\n\n'.join(fragments)

    spno_match = re.search(r'"frpsspno": "(.*?)"', response1.text)
    if spno_match is None:
        raise LookupError(f"no frpsspno value in the response for {name!r}")
    spno = spno_match.group(1)

    # --- 3. Classification ----------------------------------------------
    headers2 = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/102.0.5005.63 Safari/537.36"
        ),
        # The search page URL is sent as the referer for the POST.
        "Referer": response.url,
    }
    response3 = requests.post(
        "http://www.iplant.cn/ashx/getfrpsclass.ashx",
        headers=headers2,
        data={'spno': spno, 'spclassid': '24'},
        timeout=timeout,
    )
    response3.raise_for_status()
    response3.encoding = "utf-8"
    class_markup = str(json.loads(response3.text)['frpsclasstxt'])
    class_tree = etree.HTML(class_markup)
    if class_tree is None:
        raise LookupError(f"empty classification markup for spno {spno!r}")
    class_left = '-'.join(class_tree.xpath("//text()"))

    return class_left, text2
def write(doc, class_left, text2):
    """Add a level-1 heading and a body paragraph to ``doc``.

    ``class_left`` becomes the heading text and ``text2`` the paragraph
    text. The document's ``Normal`` style is also switched to the
    微软雅黑 font, and the heading run to 黑体. ``doc`` is modified in
    place; nothing is returned.
    """
    east_asia_attr = qn('w:eastAsia')
    body_font = '微软雅黑'
    heading_font = '黑体'

    # Body text: the font name is set on the Normal style and again on its
    # East Asian font attribute, which is stored separately.
    normal_style = doc.styles['Normal']
    normal_style.font.name = body_font
    normal_style._element.rPr.rFonts.set(east_asia_attr, body_font)

    # Heading: start from an empty heading so the text lives in a run whose
    # font can be set directly.
    heading = doc.add_heading('', level=1)
    title_run = heading.add_run(class_left)
    title_run.font.name = heading_font
    title_run._element.rPr.rFonts.set(east_asia_attr, heading_font)

    # Body paragraph.
    doc.add_paragraph(text2)

    # Line break appended to the heading run.
    title_run.add_break()
if __name__ == '__main__':
    # Running number prefixed to each saved file name. It must live outside
    # the loop; resetting it on every iteration would leave it stuck at 1.
    i = 1
    while True:
        key = input("如果想要退出程序,请直接按enter键\n请输入你要查看的植物:")
        if key == "":
            # Empty input means quit; wait for one more Enter before exiting.
            input("退出程序.....")
            break
        try:
            class_left, message = get(key=key)
            doc = Document()
            write(doc=doc, class_left=class_left, text2=message)
            doc.save(str(i) + class_left + '.docx')
        except Exception:
            # Keep the prompt loop alive on any lookup/save failure, but let
            # KeyboardInterrupt and SystemExit propagate so Ctrl+C still works.
            print("搜索失败")
        else:
            # Only count documents that were actually saved.
            i += 1