获取某植物的部分信息并保存到docx文档里面


# Default request headers for the search and detail pages on www.iplant.cn.
# The User-Agent below is only a working default; replace it with the
# User-Agent string of your own browser if you prefer.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.63 Safari/537.36",
    "Host": "www.iplant.cn",
}
import re
import requests
from lxml import etree
import json
from docx import Document
from docx.oxml.ns import qn

def get(key):
    """Fetch classification and description text for a plant from www.iplant.cn.

    Three requests are made in sequence:

    1. The FRPS advanced-search page for ``key``, from which the species
       title is read.
    2. ``getfrps.ashx`` for that title, which supplies the description
       paragraphs and the ``frpsspno`` identifier.
    3. ``getfrpsclass.ashx`` (POST) with that identifier, which supplies the
       classification HTML fragment.

    Args:
        key: Search term typed by the user (a plant name).

    Returns:
        A ``(class_left, text2)`` tuple of strings. ``class_left`` is every
        text node of the classification fragment joined with ``'-'``;
        ``text2`` is every text node of all but the first ``p.pindent``
        paragraph joined with blank lines.

    Raises:
        LookupError: If an expected element or field is missing from a
            response (for example the search found nothing).
        requests.RequestException: On network failure or timeout.
    """
    # Without a timeout a stalled server would hang the interactive prompt.
    timeout = 10  # seconds

    # Step 1: search page. ``params`` lets requests encode the user input
    # instead of splicing it into the URL verbatim.
    response = requests.get(
        url="http://www.iplant.cn/frps/advsearch",
        params={"key": key},
        headers=headers,
        timeout=timeout,
    )
    response.encoding = "utf-8"

    search_tree = etree.HTML(response.text)
    if search_tree is None:
        raise LookupError("empty search page for {!r}".format(key))
    titles = search_tree.xpath('//div[@id="sptitlel"]/text()')
    if not titles:
        raise LookupError("no species title found for {!r}".format(key))
    name = titles[0]

    # Step 2: description page. Spaces in the title are form-encoded as '+',
    # which is what the original hand-built URL sent.
    response1 = requests.get(
        "http://www.iplant.cn/ashx/getfrps.ashx",
        params={"key": name},
        headers=headers,
        timeout=timeout,
    )
    response1.encoding = "utf-8"

    detail_tree = etree.HTML(response1.text)
    if detail_tree is None:
        raise LookupError("empty description response for {!r}".format(name))
    pindents_lst = detail_tree.xpath("/html/body/p[@class='pindent']")

    # The first 'pindent' paragraph is skipped; only the rest are collected.
    fragments = []
    for pindent in pindents_lst[1:]:
        fragments.extend(pindent.xpath(".//text()"))
    text2 = '\n\n'.join(fragments)

    spno_match = re.search(r'"frpsspno": "(.*?)"', response1.text)
    if spno_match is None:
        raise LookupError("no frpsspno in description response for {!r}".format(name))
    spno = spno_match.group(1)

    # Step 3: classification fragment. This request uses its own headers,
    # with the search page URL as Referer.
    headers2 = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.63 Safari/537.36",
        "Referer": response.url}
    data = {'spno': spno,
            'spclassid': '24'
            }
    response3 = requests.post(
        url="http://www.iplant.cn/ashx/getfrpsclass.ashx",
        headers=headers2,
        data=data,
        timeout=timeout,
    )
    response3.encoding = "utf-8"

    payload = json.loads(response3.text)
    # A missing 'frpsclasstxt' key raises KeyError, itself a LookupError.
    class_markup = str(payload['frpsclasstxt'])
    class_tree = etree.HTML(class_markup)
    if class_tree is None:
        raise LookupError("empty classification for spno {!r}".format(spno))

    class_left = '-'.join(class_tree.xpath("//text()"))
    return class_left, text2

def write(doc,class_left,text2):
    """Add a heading and a body paragraph for one plant to ``doc``.

    Args:
        doc: Document object the content is appended to; it is modified in
            place (its 'Normal' style font is changed as well).
        class_left: Text used for the level-1 heading.
        text2: Text used for the body paragraph.

    Returns:
        None.
    """
    # The East Asian font slot is set explicitly next to ``font.name``.
    east_asia = qn('w:eastAsia')

    # Body text: apply the font to the document-wide 'Normal' style.
    body_font = u'微软雅黑'
    normal_style = doc.styles['Normal']
    normal_style.font.name = body_font
    normal_style._element.rPr.rFonts.set(east_asia, body_font)

    # Heading: an empty level-1 heading whose single run carries the text,
    # so the font can be set on that run.
    heading_font = u'黑体'
    heading = doc.add_heading('', level=1)
    heading_run = heading.add_run(class_left)
    heading_run.font.name = heading_font
    heading_run._element.rPr.rFonts.set(east_asia, heading_font)

    # Body paragraph with the description text.
    doc.add_paragraph(text2)

    # The break is appended to the heading's run, i.e. after the heading
    # text, not after the body paragraph.
    heading_run.add_break()
    




if __name__ == '__main__':
    # Running number prefixed to each saved file name. It must live outside
    # the loop: initialising it inside reset it to 1 on every iteration, so
    # the increment below never took effect.
    i = 1
    while True:
        key = input("如果想要退出程序,请直接按enter键\n请输入你要查看的植物:")
        if key == "":
            # Empty input ends the session; wait for one more Enter first.
            input("退出程序.....")
            break

        try:
            class_left, message = get(key=key)

            # One new document per successful search.
            doc = Document()
            write(doc=doc, class_left=class_left, text2=message)

            doc.save(str(i) + class_left + '.docx')
            i += 1
        except Exception:
            # ``Exception`` rather than a bare ``except`` so that Ctrl+C
            # (KeyboardInterrupt) and SystemExit still stop the program.
            print("搜索失败")

评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值