"""Fetch a CNKI search page, collect the result-title links, and save them to a Word file.

The script downloads the search URL below, looks for ``<a class="fz14">``
anchors in the returned HTML, and writes the text of each anchor as one
paragraph of ``output.docx`` in the current working directory.
"""
import sys

import requests
from bs4 import BeautifulSoup
from docx import Document
from fake_useragent import UserAgent

# Send a randomly chosen browser User-Agent instead of the default
# python-requests one.
headers = {
    'user-agent': UserAgent().random
}

# `kw` carries the percent-encoded UTF-8 search keyword.
url = 'https://kns.cnki.net/kns8s/search?classid=WD0FTY92&kw=%E5%B7%A5%E5%95%86%E7%AE%A1%E7%90%86&korder=FT'

# Without a timeout, requests waits forever on an unresponsive server.
res = requests.get(url=url, headers=headers, timeout=30)
res.encoding = 'utf-8'
code = res.status_code
print(code)
# Stop on 4xx/5xx so an error page is never parsed as if it were results.
res.raise_for_status()

text = res.text
soup = BeautifulSoup(text, 'html.parser')
print(soup)

lis = soup.find_all('a', class_='fz14')
print(lis)

# Use .get_text() to get the visible text of each anchor (including text in
# nested tags); drop surrounding whitespace and skip anchors with no text so
# the document does not end up with blank paragraphs.
titles = [li.get_text().strip() for li in lis]
titles = [title for title in titles if title]

if not titles:
    # NOTE(review): the result list is possibly injected by JavaScript after
    # the initial page load, in which case it is absent from this HTML.
    # Confirm in the browser's network tab which request returns the titles.
    print(
        "No <a class='fz14'> title links were found in the downloaded HTML. "
        "The result list may be filled in by JavaScript after the page "
        "loads, in which case a plain requests.get() cannot see it.",
        file=sys.stderr,
    )

doc = Document()
for title in titles:
    doc.add_paragraph(title)
doc.save('output.docx')
# 问题:运行上面的代码后,爬取不到标题。