事先需要准备的库和包文件:
其中的lxml、requests、pdfkit包可以使用以下命令安装:
conda install lxml
conda install requests
conda install pdfkit  # 用来将HTML转化为PDF格式;还需要下载并安装wkhtmltox安装包,并将其安装目录添加到环境变量PATH中
# Script-level setup: read the target account ID and prepare a shared HTTP
# session with a browser-like User-Agent header.

# Account ID typed in by the user; interpolated into the article URLs below.
author_name = input("请输入账号ID:")
Max_page_num = 100  # maximum page number
# Counter starting at 1 — presumably the current page index; confirm against
# the code that consumes it.
i = 1

# A single Session is reused so that the headers set here apply to every
# request made through it.
sess = requests.Session()

# Browser User-Agent string. The literal is split with implicit string
# concatenation inside parentheses: a raw line break inside a single-quoted
# string is an unterminated literal and a SyntaxError.
agent = (
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
    '(KHTML, like Gecko) Chrome/93.0.4542.2 Safari/537.36'
)
sess.headers['User-Agent'] = agent
def crawler_blog_by(author_name,article_id,title):
article_request_url = f"https://blog.youkuaiyun.com/{author_name}/article/details/{article_id}"
response = sess.get(article_request_url)
selector = etree.HTML(response.t