首先需要安装 Requests-HTML,使用
pip3 install Requests-HTML
或:
pip3 install requests-html
注意:在 Linux 系统上安装时可能需要 root 权限(例如在命令前加 sudo)。
下面上代码:
#!/usr/bin/python
# -*- coding: UTF-8 -*-
from requests_html import HTMLSession
def parse_data(url):
    """Fetch one article-list page and extract its entries.

    Args:
        url: Address of a single list page.

    Returns:
        A list of ``{"title": ..., "link": ...}`` dicts, one for each
        element matching the ``.article-item-box`` selector.

    Raises:
        Exception: With the message "End Load!" when the page contains no
            matching element. ``get_all_data`` relies on this to stop paging.
    """
    # Use the session as a context manager so its connection pool is released
    # after every page instead of leaking one open session per call.
    with HTMLSession() as session:
        boxes = session.post(url).html.find(".article-item-box")
        if not boxes:
            raise Exception("End Load!")
        return [
            {
                # The first text line carries the title; the slice drops its
                # first two characters (presumably an article-type badge and
                # a separator -- confirm against the live markup).
                "title": box.text.split("\n")[0][2:],
                "link": box.links.pop(),
            }
            for box in boxes
        ]
def get_all_data(url):
    """Collect the entries of every list page under ``url``.

    Pages are requested in order starting from page 1 by appending
    ``/article/list/<n>`` to ``url``. Each successfully parsed page number is
    printed. Paging stops at the first exception raised by ``parse_data``;
    that exception is printed and everything gathered so far is returned.

    Args:
        url: Blog prefix that the list-page suffix is appended to.

    Returns:
        A list with the entries of all pages parsed before the stop.
    """
    collected = []
    try:
        page = 1
        while True:
            page_entries = parse_data(url + "/article/list/{}".format(page))
            print(page)
            collected += page_entries
            page += 1
    except Exception as err:
        # Any failure (including the "no more entries" signal) ends paging.
        print(err)
    return collected
import html
import webbrowser
from pathlib import Path

url = "https://blog.youkuaiyun.com/qq_33811662"  # prefix of your own blog
PRE_HTML = "<!DOCTYPE html><html><head><meta charset='UTF-8'><title>个人博客</title></head><body>"
SUF_HTML = "</body></html>"
OUTPUT_FILE = "my_blog.html"


def build_index_html(entries):
    """Render the entries as one complete HTML document.

    Args:
        entries: Iterable of dicts with ``"link"`` and ``"title"`` keys.

    Returns:
        ``PRE_HTML``, one ``<div><a ...>`` row per entry, then ``SUF_HTML``,
        joined into a single string.
    """
    rows = [
        "<div><a class='blog' target='_blank' href='{}'>{}</a></div>".format(
            # Titles and links come from a scraped page, so escape them
            # (quotes included) before placing them into markup.
            html.escape(str(entry.get("link"))),
            html.escape(str(entry.get("title"))),
        )
        for entry in entries
    ]
    return PRE_HTML + "".join(rows) + SUF_HTML


def main():
    """Crawl the blog, write the index page, open it, then request each link."""
    all_data = get_all_data(url)
    print(len(all_data))

    # Build the page in memory and write it once instead of reopening the
    # file in append mode for every entry.
    with open(OUTPUT_FILE, "w", encoding="utf-8") as out:
        out.write(build_index_html(all_data))

    # os.system("my_blog.html") only opens the file on Windows; the
    # webbrowser module does the same thing on every platform.
    webbrowser.open(Path(OUTPUT_FILE).resolve().as_uri())

    # The original called requests.post here, but `requests` is never
    # imported (NameError). Use the HTMLSession this file already imports.
    with HTMLSession() as session:
        for data in all_data:
            session.post(data.get("link"))


if __name__ == "__main__":
    main()
最后结果:my_blog.html
Java 版本:https://blog.youkuaiyun.com/qq_33811662/article/details/81035451