The original plan was to construct the request parameters myself and then pull the data out of the returned JSON, but the parameters turned out to be rather complicated.
Looking more closely, though, quite a few of them are identical across requests, so I simply scrolled the page down a few more times and captured some extra requests. At first I meant to hard-code the shared parameters and put the changing ones in a list (a rough sketch of that idea is at the end of this post), but that felt like too much hassle,
so in the end I just dropped those captured request URLs into a list and looped over them with a for loop, visiting each one in turn.
import requests
import json
from requests.exceptions import RequestException
import time
def get_page(urls):
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                      'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.87 Safari/537.36'
    }
    json_list = []
    for url in urls:
        try:
            # Pause between requests so the feed endpoint is not hit too quickly.
            time.sleep(2)
            response = requests.get(url, headers=headers).text
            html = json.loads(response)
            json_list.append(html)
        except (RequestException, json.JSONDecodeError):
            # Skip URLs that fail or that return something other than valid JSON.
            continue
    return json_list
# This long green wall of strings is the request list mentioned above
def url_list():
    urls = [
'https://www.toutiao.com/api/pc/feed/?max_behot_time=1581473162&category=__all__&utm_source=toutiao&widen=1&tad'
'require=true&as=A1C54E94C3BB3BD&cp=5E43EB638B5DAE1&_signature=.AjOuAAgEBA6X3c11FdlGPwIj6AAKJV8DKwZY22MJWsq1ROv'
'FPLtbE.1YzVMYoVY2CgET0pEVzI0iEXqDhiPYP5-A4uNgO52UalNylqSnVRzazHjuY.F.pj4W4Igm1isBab',
'https://www.toutiao.com/api/pc/feed/?max_behot_time=1581482108&category=__all__&utm_source=toutiao&widen=1&tad'
'require=true&as=A1551EA4733B3BB&cp=5E430BF35B9B9E1&_signature=FF0USAAgEBDSCq3FBGe4NxRdVVAAEoj8DKwZY22MJWsq1ROv'
'FPLtbE.1YzVMYoVY2CgET0pEVzI0iEXqDhiPYP5-A4uNgO52UalNylqSnVRzazHjuY.F.pj4W4Igm1isBab',
'https://www.toutiao.com/api/pc/feed/?max_behot_time=1581473162&category=__all__&utm_source=toutiao&widen=1&tad'
'require=true&as=A1351E74D37A7F3&cp=5E436A671FA39E1&_signature=vxltHAAgEBB5TtSRvyxlKL8ZLAAAOFoOoHL2tGvGmFXGcdHY'
'knMSUDObnxVSydiRHMQ9qESbtN49-8H0CL4nRejMdYrv1pHS7TjnqorKPsWV8I.bx4O9oPN4h6qA-6Yyi-c',
'https://www.toutiao.com/api/pc/feed/?max_behot_time=1581466232&category=__all__&utm_source=toutiao&widen=1&tad'
'require=true&as=A125FE04037A7F4&cp=5E43BA376FE44E1&_signature=PCGcHAAgEBD6diWRbNHmNjwh3QAAGJoOoHL2tGvGmFXGcdHY'
'knMSUDObnxVSydiRHMQ9qESbtN49-8H0CL4nRejMdYrv1pHS7TjnqorKPsWV8I.bx4O9oPN4h6qA-6Yyi-c',
'https://www.toutiao.com/api/pc/feed/?max_behot_time=1581445891&category=__all__&utm_source=toutiao&widen=1&tad'
'require=true&as=A1752E94537A7F7&cp=5E43AA578FE74E1&_signature=3PHw7AAgEBAapklhs.mElNzxsfAAIK6OoHL2tGvGmFXGcdHY'
'knMSUDObnxVSydiRHMQ9qESbtN49-8H0CL4nRejMdYrv1pHS7TjnqorKPsWV8I.bx4O9oPN4h6qA-6Yyi-c',
'https://www.toutiao.com/api/pc/feed/?max_behot_time=1581430411&category=__all__&utm_source=toutiao&widen=1&tad'
'require=true&as=A115EEC4430A7FA&cp=5E437A979FAA2E1&_signature=5lWR7AAgEBAgAihh8IuSYOZV0PAALghOoHL2tGvGmFXGcdHY'
'knMSUDObnxVSydiRHMQ9qESbtN49-8H0CL4nRejMdYrv1pHS7TjnqorKPsWV8I.bx4O9oPN4h6qA-6Yyi-c',
'https://www.toutiao.com/api/pc/feed/?max_behot_time=1581430361&category=__all__&utm_source=toutiao&widen=1&tad'
'require=true&as=A105DE34F3DA7FB&cp=5E43DAE7DF9B8E1&_signature=jtZheAAgEBBIgdj1EOyMJo7WIGAANCiOoHL2tGvGmFXGcdHY'
'knMSUDObnxVSydiRHMQ9qESbtN49-8H0CL4nRejMdYrv1pHS7TjnqorKPsWV8I.bx4O9oPN4h6qA-6Yyi-c',
'https://www.toutiao.com/api/pc/feed/?max_behot_time=1581425927&category=__all__&utm_source=toutiao&widen=1&tad'
'require=true&as=A1853E34D36A7FD&cp=5E43BAE7EFFD1E1&_signature=HSWrOAAgEBDbchK1XXegSB0l6iAAENqOoHL2tGvGmFXGcdHY'
'knMSUDObnxVSydiRHMQ9qESbtN49-8H0CL4nRejMdYrv1pHS7TjnqorKPsWV8I.bx4O9oPN4h6qA-6Yyi-c', ]
    return urls
def page_parse(html):
    # Each feed response carries a 'data' list of articles; print index and title.
    for i, item in enumerate(html.get('data', [])):
        print(str(i) + item['title'])

def main():
    html = get_page(url_list())
    for html_every in html:
        page_parse(html_every)

if __name__ == '__main__':
    main()
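Before looping over the whole list it can be worth checking a single request first. The signed URLs above tend to stop working after a while (as/cp/_signature look like time-limited anti-crawler tokens), so this small sanity check, built only on the functions above and not part of the original script, tells you whether the list needs to be re-captured:

# Sanity check: try only the first captured URL and see whether the
# 'data' key that page_parse relies on actually comes back.
pages = get_page(url_list()[:1])
if pages and pages[0].get('data'):
    print('ok, got', len(pages[0]['data']), 'articles from the first page')
else:
    print('no data returned - the signed URLs have probably expired')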
There are admittedly better ways to do this, but I wanted to try pulling the JSON straight out of the XHR requests, which is how that pile of addresses came about.
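For the record, the approach mentioned at the top, pinning the parameters that never change and keeping only the varying ones in a list, might look roughly like the sketch below. This is only a sketch under assumptions: the parameter names and the sample as/cp values are taken from the captured URLs above, but as, cp and _signature are generated per request, so the placeholder values would still have to be copied from freshly captured requests.

import requests

BASE_URL = 'https://www.toutiao.com/api/pc/feed/'

# Parameters that are identical in every captured request.
COMMON_PARAMS = {
    'category': '__all__',
    'utm_source': 'toutiao',
    'widen': 1,
    'tadrequire': 'true',
}

# One dict per captured request, holding only the fields that differ.
# The _signature placeholder must be replaced with a real captured value.
VARYING_PARAMS = [
    {'max_behot_time': 1581473162, 'as': 'A1C54E94C3BB3BD',
     'cp': '5E43EB638B5DAE1', '_signature': 'copied-from-a-real-request'},
]

def fetch_feed_pages():
    headers = {'User-Agent': 'Mozilla/5.0'}
    pages = []
    for extra in VARYING_PARAMS:
        # Merge the fixed and the varying parameters for this request.
        params = {**COMMON_PARAMS, **extra}
        resp = requests.get(BASE_URL, headers=headers, params=params, timeout=10)
        pages.append(resp.json())
    return pages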