工具:requests(用于发送 HTTP 请求)
通过抓包发现,数据位于 XHR 请求 getHotModuleList 的响应中,返回的是 JSON 数据。
分析完之后开始构建请求(部分代码):
1.1 由于该网站会对 headers 进行检测,所以要构建完整的请求头、参数等:
import requests
# Full browser-like header set: the accompanying notes say the site inspects
# request headers, so the complete set is sent with every request.
headers = {
    "accept": "application/json, text/plain, */*",
    "accept-language": "zh-CN,zh;q=0.9,en;q=0.8",
    "cache-control": "no-cache",
    "content-type": "application/json;charset=UTF-8",
    "pragma": "no-cache",
    "priority": "u=1, i",
    "sec-ch-ua": '"Google Chrome";v="131", "Chromium";v="131", "Not_A Brand";v="24"',
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": '"Windows"',
    "sec-fetch-dest": "empty",
    "sec-fetch-mode": "cors",
    "sec-fetch-site": "same-site",
    # This is the Referer header (HTTP header names are case-insensitive).
    "referer": "https://news.qq.com/",
    # NOTE(review): hard-coded cookie string; presumably tied to one browser
    # session and may need refreshing — confirm before reuse.
    "Cookie": "qq_domain_video_guid_verify=8de17720424fbd6c; _qimei_uuid42=193170e1913100894e00a8a501156bd98238673e86; _qimei_fingerprint=d030e4b44760a72722dfc70ddadc13a5; _qimei_q36=; _qimei_h38=936f42124e00a8a501156bd902000006819317; pgv_pvid=7582736750; pgv_info=ssid=s7969037400; vversion_name=8.2.95; video_omgid=8de17720424fbd6c; qz_gdt=aqloazy3biaoiao3id5a; pac_uid=0_d9X6dYb2cKn8N; suid=user_0_d9X6dYb2cKn8N; current-city-name=gz; lcad_appuser=C7A7B5B39C4FE12C; lcad_Lturn=65; lcad_LPLFturn=636; lcad_o_minduid=PEZaidH8JIxtWTq1-L0JrV042Z0t0srp; lcad_LPSJturn=302; lcad_LBSturn=115; lcad_LVINturn=339; lcad_LDERturn=52",
}

# Endpoint that every request is POSTed to.
url = "https://i.news.qq.com/web_feed/getHotModuleList"

# Container for the extracted target fields.
result = []
# Issue one POST per flush_num value from 6 to 10 inclusive and walk the
# items of each JSON response.
# NOTE(review): flush_num looks like a page/refresh counter — confirm.
for i in range(6, 11):
    payload = {
        "base_req": {"from": "pc"},
        "forward": "2",
        "qimei36": "0_d9X6dYb2cKn8N",
        "device_id": "0_d9X6dYb2cKn8N",
        "flush_num": i,  # the only field that changes between requests
        "channel_id": "news_news_top",
        "item_count": 20,
    }
    response = requests.post(
        url,
        headers=headers,
        json=payload,
        timeout=20,
    )
    # Fail loudly on a 4xx/5xx status instead of trying to parse an error
    # body as if it were the expected JSON payload.
    response.raise_for_status()
    jsons = response.json()
    for item in jsons["data"]:
        # NOTE(review): `title` is not consumed within the visible lines;
        # later code presumably stores it (e.g. in `result`) — confirm.
        title = item["title"]