1.写代码之前必须要搞清的事。
分析网页/网站/APP 的逻辑关系! 弄清我们看到的结果是怎样一步一步产生的!
2.POST请求与GET请求的区别。
2.1 网址基本的构造
网址接口?参数列表
https://www.xxx.xxx/xxx/xxx?xxx=xxx&yyy=yyy&zzz=zzz&www=www
2.2 post和get的区别
requests.get 请求网址 网址一般来说很长 网址接口和参数列表放在一起了!
requests.post 请求网址 网址一般来说很短 网址接口和参数列表分开放的! 参数不显示在网址里, 相对更安全!
2.3 参数列表到底在哪里? 负载/载荷
data = {
}
去负载里面 打开第一个小三角 小三角下面的内容复制过来粘贴!
冒号前面的 加引号 每一条数据的后面加逗号!
2.4 怎么去请求?
res = requests.post(url, json=data)
伪装上一节课讲过!~~
3.正则表达式提取数据。
import re
# 从res1.text里面去把视频的链接单独提取出来!
video_url = re.findall('"photoUrl":"(.*?)","photoH265Url"', res1.text)[0]
print(video_url)
【完整代码】
import requests
import re
# GraphQL endpoint the episode metadata is requested from.
GRAPHQL_URL = 'https://www.kuaishou.com/graphql'

# Browser-like User-Agent used to disguise the requests as coming from a browser.
HEADERS = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36 Edg/129.0.0.0'}

# Value sent as the `tubeId` GraphQL variable (identifies the series to fetch).
TUBE_ID = "5xcvpecnfii6vgs"

# Seconds to wait on a network operation before giving up instead of hanging.
REQUEST_TIMEOUT = 30

# Size of the pieces the video is streamed to disk in.
CHUNK_SIZE = 1024 * 1024

# Query text copied verbatim from the request payload shown in the browser's
# developer tools; it must be sent unchanged.
EPISODE_QUERY = "fragment photoContent on PhotoEntity {\n __typename\n id\n duration\n caption\n originCaption\n likeCount\n viewCount\n commentCount\n realLikeCount\n coverUrl\n photoUrl\n photoH265Url\n manifest\n manifestH265\n videoResource\n coverUrls {\n url\n __typename\n }\n timestamp\n expTag\n animatedCoverUrl\n distance\n videoRatio\n liked\n stereoType\n profileUserTopPhoto\n musicBlocked\n riskTagContent\n riskTagUrl\n}\n\nfragment recoPhotoFragment on recoPhotoEntity {\n __typename\n id\n duration\n caption\n originCaption\n likeCount\n viewCount\n commentCount\n realLikeCount\n coverUrl\n photoUrl\n photoH265Url\n manifest\n manifestH265\n videoResource\n coverUrls {\n url\n __typename\n }\n timestamp\n expTag\n animatedCoverUrl\n distance\n videoRatio\n liked\n stereoType\n profileUserTopPhoto\n musicBlocked\n riskTagContent\n riskTagUrl\n}\n\nfragment feedContent on Feed {\n type\n author {\n id\n name\n headerUrl\n following\n headerUrls {\n url\n __typename\n }\n __typename\n }\n photo {\n ...photoContent\n ...recoPhotoFragment\n __typename\n }\n canAddComment\n llsid\n status\n currentPcursor\n tags {\n type\n name\n __typename\n }\n __typename\n}\n\nquery visionTubeEpisodeQuery($tubeId: String, $episodeNumber: Int, $page: String, $channelId: Int, $webPageArea: String) {\n visionTubeEpisode(tubeId: $tubeId, episodeNumber: $episodeNumber, page: $page, channelId: $channelId, webPageArea: $webPageArea) {\n ...feedContent\n result\n status\n __typename\n }\n}\n"

# Compiled once instead of on every loop iteration. Captures the text between
# the "photoUrl" and "photoH265Url" fields of the raw response body.
VIDEO_URL_PATTERN = re.compile(r'"photoUrl":"(.*?)","photoH265Url"')


def build_payload(episode_number: int) -> dict:
    """Return the JSON body that asks the GraphQL API for one episode.

    Args:
        episode_number: Value sent as the ``episodeNumber`` variable. The
            script starts counting at 0.

    Returns:
        A dict suitable for ``requests.post(..., json=...)``.
    """
    return {
        'operationName': "visionTubeEpisodeQuery",
        'query': EPISODE_QUERY,
        'variables': {
            'tubeId': TUBE_ID,
            'episodeNumber': episode_number,
            'page': "theater",
            'channelId': 10000,
        },
    }


def extract_video_url(response_text: str):
    """Return the first video link found in *response_text*, or ``None``.

    Returning ``None`` (rather than indexing an empty ``findall`` result and
    raising ``IndexError``) lets the caller stop cleanly when the response
    carries no video.
    """
    match = VIDEO_URL_PATTERN.search(response_text)
    return match.group(1) if match else None


def download_video(video_url: str, file_name: str) -> None:
    """Stream the video at *video_url* into the local file *file_name*.

    Raises:
        requests.HTTPError: If the server answers with an error status, so an
            error page is never saved as an ``.mp4``.
    """
    with requests.get(video_url, headers=HEADERS, stream=True,
                      timeout=REQUEST_TIMEOUT) as response:
        response.raise_for_status()
        # Write chunk by chunk so a large video is never held fully in memory,
        # and let `with` close the file even if the transfer fails midway.
        with open(file_name, 'wb') as video_file:
            for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
                video_file.write(chunk)


def main(max_episodes=None) -> None:
    """Download consecutive episodes until the API returns no video link.

    Args:
        max_episodes: Optional upper bound on the number of episodes to
            download. ``None`` (the default) means "until no link is found".
    """
    episode_number = 0
    while max_episodes is None or episode_number < max_episodes:
        # POST the endpoint with the payload kept separate from the URL.
        response = requests.post(
            GRAPHQL_URL,
            json=build_payload(episode_number),
            headers=HEADERS,
            timeout=REQUEST_TIMEOUT,
        )
        response.raise_for_status()

        video_url = extract_video_url(response.text)
        if video_url is None:
            # NOTE(review): assumes an out-of-range episode yields a response
            # without "photoUrl" -- confirm against the live API; pass
            # max_episodes as a safeguard if it does not.
            print(f'No video link found for episode {episode_number + 1}; stopping.')
            break
        print(video_url)

        download_video(video_url, f'我的小娇妻第{episode_number + 1}集.mp4')
        episode_number += 1


if __name__ == '__main__':
    main()