爱奇艺个人中心数据采集
声明:本文只作学习研究,禁止用于非法用途,否则后果自负,如有侵权,请告知删除,谢谢!
目标网址链接->>> 点击跳转.
图片原因反馈很多次版权问题了,我打码了 这样可以吗。
已覆盖 动态/视频/专辑
分析视频/专辑/动态接口 发现API都不同,但加密sign都是同一套加密函数。其中不同类别的加密通过 L()(i) 中的i的不同会得到不同的结果
if typ==1:
    # 专辑
    st = "GETiqiyihao.iqiyi.com/iqiyihao/entity/get_album_collection_videos.action?agenttype=118&agentversion=10.7.5&albumPage={page}&albumSize=6&authcookie={authcookie}&dfp={dfp}&fuid={uid}&m_device_id=cv2irlndqb0opl8fgsydst7q&timestamp={time_stamp}&videoSize=4NZrFGv72GYppTUxO"
elif typ==2:
    # 视频
    st = "GETiqiyihao.iqiyi.com/iqiyihao/entity/get_videos.action?agenttype=118&agentversion=10.7.5&authcookie={authcookie}&dfp={dfp}&fuid={uid}&m_device_id=cv2irlndqb0opl8fgsydst7q&page={page}&size=28&timestamp={time_stamp}NZrFGv72GYppTUxO"
elif typ==3:
    # 动态
    st = "GETiqiyihao.iqiyi.com/iqiyihao/entity/get_dynamic_ids.action?agenttype=118&agentversion=10.7.5&authcookie={authcookie}&dfp={dfp}&fuid={uid}&m_device_id=cv2irlndqb0opl8fgsydst7q&page={page}&timestamp={time_stamp}NZrFGv72GYppTUxO"
关于函数 L():它就是 md5。
tip:对于加密函数,可以在页面上直接尝试加密 1 或者 a,根据得到的结果即可判断是哪种算法,例如 md5(1)、base64(1) 等等。
复制直接调用
# -*- coding:utf-8 -*-
# 享受雷霆感受雨露
# author xyy,time:2021/6/16
import re
import time
import requests
from hashlib import md5
# Browser-like request headers attached to every API request in this script.
headers = {
    # Connection and client identification.
    "Connection": "keep-alive",
    "sec-ch-ua": '" Not A;Brand";v="99", "Chromium";v="90", "Google Chrome";v="90"',
    "sec-ch-ua-mobile": "?0",
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/90.0.4430.85 Safari/537.36"
    ),
    # Content negotiation and fetch metadata.
    "Accept": "*/*",
    "Sec-Fetch-Site": "same-site",
    "Sec-Fetch-Mode": "no-cors",
    "Sec-Fetch-Dest": "script",
    # Origin page and preferred languages.
    "Referer": "https://www.iqiyi.com/",
    "Accept-Language": "zh-CN,zh;q=0.9",
}
# Md5 加密函数 32 返回32位的加密结果
def md5_use(text: str) -> str:
result = md5(bytes(text, encoding="utf-8")).hexdigest()
return result
# Compute the ``sign`` request parameter.
def get_sign(page_num, uid, authcookie, dfp, typ=1, time_stamp=""):
    """Return the MD5 signature for one of the three listing endpoints.

    The signature is the MD5 hex digest of a fixed template (HTTP method,
    host, path, query parameters and a constant trailing suffix) filled in
    with the given values.

    Args:
        page_num: Page number or page cursor substituted into the template.
        uid: User ID, sent as ``fuid``.
        authcookie: Value of the ``authcookie`` query parameter.
        dfp: Value of the ``dfp`` query parameter.
        typ: Content category: 1 = albums, 2 = videos, 3 = dynamic feed.
        time_stamp: Timestamp string substituted into ``timestamp``.

    Returns:
        The 32-character hexadecimal MD5 digest. For any other ``typ`` the
        template is empty, so the digest of the empty string is returned.
    """
    # NOTE: the query parameter is ``&timestamp=``; it had been corrupted to
    # ``×tamp=`` (an HTML-entity decoding artefact), which produced signatures
    # over the wrong text.
    templates = {
        # Albums
        1: "GETiqiyihao.iqiyi.com/iqiyihao/entity/get_album_collection_videos.action?agenttype=118&agentversion=10.7.5&albumPage={page}&albumSize=6&authcookie={authcookie}&dfp={dfp}&fuid={uid}&m_device_id=cv2irlndqb0opl8fgsydst7q&timestamp={time_stamp}&videoSize=4NZrFGv72GYppTUxO",
        # Videos
        2: "GETiqiyihao.iqiyi.com/iqiyihao/entity/get_videos.action?agenttype=118&agentversion=10.7.5&authcookie={authcookie}&dfp={dfp}&fuid={uid}&m_device_id=cv2irlndqb0opl8fgsydst7q&page={page}&size=28&timestamp={time_stamp}NZrFGv72GYppTUxO",
        # Dynamic feed
        3: "GETiqiyihao.iqiyi.com/iqiyihao/entity/get_dynamic_ids.action?agenttype=118&agentversion=10.7.5&authcookie={authcookie}&dfp={dfp}&fuid={uid}&m_device_id=cv2irlndqb0opl8fgsydst7q&page={page}&timestamp={time_stamp}NZrFGv72GYppTUxO",
    }
    template = templates.get(typ, "")
    md5_str = template.format(
        page=page_num,
        uid=uid,
        authcookie=authcookie,
        dfp=dfp,
        time_stamp=time_stamp,
    )
    return md5_use(md5_str)
# Fetch one page of a content category from a user's home page.
def get_page(page, uid, typ, jsonp_name):
    """Request one page of albums, videos or dynamic-feed entries.

    Args:
        page: 1-based page number.
        uid: User ID, sent as ``fuid``.
        typ: Content category: 1 = albums, 2 = videos, 3 = dynamic feed.
        jsonp_name: Name sent as the JSONP ``callback`` parameter.

    Returns:
        The ``requests`` response object for the listing request.
    """
    authcookie, dfp = "", ""
    time_stamp = int(round(time.time() * 1000))
    if typ == 3:
        # The dynamic feed is paged by a cursor that is only available in the
        # response of a preceding request, so later pages need two requests.
        if page > 1:
            # jsonp_name must be forwarded: the helper takes four arguments.
            previous = get_page_typ3(page, uid, typ, jsonp_name)
            page = "".join(re.findall(r'"page":"(.*?)"', previous.text))
        else:
            page = "df_0_0:dv_0_0"
    sign = get_sign(page, uid, authcookie, dfp, typ, str(time_stamp))
    # NOTE: the query parameter is ``&timestamp=``; it had been corrupted to
    # ``×tamp=`` (an HTML-entity decoding artefact).
    url_templates = {
        1: "https://iqiyihao.iqiyi.com/iqiyihao/entity/get_album_collection_videos.action?agenttype=118&agentversion=10.7.5&albumPage={albumPage}&albumSize=6&authcookie={authcookie}&dfp={dfp}&fuid={uid}&m_device_id=cv2irlndqb0opl8fgsydst7q&sign={sign}&timestamp={time_stamp}&videoSize=4&callback={jsonp_name}",
        2: "https://iqiyihao.iqiyi.com/iqiyihao/entity/get_videos.action?agenttype=118&agentversion=10.7.5&authcookie={authcookie}&dfp={dfp}&fuid={uid}&m_device_id=cv2irlndqb0opl8fgsydst7q&page={page}&sign={sign}&size=28&timestamp={time_stamp}&callback={jsonp_name}",
        3: "https://iqiyihao.iqiyi.com/iqiyihao/entity/get_dynamic_ids.action?agenttype=118&agentversion=10.7.5&authcookie={authcookie}&dfp={dfp}&fuid={uid}&m_device_id=cv2irlndqb0opl8fgsydst7q&page={page}&sign={sign}&timestamp={time_stamp}&callback={jsonp_name}",
    }
    # An unknown typ leaves the URL empty, as before; requests then rejects it.
    url = url_templates.get(typ, "").format(
        sign=sign,
        page=page,
        uid=uid,
        authcookie=authcookie,
        dfp=dfp,
        time_stamp=time_stamp,
        albumPage=page,
        jsonp_name=jsonp_name,
    )
    # A timeout keeps the call from blocking forever on an unresponsive server.
    response = requests.get(url, headers=headers, timeout=10)
    return response
# Helper used only for the dynamic feed (typ 3).
def get_page_typ3(page, uid, typ, jsonp_name="jsonp_1626005050286_49428"):
    """Request the dynamic-feed endpoint with a cursor derived from *page*.

    The cursor is ``df_0_0:dv_<offset>_0`` with ``offset = (page - 1) * 10``.
    ``get_page`` reads the ``"page"`` field of this response to obtain the
    cursor for the page it actually wants.

    Args:
        page: 1-based page number used to derive the cursor offset.
        uid: User ID, sent as ``fuid``.
        typ: Content category passed on to ``get_sign``.
        jsonp_name: Name sent as the JSONP ``callback`` parameter. Defaults
            to the callback name that was previously hard-coded in the URL.

    Returns:
        The ``requests`` response object.
    """
    authcookie, dfp = "", ""
    time_stamp = int(round(time.time() * 1000))
    page = "df_0_0:dv_{}_0".format((page - 1) * 10)
    sign = get_sign(page, uid, authcookie, dfp, typ, str(time_stamp))
    # NOTE: the query parameter is ``&timestamp=``; it had been corrupted to
    # ``×tamp=`` (an HTML-entity decoding artefact).
    url = "https://iqiyihao.iqiyi.com/iqiyihao/entity/get_dynamic_ids.action?agenttype=118&agentversion=10.7.5&authcookie={authcookie}&dfp={dfp}&fuid={uid}&m_device_id=cv2irlndqb0opl8fgsydst7q&page={page}&sign={sign}&timestamp={time_stamp}&callback={jsonp_name}"
    url = url.format(
        sign=sign,
        page=page,
        uid=uid,
        authcookie=authcookie,
        dfp=dfp,
        time_stamp=time_stamp,
        jsonp_name=jsonp_name,
    )
    # A timeout keeps the call from blocking forever on an unresponsive server.
    response = requests.get(url, headers=headers, timeout=10)
    return response
if __name__ == '__main__':
    # Example credentials (optional; real values redacted here):
    # authcookie = "<your authcookie>"
    # dfp = "<your dfp>"
    #
    # typ values:
    #   1 - albums
    #   2 - videos
    #   3 - dynamic feed
    #   others - to be added
    jsonp_name = "xyy"  # default callback name
    page = 1
    # uid = 1201201320  # alternative example account
    uid = 1015560516  # example account
    typ = 2
    # Fetch the chosen category page and dump the raw JSONP response.
    info = get_page(page, uid, typ, jsonp_name)
    print(info.text)
authcookie/dfp 参数可省略
uid : 用户ID (必填)
page : 翻页 (必填 从1开始)
typ : 类型 (必填)
以上 就完成对sign参数解析
欢迎👏关注我的GitHub 欢迎star
我会分享一些平时的爬虫小例子 我们一起讨论