Python语言快手视频爬虫程序代码-2025-8-4

最新推荐文章于 2025-08-04 19:52:09 发布

EYYLTV

最新推荐文章于 2025-08-04 19:52:09 发布

阅读量245

点赞数 4

CC 4.0 BY-SA版权

文章标签： python 开发语言 爬虫

本文链接：https://blog.youkuaiyun.com/qq_32257509/article/details/149906669

import requests
import os
import re
from urllib.parse import quote
import time

def search_videos(keyword):
    """Search Kuaishou for videos matching *keyword*.

    Posts the ``visionSearchPhoto`` GraphQL operation to
    ``https://www.kuaishou.com/graphql`` and extracts the ``feeds`` list
    found under ``data -> visionSearchPhoto -> feeds`` in the JSON reply.

    Args:
        keyword: Search text. It is sent as the GraphQL ``keyword`` variable
            and URL-quoted into the ``Referer`` header.

    Returns:
        The non-empty ``feeds`` value from the response, or ``None`` when the
        request fails, the body is not valid JSON, or no feeds are present.
        A message is printed in every ``None`` case.
    """
    url = 'https://www.kuaishou.com/graphql'

    # Be sure to replace the `did` value below with a current one!
    # NOTE(review): the trailing ';' inside the value looks like a copy
    # artifact from a raw Cookie header — confirm before relying on it.
    cookies = {
        'did': 'web_841df24177463af919b38850464d1709;',
        'didv': str(int(time.time() * 1000)),
        'kpf': 'PC_WEB',
        'clientid': '3',
        'kpn': 'KUAISHOU_VISION'
    }

    headers = {
        'Accept': 'application/json, text/plain, */*',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
        'Content-Type': 'application/json',
        'Origin': 'https://www.kuaishou.com',
        'Pragma': 'no-cache',
        'Referer': f'https://www.kuaishou.com/search/video?searchKey={quote(keyword)}',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-origin',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36',
        'sec-ch-ua': '"Not/A)Brand";v="8", "Chromium";v="126", "Google Chrome";v="126"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
    }

    json_data = {
        'operationName': 'visionSearchPhoto',
        'variables': {
            'keyword': keyword,
            'pcursor': '',
            'page': 'search',
        },
        'query': 'fragment photoContent on PhotoEntity {\n  __typename\n  id\n  duration\n  caption\n  originCaption\n  likeCount\n  viewCount\n  commentCount\n  realLikeCount\n  coverUrl\n  photoUrl\n  photoH265Url\n  manifest\n  manifestH265\n  videoResource\n  coverUrls {\n    url\n    __typename\n  }\n  timestamp\n  expTag\n  animatedCoverUrl\n  distance\n  videoRatio\n  liked\n  stereoType\n  profileUserTopPhoto\n  musicBlocked\n  riskTagContent\n  riskTagUrl\n}\n\nfragment recoPhotoFragment on recoPhotoEntity {\n  __typename\n  id\n  duration\n  caption\n  originCaption\n  likeCount\n  viewCount\n  commentCount\n  realLikeCount\n  coverUrl\n  photoUrl\n  photoH265Url\n  manifest\n  manifestH265\n  videoResource\n  coverUrls {\n    url\n    __typename\n  }\n  timestamp\n  expTag\n  animatedCoverUrl\n  distance\n  videoRatio\n  liked\n  stereoType\n  profileUserTopPhoto\n  musicBlocked\n  riskTagContent\n  riskTagUrl\n}\n\nfragment feedContent on Feed {\n  type\n  author {\n    id\n    name\n    headerUrl\n    following\n    headerUrls {\n      url\n      __typename\n  }\n    __typename\n  }\n  photo {\n    ...photoContent\n    ...recoPhotoFragment\n    __typename\n  }\n  canAddComment\n  llsid\n  status\n  currentPcursor\n  tags {\n    type\n    name\n    __typename\n  }\n  __typename\n}\n\nquery visionSearchPhoto($keyword: String, $pcursor: String, $searchSessionId: String, $page: String, $webPageArea: String) {\n  visionSearchPhoto(keyword: $keyword, pcursor: $pcursor, searchSessionId: $searchSessionId, page: $page, webPageArea: $webPageArea) {\n    result\n    llsid\n    webPageArea\n    feeds {\n      ...feedContent\n      __typename\n    }\n    searchSessionId\n    pcursor\n    aladdinBanner {\n      imgUrl\n      link\n      __typename\n    }\n    __typename\n  }\n}\n',
    }

    try:
        response = requests.post(
            url=url,
            cookies=cookies,
            headers=headers,
            json=json_data,
            timeout=10
        )
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON.
        print(f"搜索失败: {e}")
        return None

    # Walk data -> visionSearchPhoto -> feeds without assuming any level is a
    # dict: a JSON null at any level would otherwise raise AttributeError and
    # be reported as a failed search instead of an empty result.
    node = data
    for key in ('data', 'visionSearchPhoto', 'feeds'):
        node = node.get(key) if isinstance(node, dict) else None

    if not node:
        print("没有找到相关视频！可能是Cookie失效或请求被限制")
        return None

    return node

def download_videos(feeds, count):
    """Download up to *count* videos from *feeds* into the ``sp`` directory.

    Each feed's ``photo['caption']`` becomes the file name (sanitized and
    truncated to 50 characters) and ``photo['photoUrl']`` is the download
    URL. Feeds missing either value are skipped. A failed download is
    reported and skipped; it does not count toward *count*.

    Args:
        feeds: Iterable of feed dicts, each expected to hold a ``photo`` dict
            with ``caption`` and ``photoUrl`` entries.
        count: Maximum number of videos to download successfully.

    Returns:
        None. Progress and errors are printed.
    """
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs('sp', exist_ok=True)

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36',
    }

    downloaded = 0
    for feed in feeds:
        if downloaded >= count:
            break

        # Tolerate malformed entries instead of aborting the whole run with
        # a KeyError/TypeError on the first feed that lacks a 'photo' dict.
        photo = feed.get('photo') if isinstance(feed, dict) else None
        if not isinstance(photo, dict):
            continue

        title = photo.get('caption')
        link = photo.get('photoUrl')
        if not title or not link:
            continue

        print(f"正在下载: {title}")

        # Sanitize the file name: keep word characters, CJK ideographs and
        # '-', replace everything else with '_'.
        safe_name = re.sub(r'[^\w\u4e00-\u9fff-]', '_', title)[:50]
        file_path = os.path.join('sp', f"{safe_name}.mp4")
        # Stream into a temporary file first so that an interrupted download
        # never leaves a truncated .mp4 or clobbers an earlier good copy.
        part_path = file_path + '.part'

        try:
            # The context manager releases the connection even on errors.
            with requests.get(
                url=link,
                headers=headers,
                stream=True,
                timeout=10
            ) as video_response:
                video_response.raise_for_status()
                with open(part_path, 'wb') as f:
                    for chunk in video_response.iter_content(chunk_size=8192):
                        if chunk:
                            f.write(chunk)
            os.replace(part_path, file_path)
        except (requests.RequestException, OSError) as e:
            print(f"下载失败: {title} - {e}")
            try:
                os.remove(part_path)
            except OSError:
                # Nothing to clean up (the file was never created) or it
                # cannot be removed; either way move on to the next feed.
                pass
            continue

        downloaded += 1
        print(f"成功下载: {safe_name}.mp4 ({downloaded}/{count})")

def main():
    """Run the interactive search-then-download workflow.

    Prompts for a search keyword, calls ``search_videos``, prompts for how
    many of the found videos to download, then calls ``download_videos``.
    Exits via ``SystemExit`` on empty or invalid input, or when the search
    returns nothing.
    """
    print("快手视频下载器")

    # Step 1: read the video name and search for it.
    keyword = input("请输入要搜索的视频名称: ").strip()
    if not keyword:
        print("视频名称不能为空！")
        raise SystemExit

    print("\n正在搜索视频，请稍候...")
    feeds = search_videos(keyword)

    if not feeds:
        input("\n[按回车键退出]")
        raise SystemExit

    video_count = len(feeds)
    print(f"\n共找到 {video_count} 个相关视频")

    # Step 2: read how many videos to download.
    raw_count = input(f"请输入要下载的视频数量(1-{video_count}): ").strip()
    try:
        count = int(raw_count)
    except ValueError:
        print("请输入有效的数字！")
        raise SystemExit

    if count <= 0:
        print("数量必须大于0！")
        raise SystemExit
    if count > video_count:
        print(f"数量不能超过找到的视频数 {video_count}！")
        raise SystemExit

    # Step 3: download the videos.
    print("\n开始下载视频...")
    download_videos(feeds, count)

    input("\n下载任务完成！\n[按回车键退出]")


# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    main()