AJAX 异步请求 - 腾讯招聘岗位案例

最新推荐文章于 2025-04-03 21:25:56 发布

yunAike

最新推荐文章于 2025-04-03 21:25:56 发布

阅读量880

点赞数 9

文章标签： ajax 网络爬虫 python

本文链接：https://blog.youkuaiyun.com/yunAike/article/details/145288177

版权

代码部分

import requests
import time
import logging
from random import randint

# Configure root logging: INFO level, timestamp - level - message format
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Default request headers (a desktop Chrome User-Agent string)
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
}

# Shared Session so every request reuses the same connection pool and headers
session = requests.Session()
session.headers.update(headers)


def generate_params(page_index):
    """Build the query parameters for one page of the job-listing request.

    Args:
        page_index: Value placed under the ``pageIndex`` key.

    Returns:
        A new dict of query parameters. ``timestamp`` is the current time in
        whole milliseconds; filters that are not used are present with the
        value ``None``.
    """
    now_ms = int(time.time() * 1000)

    # Keyword order mirrors the order in which the parameters are sent.
    query = dict(
        timestamp=now_ms,
        countryId=None,  # no country filter
        cityId=None,  # no city filter
        bgIds=None,  # no business-group filter
        productId=None,  # no product filter
        categoryId="40002002,40001002,40001004",  # comma-separated category IDs
        parentCategoryId=None,  # no parent-category filter
        attrId=1,
        keyword=None,  # no search keyword
        pageIndex=page_index,  # page being requested
        pageSize=10,  # records per page
        language="zh-cn",
        area="cn",
    )
    return query


def get_page_data(page_index, url):
    """Fetch one page from ``url`` and return its decoded JSON body.

    Args:
        page_index: Page number, forwarded to ``generate_params``.
        url: Endpoint to send the GET request to.

    Returns:
        The decoded JSON payload, or ``None`` when the request fails, the
        server answers with an error status, or the body is not valid JSON.
        Failures are logged, never raised.
    """
    params = generate_params(page_index)

    try:
        response = session.get(url, params=params, timeout=10)
        response.raise_for_status()  # raises HTTPError on 4xx/5xx
    except requests.exceptions.RequestException as e:
        # Take the status from the exception itself: the local `response` is
        # unbound when session.get() raised (timeout, DNS, connection reset),
        # and a Response object is falsy for 4xx/5xx, so a truthiness test on
        # it would misreport HTTP errors as "no response".
        failed = getattr(e, "response", None)
        status = failed.status_code if failed is not None else '无响应'
        logging.error(f"请求失败，状态码: {status}")
        logging.error(f"异常信息: {e}")
        return None

    # On the first page only, log both encodings and switch to the detected one.
    if page_index == 1:
        logging.info(f"Declared encoding: {response.encoding}")
        logging.info(f"Apparent encoding: {response.apparent_encoding}")
        response.encoding = response.apparent_encoding

    logging.info(f"正在获取第 {page_index} 页数据...")

    try:
        json_data = response.json()
    except ValueError as e:
        # JSON decode errors are ValueError subclasses in every requests
        # version; older releases do not wrap them in RequestException.
        logging.error(f"请求失败，状态码: {response.status_code}")
        logging.error(f"异常信息: {e}")
        return None

    if not json_data:
        logging.warning(f"第 {page_index} 页没有返回有效数据！")

    return json_data


def crawl_pages(url, total_pages=10):
    """Fetch pages ``1..total_pages`` from ``url`` one after another.

    Each page is requested through ``get_page_data``. A truthy payload is
    printed to stdout; a falsy one is logged as a failure and skipped. After
    every page, including the last, the function sleeps for a random whole
    number of seconds between 3 and 6.

    Args:
        url: Endpoint passed through to ``get_page_data``.
        total_pages: Number of pages to request, starting at page 1.
    """
    for page_index in range(1, total_pages + 1):
        payload = get_page_data(page_index, url)

        if not payload:
            logging.error(f"第 {page_index} 页数据爬取失败，跳过该页...")
        else:
            logging.info(f"第 {page_index} 页数据爬取成功!")
            print("第{}页的数据如下:".format(page_index))
            print(payload)

        # Random pause between requests to avoid tripping anti-scraping limits.
        pause_seconds = randint(3, 6)
        time.sleep(pause_seconds)


# Base URL of the job-query endpoint
url = "https://careers.tencent.com/tencentcareer/api/post/Query"

# Start the crawl with the default page count; this runs as soon as the module is executed
crawl_pages(url)

结果部分