今日头条ajax技术内as ,cp,_cp_signature参数分析、破解及python爬取频道数据

本文详细介绍了如何解析和破解今日头条API请求中涉及的as, cp和_cp_signature加密参数。通过分析JS代码,转化为Python实现,从而能够成功爬取指定频道的数据。提供了完整的Python爬虫代码资源链接。" 113369685,10536711,C语言实现:顺序查找与优化,"['数据结构', '算法', 'C语言编程', '线性表']

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

【腾讯云】11.11 云上盛惠,云产品限时抢购,1核2G云服务器首年88元

相信很多人在爬今日头条的时候就遇到了一个难题,就是找到了消息头后,有3个加密的参数(as ,cp,_cp_signature),如下图:

找到的请求URL是:

https://www.toutiao.com/api/pc/feed/?max_behot_time=1535349295&category=__all__&utm_source=toutiao&widen=1&tadrequire=false&as=A1F56BD8E379234&cp=5B831962B354CE1&_signature=PRRukBAUZp609VcKGFWglD0Ubo

以上共有

max_behot_time;category;utm_source;widen;tadrequir;as;cp;_signature;8个参数;

max_behot_time 可以看出是10位数字的时间戳;

category 是对应的频道名,可以在首页找到

utm_source 固定是toutiao,widen固定是1,tadrequire固定是false,剩下的就是as,cp,_cp_signature三个参数了;

要破解这三个参数,我们需要找下JS的代码里是否有相关信息,于是通过搜索“_signature”找到了对应的JS加密代码:

 

function e(t) {
        var e = ascp.getHoney()
          , i = "";
        window.TAC && (i = TAC.sign("refresh" === t ? 0 : r.params.max_behot_time_tmp)),
        r.params = _.extend({}, r.params, {
            as: e.as,
            cp: e.cp,
            max_behot_time: "refresh" === t ? 0 : r.params.max_behot_time_tmp,
            _signature: i
        }

可以看到as和cp是由ascp.getHoney函数传递的,我们继续找下getHoney这个函数;

!function(t) {
    var e = {};
    e.getHoney = function() {
        var t = Math.floor((new Date).getTime() / 1e3)
          , e = t.toString(16).toUpperCase()
          , i = md5(t).toString().toUpperCase();
        if (8 != e.length)
            return {
                as: "479BB4B7254C150",
                cp: "7E0AC8874BB0985"
            };
        for (var n = i.slice(0, 5), a = i.slice(-5), s = "", o = 0; 5 > o; o++)
            s += n[o] + e[o];
        for (var r = "", c = 0; 5 > c; c++)
            r += e[c + 3] + a[c];
        return {
            as: "A1" + s + e.slice(-3),
            cp: e.slice(0, 3) + r + "E1"
        }
    }
    ,
    t.ascp = e
}(window, document)

根据以上JS代码,可以写成python代码:

    def getHoney():  #####根据JS脚本破解as ,cp
        t = int(time.time())  #获取当前时间
        #t=1534389637
        #print(t)
        e =str('%X' % t)  ##格式化时间
        #print(e)
        m1 = hashlib.md5()  ##MD5加密
        m1.update(str(t).encode(encoding='utf-8'))  ##转化格式
        i = str(m1.hexdigest()).upper() ####转化大写
        #print(i)
        n=i[0:5]    ##获取前5位字符
        a=i[-5:]    ##获取后5位字符
        s=''
        r=''
        for x in range(0,5):   ##交叉组合字符
            s+=n[x]+e[x]
            r+=e[x+3]+a[x]
        eas='A1'+ s+ e[-3:]   
        ecp=e[0:3]+r+'E1'
        #print(eas)
        #print(ecp)
        return eas,ecp     

接下来就是破解_cp_signature,经过搜索找不到对应的JS脚本,百度很久后找到了个大牛(太阳雨2012 )自己破解的JS代码,

navigator = {
    // WT-JS_DEBUG v1.7.5 - NLiger2018
    appCodeName: "Mozilla",
    appMinorVersion: "0",
    appName: "Netscape",
    appVersion: "5.0 (Windows NT 10.0; WOW64; Trident/7.0; .NET4.0C; .NET4.0E; .NET CLR 2.0.50727; .NET CLR 3.0.30729; .NET CLR 3.5.30729; InfoPath.3; rv:11.0) like Gecko",
    browserLanguage: "zh-CN",
    cookieEnabled: true,
    cpuClass: "x86",
    language: "zh-CN",
    maxTouchPoints: 0,
    msManipulationViewsEnabled: true,
    msMaxTouchPoints: 0,
    msPointerEnabled: true,
    onLine: true,
    platform: "Win32",
    pointerEnabled: true,
    product: "Gecko",
    systemLanguage: "zh-CN",
    userAgent: "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; .NET4.0C; .NET4.0E; .NET CLR 2.0.50727; .NET CLR 3.0.30729; .NET CLR 3.5.30729; InfoPath.3; rv:11.0) like Gecko",
    userLanguage: "zh-CN",
    vendor: "",
    vendorSub: "",
    webdriver: false
}, window = this, window.navigator = navigator;

if (typeof JSON !== "object") {
    JSON = {};
}(function () {
    "use strict";
    var rx_one = /^[\],:{}\s]*$/;
    var rx_two = /\\(?:["\\\/bfnrt]|u[0-9a-fA-F]{4})/g;
    var rx_three = /"[^"\\\n\r]*"|true|false|null|-?\d+(?:\.\d*)?(?:[eE][+\-]?\d+)?/g;
    var rx_four = /(?:^|:|,)(?:\s*\[)+/g;
    var rx_escapable = /[\\"\u0000-\u001f\u007f-\u009f\u00ad\u0600-\u0604\u070f\u17b4\u17b5\u200c-\u200f\u2028-\u202f\u2060-\u206f\ufeff\ufff0-\uffff]/g;
    var rx_dangerous = /[\u0000\u00ad\u0600-\u0604\u070f\u17b4\u17b5\u200c-\u200f\u2028-\u202f\u2060-\u206f\ufeff\ufff0-\uffff]/g;

    function f(n) {
        return n < 10 ? "0" + n : n;
    }

    function this_value() {
        return this.valueOf();
    }
    if (typeof Date.prototype.toJSON !== "function") {
        Date.prototype.toJSON = function () {
            return isFinite(this.valueOf()) ? this.getUTCFullYear() + "-" + f(this.getUTCMonth() + 1) + "-" + f(this.getUTCDate()) + "T" + f(this.getUTCHours()) + ":" + f(this.getUTCMinutes()) + ":" + f(this.getUTCSeconds()) + "Z" : null;
        };
        Boolean.prototype.toJSON = this_value;
        Number.prototype.toJSON = this_value;
        String.prototype.toJSON = this_value;
    }
    var gap;
    var indent;
    var meta;
    var rep;

    function quote(string) {
        rx_escapable.lastIndex = 0;
        return rx_escapable.test(string) ? "\"" + string.replace(rx_escapable, function (a) {
            var c = meta[a];
            return typeof c === "string" ? c : "\\u" + ("0000" + a.charCodeAt(0).toString(16)).slice(-4);
        }) + "\"" : "\"" + string + "\"";
    }

    function str(key, holder) {
        var i;
        var k;
        var v;
        var length;
        var mind = gap;
        var partial;
        var value = holder[key];
        if (value && typeof value === "object" && typeof value.toJSON === "function") {
            value = value.toJSON(key);
        }
        if (typeof rep === "function") {
            value = rep.call(holder, key, value);
        }
        switch (typeof value) {
        case "string":
            return quote(value);
        case "number":
            return isFinite(value) ? String(value) : "null";
        case "boolean":
        case "null":
            return String(value);
        case "object":
            if (!value) {
                return "null";
            }
            gap += indent;
            partial = [];
            if (Object.prototype.toString.apply(value) === "[object Array]") {
                length = value.length;
                for (i = 0; i < length; i += 1) {
                    partial[i] = str(i, value) || "null";
                }
                v = partial.length === 0 ? "[]" : gap ? "[\n" + gap + partial.join(",\n" + gap) + "\n" + mind + "]" : "[" + partial.join(",") + "]";
                gap = mind;
                return v;
            }
            if (rep && typeof rep === "object") {
                length = rep.length;
                for (i = 0; i < length; i += 1) {
                    if (typeof rep[i] === "string") {
                        k = rep[i];
                        v = str(k, value);
                        if (v) {
                            partial.push(quote(k) + (gap ? ": " : ":") + v);
                        }
                    }
                }
            } else {
                for (k in value) {
                    if (Object.prototype.hasOwnProperty.call(value, k)) {
                        v = str(k, value);
                        if (v) {
                            partial.push(quote(k) + (gap ? ": " : ":") + v);
                        }
                    }
                
评论 58
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值