爬虫技术-cookie反爬讲解

本文详细介绍了如何应对网站使用Cookie进行反爬虫的策略,包括Cookie的加密原理,通过Python进行Cookie逆向实践,利用jsdom技术解析JavaScript,并提供了实战案例,解析JS代码,模拟请求以获取所需数据。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

COOkIE反爬虫

1 cookie反爬简介

Cookie 反爬虫指的是服务器端通过校验请求头中的 Cookie 值来区分正常用户和爬虫程序的手段,这种手段被广泛应用在 Web 应用中。

1.1 cookie加密原理

image

2 cookie逆向实践

2.1 逆向目标
2.2 逆向分析
  • 可以发现抓取接口数据,需要携带对应的cookie才能正常获取

image

通过查看cookie信息发现,该cookie为服务端返回image

查询发现,cookie值为每次请求首页时返回的image

2.3 逆向结果
  • 使用python先提取cookie,然后携带请求可以正常突破
import re
import httpx
from utils import ua


def get_ck():
    url = 'http://www.zjmazhang.gov.cn/hdjlpt/published?via=pc'
    res = httpx.request('GET',url)
    cookie = res.cookies.get('szxx_session')
    token = re.findall(r"var _CSRF = '(.*?)';",res.text)[0]
    return cookie,token

def index():
    c, t = get_ck()
    url = 'http://www.zjmazhang.gov.cn/hdjlpt/letter/pubList'
    headers = {
        'user-agent': ua.FakeChromeUA.get_ua(),
        'X-CSRF-TOKEN': t,
        'Cookie': 'XSRF-TOKEN={};szxx_session={}'.format(t, c)
    }
    data = {
        "offset": "0",
        "limit": "20",
        "site_id": "759010",
    }
    res = httpx.post(url,data=data,headers=headers)
    if res.status_code == 200:
        print(res.text)
    else:
        print('请求错误')

index()

3. cookie逆向实践2

3.1 逆向目标
3.2 逆向分析
3.2.1 定位关键点
  • 使用hook技术,确认cookie生成位置
(function () {
  Object.defineProperty(document, 'cookie', {
    set: function (val) {
      if (val.indexOf('v') != -1) {
        debugger;
      }
      console.log('Hook捕获到cookie设置->', val);
      return val;
    }
  });
})();

image

cookie存放的2中形式:

headers = {
   cookie:'key1=value1;key2=value2;key3:value3'
}
cookie = {
    'key1': 'value1',
    'key2':'value2'
}
3.2.2 JS调试技巧

从这里可以发现就是整个的文件只有千多行代码,可以采用全复制方式处理cookie

image

注: 有很多的环境需要补,挨个补环境很麻烦,可以使用jsdom这个技术

3.3 逆向结果
点击查看代码
const jsdom = require("jsdom");
const { JSDOM } = jsdom;
const dom = new JSDOM(`<!DOCTYPE html><p>Hello world</p>`);
window=dom.window;
document=window.document;
navigator=window.navigator;
// const express = require('express');
// const app = express();


var _v = {}

var TOKEN_SERVER_TIME = 1656072000.984;
!function(n, t) {
    !function() {
        var r, e, a;
        r = e = a = n;
        var u, c, s;
        u = c = s = t;
        function v() {
            var n = arguments[s[0]];
            if (!n)
                return r[0];
            for (var t = u[1], o = a[1], i = c[2]; i < n.length; i++) {
                var v = n.charCodeAt(i)
                  , f = v ^ o;
                o = v,
                t += e[2].fromCharCode(f)
            }
            return t
        }
        var f = c[3]
          , l = s[4]
          , p = Wn(e[3], r[4], s[5])
          , d = a[5]
          , h = Wn(c[6], s[7])
          , g = c[8]
          , w = c[9]
          , m = r[6]
          , I = u[10]
          , y = a[7]
          , _ = (s[11],
        c[12],
        s[13])
          , C = e[8]
          , E = u[14]
          , A = ot(e[9], e[10])
          , b = a[11]
          , T = u[15]
          , B = c[16]
          , R = r[12]
          , k = r[13]
          , S = s[17]
          , P = u[18]
          , M = Wn(s[19], s[20], u[21])
          , O = v(s[22], e[14])
          , D = s[23]
          , x = s[24]
          , N = u[25]
          , L = u[26]
          , W = Wn(s[27], r[15])
          , F = u[28]
          , Y = r[16]
          , j = a[17]
          , H = e[18]
          , $ = e[19]
          , U = r[20]
          , V = v(c[29], e[21], e[22])
          , X = s[30]
          , G = s[31]
          , K = s[32]
          , Q = s[33]
          , Z = r[23]
          , q = r[24]
          , z = v(u[12], u[34], s[35])
          , J = u[36]
          , nn = a[25]
          , tn = s[37]
          , rn = c[38]
          , en = r[26]
          , an = c[39]
          , on = s[40]
          , un = a[27]
          , cn = u[41]
          , sn = ot(s[42], c[43])
          , vn = r[28]
          , fn = u[8]
          , ln = s[44]
          , pn = a[29]
          , dn = s[45]
          , hn = a[30]
          , gn = c[46]
          , wn = a[31]
          , mn = a[32]
          , In = s[47]
          , yn = r[33]
          , _n = a[34]
          , Cn = c[48]
          , En = a[8]
          , An = v(a[35], s[49])
          , bn = c[50]
          , Tn = c[51]
          , Bn = at(r[36], s[52])
          , Rn = ot(r[37], e[38])
          , kn = e[39]
          , Sn = u[53]
          , Pn = r[40]
          , Mn = s[54]
          , On = s[55]
          , Dn = Wn(u[56], r[41], r[42])
          , xn = r[43]
          , Nn = u[57]
          , Ln = e[44];
        function Wn() {
            return arguments[u[0]].split(e[0]).reverse().join(c[1])
        }
        var Fn = r[45], Yn = Wn(c[58], e[46]), jn = v(s[59], u[60]), Hn = Wn(r[47], s[61]), $n = s[62], Un = s[63], Vn = u[2], Xn = [new u[27](r[48]), new u[27](c[64])], Gn = [new e[47](ot(a[49])), new u[27](ot(a[50], u[65]))], Kn = c[66][f + l] || r[51].getElementsByTagName(p + d)[r[52]], Qn;
        !function(n) {
            n[e[53]] = s[67];
            function t(n) {
                var t = r[51][u[68]]
                  , o = u[69] + n + s[70]
                  , i = t.indexOf(o);
                if (i == -e[54]) {
                    if (o = n + c[70],
                    t.substr(r[52], o.length) != o)
                        return;
                    i = a[52]
                }
                var f = i + o[v(u[71], s[72])]
                  , l = t.indexOf(r[55], f);
                return l == -a[54] && (l = t[a[56]]),
                t.substring(f, l)
            }
            n[a[57]] = f;
            function o(n, t, a, o, i) {
                var c = n + r[58] + t;
                o && (c += e[59] + o),
                i && (c += v(Jn, u[73], s[74]) + i),
                a && (c += u[75] + a),
                u[66][u[68]] = c
            }
            n[s[76]] = t;
            function i(n, t, r) {
                this.setCookie(n, u[1], u[77], t, r)
            }
            n[s[78]] = o;
            function f() {
                var t = a[60];
                this.setCookie(t, u[67]),
                this.getCookie(t) || (n[r[53]] = e[61]),
                this.delCookie(t)
            }
            n[Wn(N, r[62], c[79])] = i
        }(Qn || (Qn = {}));
        var Zn;
        !function(n) {
            var t = u[80], o = v(nn, s[81], u[82]), i = s[67], f, l = u[83][Wn(u[84], e[63])], p, d;
            function g(n) {
                var t = j;
                return t = dn,
                i ? y(n) : f ? w(n) : void u[2]
            }
            function w(n) {
                E(function() {
                    return n = R(n),
                    f.getAttribute(n)
                })()
            }
            function m() {
                try {
                    return !!(o in s[83] && s[83][o])
                } catch (n) {
                    return void u[2]
                }
            }
            function I(n) {
                try {
                    f.removeItem(n)
                } catch (t) {}
            }
            n[c[85]] = C;
            function y(n) {
                try {
                    return f.getItem(n)
                } catch (t) {
                    return u[86]
                }
            }
            n[c[87]] = B;
            function _(n, t) {
                try {
                    f.setItem(n, t)
                } catch (r) {}
            }
            function C() {
                var n = e[64]
                  , r = u[88];
                if (i = m(),
                i)
                    f = a[65][o];
                else if (l[at(e[66])][at(e[67], a[68])])
                    try {
                        p = new ActiveXObject(Wn(a[69], s[89], l)),
                        p.open(),
                        p.write(s[90]),
                        p.close(),
                        d = p.w[e[70]][s[2]][e[71]],
                        f = d.createElement(n + t + r)
                    } catch (c) {
                        f = l.createElement(o),
                        d = l[Wn(u[91], a[72])] || l.getElementsByTagName(s[92])[s[2]] || l[a[73]]
                    }
            }
            function E(n) {
                return function() {
                    d.appendChild(f),
                    f.addBehavior(s[93]),
                    f.load(o);
                    var t = n();
                    return d.removeChild(f),
                    t
                }
            }
            n[c[94]] = g;
            function A(n) {
                var t, r, e;
                if (t = r = e = a,
                i)
                    I(n);
                else {
                    if (!f)
                        return void e[52];
                    b(n)
                }
            }
            function b(n) {
                E(function() {
                    n = R(n),
                    f.removeAttribute(n),
                    f.save(o)
                })()
            }
            function T(n, t) {
                E(function() {
                    n = R(n),
                    f.setAttribute(n, t),
                    f.save(o)
                })()
            }
            n[a[74]] = A;
            function B(n, t) {
                if (void 0 === t)
                    return A(n);
                if (i)
                    _(n, t);
                else {
                    if (!f)
                        return void u[2];
                    T(n, t)
                }
            }
            function R(n) {
                var t = s[95]
                  , e = r[75]
                  , a = new r[47](t + h + e,c[96]);
                return n.replace(new c[27](u[97]), v(s[98], s[99], s[100])).replace(a, c[101])
            }
        }(Zn || (Zn = {}));
        var qn = function() {
            var n, t, r;
            n = t = r = a;
            var e, o, i;
            e = o = i = s;
            var u = o[15]
              , c = o[102]
              , f = e[103];
            function l(r) {
                var a = o[102]
                  , i = e[103];
                this[n[76]] = r;
                for (var u = t[52], c = r[a + g + i]; u < c; u++)
                    this[u] = t[52]
            }
            return l[e[104]][w + m + I + u] = function() {
                for (var a = e[105], u = this[a + y], c = [], s = -e[0], v = o[2], f = u[r[56]]; v < f; v++)
                    for (var l = this[v], p = u[v], d = s += p; c[d] = l & parseInt(t[77], n[78]),
                    --p != r[52]; )
                        --d,
                        l >>= parseInt(n[79], i[106]);
                return c
            }
            ,
            l[v
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值