使用 Python、Scrapy
import execjs  # 安装:pip install PyExecJS。用这种方式只是为了调试,实际使用中还是要用 JS 引擎 V8
看了不少博客,都说要用到模拟浏览器(PhantomJS、Chrome 等),其实有很多网站的 JS 能检测出这种操作(无头模式下 navigator.webdriver 为 true,要绕过检测就必须设置该属性)。
下面以一个网站为例(具体网站不公开!):
<script>var x="attachEvent@String@@@if@Sun@e@PqwgnPCMHw@781@@@fromCharCode@chars@match@@@@substr@new@@GMT@@onreadystatechange@@@@innerHTML@@@@challenge@@location@3@@DOMContentLoaded@D@Expires@0xFF@Mar@for@__cdn_clearance@@addEventListener@@href@div@@13@else@@join@@@@2@document@@cookie@https@@pathname@@@d@while@@@31@@firstChild@charCodeAt@parseInt@try@@a@Path@@catch@@1@@window@function@g@@15@false@reverse@MLPxP5@@RegExp@replace@return@headless@search@f@1554042973@charAt@@split@@@@length@0xEDB88320@36@BGZKFSTk@1500@createElement@toString@@@8@rOm9XFMtA3QKV7nYsPGT4lifyWwkq5vcjH2IdxUoCbhERLaz81DNB6@setTimeout@eval@@var@JgSe0upZ@@19@captcha@Array@@toLowerCase@0@".replace(/@*$/,"").split("@"),y="54 36=3f(){51('1a.20=1a.2g+1a.44.41(/[\\?|&]58-18/,\\'\\')',4h);2b.2d='1j=46.9|5c|'+(3f(){54 36=59(+[[1b+(+!+[])+((+!+[])+[~~{}])/[(+!+[])+(+!+[])]]]),2m=['1e',(-~{}/~~[]+[]).47(~~[])+[3e.43+[]+[[]][5c]][5c].47(~~''),'4g%',[-~[[2a]*(-~~~[]+(+!+[])+(+!+[]))]],'8%',[2a],'3l',[(-~[]|2a)]];1i(54 1c=5c;1c<2m.4d;1c++){36.3k()[1c]=2m[1c]};42 36.26('')})()+';1f=6, 30-1h-57 3i:4f:23 l;38=/;'};5((3f(){35{42 !!3e.1l;}3a(7){42 3j;}})()){2b.1l('1d',36,3j)}24{2b.1('10',36)}",f=function(x,y){var a=0,b=0,c=0;x=x.split("");y=y||99;while((a=x.shift