目标网站:aHR0cHM6Ly93d3cubGVib25jb2luLmZyLw==
极验3.0
滑块图片使用canvas画布展示,pyppeteer执行toDataURL也无法获取原图
pyppeteer拦截图片URL下载下来为乱序
if 'static.geetest.com/pictures/gt/' and '.webp' and '/bg/' in request. url:
if 'pagead2.googlesyndication' not in request. url:
if self. picture_url_bg_gap == '' :
self. picture_url_bg_gap = request. url
self. logger. error( request. url)
await request. continue_( )
elif 'static.geetest.com/pictures/gt/' and '.png' and '/slice/' in request. url:
if self. picture_url_slice == '' :
self. picture_url_slice = request. url
self. logger. error( request. url)
await request. continue_( )
乱序图片还原算法
def parse_bg_captcha(self, img, save_path=None):
    """Reassemble a tile-shuffled captcha background into a 260x160 image.

    The scrambled picture is read as 52 tiles (2 rows x 26 columns), each
    10 px wide and 80 px tall, located at a 12 px horizontal pitch with a
    1 px left offset.  The tiles are pasted side by side into a new RGB
    image in the order given by the lookup table below.

    Args:
        img: Path to the scrambled image (``str`` or ``Path``) or its raw
            ``bytes``.
        save_path: Optional destination for the restored image.

    Returns:
        The resolved ``save_path`` as a string, or ``None`` when no
        ``save_path`` was given (the restored image is then not kept).

    Raises:
        ValueError: If ``img`` is not ``str``, ``Path`` or ``bytes``.
    """
    if isinstance(img, bytes):
        scrambled = Image.open(io.BytesIO(img))
    elif isinstance(img, (str, Path)):
        scrambled = Image.open(img)
    else:
        raise ValueError(
            f'输入图片类型错误, 必须是<type str>/<type Path>/<type bytes>: {type(img)} ')

    # tile_order[dest] is the index of the source tile shown at slot `dest`.
    tile_order = (
        39, 38, 48, 49, 41, 40, 46, 47, 35, 34, 50, 51, 33, 32, 28, 29, 27,
        26, 36, 37, 31, 30, 44, 45, 43, 42, 12, 13, 23, 22, 14, 15, 21, 20,
        8, 9, 25, 24, 6, 7, 3, 2, 0, 1, 11, 10, 4, 5, 19, 18, 16, 17,
    )
    tile_w, tile_h = 10, 80
    restored = Image.new('RGB', (260, 160))

    for dest_idx, src_idx in enumerate(tile_order):
        # Source tiles: 12 px pitch, 1 px offset; indices above 25 are row 2.
        src_x = src_idx % 26 * 12 + 1
        src_y = tile_h if src_idx > 25 else 0
        tile = scrambled.crop((src_x, src_y, src_x + tile_w, src_y + tile_h))

        # Destination tiles are packed with no gap (10 px pitch).
        dest_x = dest_idx % 26 * 10
        dest_y = tile_h if dest_idx > 25 else 0
        restored.paste(tile, (dest_x, dest_y))

    if save_path is None:
        return save_path

    save_path = str(Path(save_path).resolve())
    restored.save(save_path)
    return save_path
识别缺口位置
def get_slice_x(self, bg_path, slice_path):
    """Find the x offset where the slider piece best matches the background.

    Both images are loaded as grayscale, reduced to Canny edge maps
    (thresholds 100/200) and compared with normalised cross-correlation
    template matching.  As a side effect the grayscale background, with a
    rectangle drawn at the best match, is written to ``out.jpg``.

    Args:
        bg_path: File path of the background image.
        slice_path: File path of the slider-piece image.

    Returns:
        The x coordinate of the top-left corner of the best match.
    """
    # Flag 0 loads the images as single-channel grayscale.
    background = cv2.imread(bg_path, 0)
    piece = cv2.imread(slice_path, 0)

    # Edge maps, expanded back to three channels before matching.
    background_edges = cv2.cvtColor(
        cv2.Canny(background, 100, 200), cv2.COLOR_GRAY2RGB)
    piece_edges = cv2.cvtColor(
        cv2.Canny(piece, 100, 200), cv2.COLOR_GRAY2RGB)

    scores = cv2.matchTemplate(
        background_edges, piece_edges, cv2.TM_CCOEFF_NORMED)
    # Only the location of the maximum score is needed.
    _, _, _, top_left = cv2.minMaxLoc(scores)

    # Mark the match on the background and dump it for visual inspection.
    piece_h, piece_w = piece_edges.shape[:2]
    bottom_right = (top_left[0] + piece_w, top_left[1] + piece_h)
    cv2.rectangle(background, top_left, bottom_right, (0, 0, 255), 2)
    cv2.imwrite('out.jpg', background)

    return top_left[0]
避免轨迹检测生成轨迹算法
def slide_list(self, total_length):
    """Split a distance into per-tick steps that speed up, then slow down.

    Starting from rest, each tick (duration 1) advances with constant
    acceleration 0.4 until three fifths of ``total_length`` has been
    covered, and with acceleration -0.5 afterwards.  The loop ends once the
    unrounded distance reaches ``total_length``.

    Args:
        total_length: Distance to cover.

    Returns:
        A list of rounded per-tick displacements.  Because each step is
        rounded and the last tick may overshoot, the steps are not
        guaranteed to sum to exactly ``total_length``.  Empty when
        ``total_length`` is not positive.
    """
    steps = []
    velocity = 0
    travelled = 0
    dt = 1
    turning_point = total_length * 3 / 5

    while travelled < total_length:
        accel = 0.4 if travelled < turning_point else -0.5
        # Displacement over one tick: s = v*t + a*t^2 / 2.
        step = velocity * dt + 0.5 * accel * (dt ** 2)
        travelled += step
        steps.append(round(step))
        velocity = velocity + accel * dt

    return steps
根据生成的轨迹滑动滑块
# Replay the per-step offsets from slide_list(x) as relative horizontal mouse
# moves, reading the current pointer position from the mouse object's private
# _x/_y attributes.
# NOTE(review): indentation was lost in this snippet; the final move (1 px
# back to the left) presumably runs once after the loop — confirm.
# NOTE(review): pyppeteer documents only a 'steps' option for mouse.move;
# the 'delay' entry may have no effect here — confirm.
length_list = self. slide_list( x)
for length in length_list:
await self. page. mouse. move( self. page. mouse. _x + length, self. page. mouse. _y, { 'delay' : random. randint( 1000 , 2000 ) , 'steps' : 3 } )
await self. page. mouse. move( self. page. mouse. _x - 1 , self. page. mouse. _y, { 'delay' : random. randint( 1000 , 2000 ) , 'steps' : 3 } )
pyppeteer拦截成功请求后的request
如何获得request示例
try :
proxy = { }
req = {
"headers" : request. headers,
"data" : request. postData,
"proxy" : proxy,
"timeout" : 5 ,
"ssl" : False ,
}
try :
async with aiohttp_session. request(
method= request. method, url= request. url, ** req
) as response:
body = await response. read( )
except Exception as e:
body = ''
self. logger. error( e)
await request. abort( )
resp = { "body" : body, "headers" : response. headers,
"status" : response. status}
if response. status == 200 :
self. request = request
self. stops = True
await request. respond( resp)
except Exception as e:
self. logger. error( e)
获得request后请求示例
async def get_data(self, request, i):
    """Fetch one search-result page, reusing a captured request's details.

    The method, headers and post data of ``request`` are replayed against
    the search URL built from ``self.keyword`` and page number ``i``.

    Args:
        request: Previously intercepted request whose ``method``,
            ``headers`` and ``postData`` are reused.
        i: Page number placed in the ``page`` query parameter.

    Returns:
        The aiohttp response object when the status is 200.  Its body has
        already been read before the response context is left.

    Raises:
        Exception: On a non-200 status or any request failure; the
            underlying error is attached as ``__cause__``.
    """
    myurl = 'https://www.leboncoin.fr/recherche?text={}&page={}'.format(
        self.keyword, i)
    try:
        # NOTE(review): an empty dict is passed as the aiohttp `proxy`
        # argument; this looks like a redacted placeholder — confirm.
        proxy = {}
        req = {
            "headers": request.headers,
            "data": request.postData,
            "proxy": proxy,
            "timeout": 5,
            "ssl": False,  # certificate verification is disabled
        }
        async with aiohttp_session.request(
            method=request.method, url=myurl, **req
        ) as response:
            # Read the body while the connection is still open.
            await response.read()
            if response.status == 200:
                return response
            raise Exception(myurl, response.status)
    except Exception as e:
        # Use explicit placeholders: passing the exception as a bare extra
        # argument makes logging try to %-format the URL with it.
        self.logger.error('%s %s', myurl, e)
        raise Exception(e) from e
pyppeteer的请求返回监听
async def intercept_network_request(self, request):
    """Let an intercepted request proceed when its URL matches the filter.

    The filter substring is the empty string, which every URL contains, so
    every request is continued.

    Args:
        request: The intercepted request; must expose ``url`` and an
            awaitable ``continue_()``.
    """
    url = request.url
    if '' not in url:
        return
    await request.continue_()
async def intercept_network_response(self, response):
    """Response listener placeholder: inspect responses whose URL matches.

    The filter substring is the empty string, which every URL contains.
    Nothing is continued here: this handler receives a response, not a
    request, and the previous body referenced an undefined ``request``
    name, raising ``NameError`` on every response.

    Args:
        response: The received response; must expose ``url``.
    """
    if '' in response.url:
        # Hook point for reading the response; intentionally a no-op.
        return
# Turn on request interception for the page, then register the request and
# response listeners.  Each listener wraps its coroutine handler in
# asyncio.create_task, because the page.on callbacks here are plain lambdas
# and cannot await.
# NOTE(review): run_until_complete raises if the event loop is already
# running; presumably this setup runs from synchronous code — confirm.
loop = asyncio. get_event_loop( )
loop. run_until_complete( self. page. setRequestInterception( True ) )
self. page. on( 'request' , lambda request: asyncio. create_task( self. intercept_network_request( request) ) )
self. page. on( 'response' , lambda response: asyncio. create_task( self. intercept_network_response( response) ) )
完整代码
完整代码