基本思路:
1.获取验证码原图片img.与缺失图片.
2.两者比较RGB像素点.往往缺口像素点与原图片像素点偏差较大(根据自己需求定)
3.计算0~缺口,识别距离
(横坐标x是时间、纵坐标y是位移,那么曲线上每个点的切线斜率就是该时刻的速度,会发现这样的一个规律:速度由小变大,再由大变小,即先加速后减速,这是最主要的特征之一)
4.模拟人工滑动轨迹(滑动距离 = 终点坐标 - 起点坐标),往往在这一步会卡住很多人:明明已经滑动到缺口位置,可为什么不通过呢?
原因很简单,在当你滑动验证码时,极验会记录你的滑动时间, 滑动轨迹来判断你是否是属于人为滑动轨迹.
这里根据某数据网站进行实验,基本通过率在90%左右:
相关工具使用:selenium,IO,PIL,requests
from selenium.webdriver.support.ui import WebDriverWait # 等待元素加载的
from selenium.webdriver.common.action_chains import ActionChains # 拖拽
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException
from selenium.webdriver.common.by import By
from PIL import Image, ImageChops
import requests
import time
import re
import random
from io import BytesIO
from selenium import webdriver
import numpy as np
import json
第一步:进入登录页面URL,用账号密码登录。这里做了滑动次数限制,思路是:如果当前拥有大量的账号池,且循环登录保存cookie的话,不限制滑动次数会导致效率太低。
def start_login(name, password):
    """Log in, solve the slider captcha and return the session cookies.

    The captcha is attempted at most six times. After each attempt the page
    source is inspected; once the ``gt_popup_wrap`` marker is gone the login is
    treated as successful, the cookies are serialised and the browser window
    is closed.

    :param name: account name (consumed by the elided login steps).
    :param password: account password (consumed by the elided login steps).
    :return: cookies joined as ``"name=value;name=value"``.
    :raises SystemExit: when every attempt has failed.
    """
    # NOTE(review): the original snippet elides the steps that open the login
    # URL, create ``driver`` and type in ``name`` / ``password``. ``driver`` is
    # therefore a free name here and must be provided by that setup code.
    time.sleep(2)
    count = 6  # recognise the captcha at most 6 times
    # Wait until the slider knob has finished loading.
    element = main_check_slider(driver)
    while count > 0:
        main_check_code(driver, element)
        time.sleep(2)
        try:
            # If the captcha popup is gone the page has moved on: save cookies.
            if 'gt_popup_wrap' not in driver.page_source:
                cookiestr = ';'.join(
                    item["name"] + "=" + item["value"]
                    for item in driver.get_cookies()
                )
                print('成功识别!!!!!!')
                driver.close()
                return cookiestr
        except NoSuchElementException:
            print('识别错误,继续')
            time.sleep(2)
        # Count every unsuccessful attempt, not only those that raised;
        # otherwise a captcha that keeps failing would loop forever.
        count -= 1
    print('too many attempt check code ')
    # Same effect as exit(), without relying on the site-module helper.
    raise SystemExit('退出程序')
第二步:下载验证码无序图片并按顺序拼接(这里会有人疑问:sleep上下加的是不是有点多,不影响速度吗?这个跟公司网络有点关系,可以忽略不计)
拼接问题:这个根据你当前网站来定,当前网站验证码图片是由52张小图片依次加载进来,不是完整Img.
(1)下载验证码图片
def get_image(driver, div_path):
    """Download the scrambled captcha sprite and return the reassembled image.

    Every element matched by ``div_path`` must carry an inline style of the
    form ``background-image: url("..."); background-position: Xpx Ypx;``.
    The X/Y offsets of all tiles are collected, the sprite referenced by the
    last tile is downloaded, and both are handed to ``merge_image``.

    :param driver: Selenium WebDriver currently showing the captcha.
    :param div_path: XPath selecting the tile ``<div>`` elements.
    :return: the result of ``merge_image`` for the downloaded sprite.
    :raises ValueError: if no tile matches ``div_path`` or a tile's style
        attribute cannot be parsed.
    """
    time.sleep(2)
    # Raw string: the pattern contains regex escapes such as ``\(``.
    tile_pattern = re.compile(
        r'background-image: url\("(.*?)"\); background-position: (.*?)px (.*?)px;'
    )
    # find_elements(By.XPATH, ...) replaces find_elements_by_xpath, which was
    # removed in Selenium 4.
    background_images = driver.find_elements(By.XPATH, div_path)
    if not background_images:
        raise ValueError('no captcha tiles matched xpath: %s' % div_path)

    location_list = []
    image_url = None
    for background_image in background_images:
        style = background_image.get_attribute('style') or ''
        result = tile_pattern.findall(style)
        if not result:
            raise ValueError('unexpected captcha tile style: %r' % style)
        url, pos_x, pos_y = result[0]
        location_list.append({'x': int(pos_x), 'y': int(pos_y)})
        image_url = url
    print('==================================')

    image_url = image_url.replace('webp', 'jpg')
    image_result = requests.get(image_url).content
    image_file = BytesIO(image_result)  # still the scrambled sprite
    return merge_image(image_file, location_list)
当前为下载后的无序图片.
(2):对无序图片进行结构拼接:
def merge_image(image_file, location_list):
    """Reassemble the scrambled captcha sprite into the real picture.

    The sprite is saved as ``Orig.jpg``. For each entry of ``location_list``
    a 10-pixel-wide strip is cropped at ``abs(x)``: entries with ``y == -58``
    take rows 58-116 of the sprite and form the top row of the result, entries
    with ``y == 0`` take rows 0-58 and form the bottom row. Strips are pasted
    left to right in list order onto a new 260x116 RGB image, which is saved
    as ``Now.jpg``.

    :param image_file: file-like object holding the scrambled sprite.
    :param location_list: dicts with integer ``'x'`` and ``'y'`` offsets.
    :return: the reassembled image.
    """
    sprite = Image.open(image_file)
    sprite.save('Orig.jpg')
    canvas = Image.new('RGB', (260, 116))

    def cut_strip(location, top, bottom):
        left = abs(location['x'])
        return sprite.crop((left, top, left + 10, bottom))

    top_row = [cut_strip(loc, 58, 116) for loc in location_list if loc['y'] == -58]
    bottom_row = [cut_strip(loc, 0, 58) for loc in location_list if loc['y'] == 0]

    for strips, y_offset in ((top_row, 0), (bottom_row, 58)):
        x_offset = 0
        for strip in strips:
            canvas.paste(strip, (x_offset, y_offset))
            x_offset += strip.size[0]

    canvas.save('Now.jpg')
    return canvas
当前为处理后的图片样子.
第三步,检查滑动按钮是否加载完成:
def main_check_slider(driver):
    """Block until the slider knob is clickable and return it.

    Waits up to 30 seconds (polling every 0.5 s) for the element with class
    ``gt_slider_knob``. On timeout a message is printed, the function sleeps
    5 seconds and waits again; it only returns once the knob is available.

    :param driver: Selenium WebDriver showing the captcha.
    :return: the clickable slider knob element.
    """
    knob_locator = (By.CLASS_NAME, 'gt_slider_knob')
    while True:
        try:
            knob = WebDriverWait(driver, 30, 0.5).until(
                EC.element_to_be_clickable(knob_locator))
        except TimeoutException:
            print('超时错误,继续')
            time.sleep(5)
            continue
        if knob:
            return knob
第四步,计算缺口图片与完整图片RGB像素点(寻找缺口位置):
def get_distance(image1, image2, threshold=50):
    """Return the x coordinate of the first column where the images differ.

    The two images are scanned column by column, top to bottom. A pixel counts
    as different only when the R, G and B channels each differ by more than
    ``threshold``. The comparison is symmetric, so the two images may be
    passed in either order.

    :param image1: image without the gap; its ``size`` drives the scan.
    :param image2: image with the gap.
    :param threshold: minimum per-channel difference (exclusive) that marks a
        pixel as belonging to the gap.
    :return: x coordinate of the left edge of the gap, or ``None`` when no
        pixel differs enough.
    """
    width, height = image1.size
    for x in range(width):
        for y in range(height):
            pixel1 = image1.getpixel((x, y))
            pixel2 = image2.getpixel((x, y))
            # All three colour channels must exceed the threshold.
            if all(abs(pixel1[c] - pixel2[c]) > threshold for c in range(3)):
                return x  # distance the slider has to travel
    return None
第五步,计算起始值到缺口位置X,y距离
( 拿到移动轨迹,模仿人的滑动行为,先匀加速后匀减速
匀变速运动基本公式:
① $v = v_0 + at$
② $s = v_0 t + \frac{1}{2} a t^2$
③ $v^2 - v_0^2 = 2as$) :
def get_track(distance):
    """Build a human-looking list of per-step slider offsets.

    The motion is simulated in 0.2 s steps: accelerate with a random
    acceleration of 4-6 until 90% of ``distance`` is covered, then decelerate
    with a random acceleration of -3 to -5 until ``distance`` is reached.
    Each step's displacement is rounded to an integer. Nine small negative
    offsets are appended at the end to slide back towards the target.

    :param distance: distance the slider has to travel.
    :return: list of integer offsets, one per 0.2 s step, followed by the
        nine backward corrections.
    """
    # Initial velocity.
    v = 0
    # Time step: each track entry is the displacement within 0.2 s.
    t = 0.2
    # Displacements, one entry per time step.
    tracks = []
    # Distance covered so far.
    current = 0
    # Start decelerating once this point is passed.
    mid = distance * 9 / 10
    print(mid)
    while current < distance:
        if current < mid:
            # Smaller acceleration means smaller steps and a finer track.
            a = random.randint(4, 6)  # speeding up
        else:
            a = -random.randint(3, 5)  # slowing down
        # Velocity at the start of this step.
        v0 = v
        # Displacement during this step: s = v0*t + a*t^2/2.
        s = v0 * t + 0.5 * a * (t ** 2)
        current += s
        tracks.append(round(s))
        # Velocity reached at the end of this step feeds the next one.
        v = v0 + a * t
    # Slide back to roughly the exact position.
    for _ in range(4):
        tracks.append(-random.randint(2, 3))
    for _ in range(5):
        tracks.append(-random.randint(1, 3))
    # The original never returned the list, so callers received None.
    return tracks
第六步,点击按钮,滑动,关闭:
def main_check_code(driver, element):
    """Solve one slider captcha attempt by dragging the knob to the gap.

    Downloads the picture with the gap and the full picture, locates the gap
    with ``get_distance``, builds a track with ``get_track`` and replays it:
    press the knob, move step by step with short random pauses, nudge back a
    few pixels and release. If no gap is found the attempt is skipped without
    touching the mouse.

    :param driver: Selenium WebDriver showing the captcha.
    :param element: the slider knob element to drag.
    :return: None
    """
    image1 = get_image(driver, '//div[@class="gt_cut_bg gt_show"]/div')
    image2 = get_image(driver, '//div[@class="gt_cut_fullbg gt_show"]/div')
    # Compare the RGB pixels of both pictures; the x of the first differing
    # column is the distance to move.
    distance = get_distance(image1, image2)
    if distance is None:
        # Without this guard int(None) would raise TypeError.
        print('no gap found, skip this attempt')
        return
    distance = int(distance) + 1
    print('l=', distance)
    # Build the movement track.
    track_list = get_track(distance)

    print('第一步,点击滑动按钮')
    # Press the left mouse button on the knob and keep it held.
    ActionChains(driver).click_and_hold(on_element=element).perform()

    print('第二步,拖动元素')
    # Candidate pauses between two moves, in seconds.
    pauses = (0.02, 0.03, 0.05, 0.1, 0.01, 0.05, 0.07, 0.08, 0.09, 0.03, 0.04)
    for track in track_list:
        # Move relative to the current mouse position.
        ActionChains(driver).move_by_offset(xoffset=track, yoffset=0).perform()
        pause = random.choice(pauses)
        print(pause)
        time.sleep(pause)
    # Final small backward nudge.
    ActionChains(driver).move_by_offset(xoffset=-random.randint(2, 5), yoffset=0).perform()

    print('第三步,释放鼠标')
    ActionChains(driver).release(on_element=element).perform()
    time.sleep(5)
效果图成功率90%