PythonCookie池细谈 [下集]
声明:本文只作学习研究,禁止用于非法用途,否则后果自负,如有侵权,请告知删除,谢谢!
引言
本文出自微信公众号【Python三剑客】
作者:阿K
阅读时长:5min
留言:没有精美的文案,没有动情可爱的故事情节,不想随波逐流,暂时只想努力输出好的文章,如果对大家有帮助希望大家能够点赞、收藏、分享一键三连!在此感谢各位读者!
最近看了咚咚枪的Cookie池,觉得不错,就分享给大家了;无论从模块的复用性还是健壮性来说,都比较好。
目录结构
CookiePool
-services 【平台登录文件夹】
--common 【公共文件夹】
---chaojiying.py 【超级鹰接口文件】
---en_captcha.png 【验证码图片】
--bilibili.py【bilibili登陆文件】
-run.py 【启动文件】
-server.py 【Cookie检测服务】
-settings.py 【平台账号密码、打码平台的账号密码、Redis数据库等配置文件】
上码上码
bilibili.py
-
该模块主要通过 selenium 或 post 请求的方式登录,然后返回 cookie_dict
import time
from io import BytesIO
import random
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
from PIL import Image
from services.common import chaojiying
from services.base_service import BaseService
class BiliLoginService(BaseService):
    """Log in to bilibili through a Selenium-driven Chrome and return its cookies.

    The login page protects the form with a slider captcha. The service
    screenshots the captcha twice (without and with the gap), locates the gap
    by pixel comparison and drags the slider along a generated track.
    """

    name = "bili"
    login_url = "https://passport.bilibili.com/login"

    def __init__(self, settings):
        """Read the account credentials and start a Chrome browser.

        Args:
            settings: settings module/object exposing ``Accounts[name]`` with
                ``username`` and ``password`` entries. An optional
                ``CHROME_DRIVER_PATH`` attribute overrides the chromedriver
                location; when absent the original hard-coded path is used.
        """
        account = settings.Accounts[self.name]
        self.user_name = account["username"]
        self.pass_word = account["password"]
        chrome_options = Options()
        # To attach to an already running Chrome instead, set the
        # "debuggerAddress" experimental option (e.g. "127.0.0.1:9222") here.
        driver_path = getattr(settings, "CHROME_DRIVER_PATH", "D:/c盘下载/chromedriver.exe")
        self.browser = webdriver.Chrome(executable_path=driver_path,
                                        options=chrome_options)

    def check_login(self):
        """Return True when the current page shows the '创作中心' span.

        Any lookup failure (including no page being loaded yet) counts as
        "not logged in".
        """
        try:
            self.browser.find_element_by_xpath("//span[contains(text(),'创作中心')]")
        except Exception:
            return False
        return True

    def compare_pixel(self, image1, image2, i, j):
        """Tell whether pixel (i, j) is approximately equal in both images.

        Returns:
            True when each of the first three channels differs by less than
            the threshold (60), otherwise False.
        """
        pixel1 = image1.load()[i, j]
        pixel2 = image2.load()[i, j]
        threshold = 60
        return (abs(pixel1[0] - pixel2[0]) < threshold
                and abs(pixel1[1] - pixel2[1]) < threshold
                and abs(pixel1[2] - pixel2[2]) < threshold)

    def crop_image(self, image_file_name):
        """Screenshot the page, crop the captcha canvas and save it.

        Args:
            image_file_name: path the cropped captcha is written to.

        Returns:
            The cropped captcha as a PIL image.
        """
        # Give the captcha canvas time to render before measuring it
        time.sleep(2)
        img = self.browser.find_element_by_css_selector(".geetest_canvas_img.geetest_absolute")
        location = img.location
        print("图片的位置: ", location)
        size = img.size
        top = location["y"]
        bottom = location["y"] + size["height"]
        left = location["x"]
        right = location["x"] + size["width"]
        print("验证码截图坐标: ", left, top, bottom, right)
        screen_shot = Image.open(BytesIO(self.browser.get_screenshot_as_png()))
        captcha = screen_shot.crop((int(left), int(top), int(right), int(bottom)))
        captcha.save(image_file_name)
        return captcha

    def login(self):
        """Log in (retrying until ``check_login`` succeeds) and return the cookies.

        Returns:
            dict mapping cookie name to cookie value.

        The browser is always shut down before returning or raising, so a
        failed attempt does not leave a Chrome/chromedriver process behind.
        """
        try:
            # Maximise the window to avoid element-location errors
            self.browser.maximize_window()
        except Exception:
            # Best effort only: a window that cannot be maximised is not fatal
            pass
        try:
            while not self.check_login():
                self._attempt_login()
            cookies = self.browser.get_cookies()
        finally:
            # quit() ends the whole driver session, not just the current window
            self.browser.quit()
        print(cookies)
        return {cookie["name"]: cookie["value"] for cookie in cookies}

    def _attempt_login(self):
        """Fill in the login form once and try to solve the slider captcha."""
        self.browser.get(self.login_url)
        username_ele = self.browser.find_element_by_css_selector("#login-username")
        password_ele = self.browser.find_element_by_css_selector("#login-passwd")
        username_ele.send_keys(self.user_name)
        password_ele.send_keys(self.pass_word)
        # 1. Click "login" to bring up the slider captcha
        login_btn = self.browser.find_element_by_css_selector(".btn.btn-login")
        login_btn.click()
        # Wait for the slider captcha to appear
        time.sleep(5)
        # Clear the canvas style via JS so the image WITHOUT the gap is shown
        self.browser.execute_script('document.querySelectorAll("canvas")[3].style=""')
        image1 = self.crop_image("captcha1.png")
        # Hide that canvas again so the image WITH the gap is shown (key step)
        self.browser.execute_script('document.querySelectorAll("canvas")[3].style="display: none;"')
        image2 = self.crop_image("captcha2.png")
        # NOTE(review): the 6px correction is undocumented in the original;
        # presumably it compensates for the slider's own offset - confirm.
        left = self._find_gap_offset(image1, image2) - 6
        print(left)
        self._drag_slider(self._build_track(left))
        # Give the site time to verify the captcha and finish logging in
        time.sleep(5)

    def _find_gap_offset(self, image1, image2):
        """Return the x of the first column (from x=60) where the images differ, else 60."""
        width, height = image1.size
        for i in range(60, width):
            for j in range(height):
                if not self.compare_pixel(image1, image2, i, j):
                    return i
        return 60

    def _build_track(self, distance):
        """Build per-step x offsets: accelerate over the first 3/4, then decelerate.

        Each step is rounded on its own, so the sum of the track can differ
        from ``distance`` by a few pixels.
        """
        track = []
        # Distance covered so far
        current = 0
        # Point after which the movement decelerates
        mid = distance * 3 / 4
        # Time step
        t = 0.1
        v = 0
        while current < distance:
            if current < mid:
                a = random.randint(2, 3)
            else:
                a = - random.randint(6, 7)
            v0 = v
            # Velocity after this step
            v = v0 + a * t
            # Distance moved during this step
            move = v0 * t + 1 / 2 * a * t * t
            current += move
            track.append(round(move))
        return track

    def _drag_slider(self, track):
        """Press the slider button, move it along ``track`` and release it."""
        slider = self.browser.find_element_by_css_selector(".geetest_slider_button")
        ActionChains(self.browser).click_and_hold(slider).perform()
        for x in track:
            ActionChains(self.browser).move_by_offset(xoffset=x, yoffset=0).perform()
        time.sleep(0.5)
        ActionChains(self.browser).release().perform()

    def check_cookie(self, cookie_dict):
        """Validate a stored cookie. Not implemented: always returns None.

        NOTE(review): callers that treat a falsy result as "expired" will
        discard every cookie until this is implemented.
        """
        pass
if __name__ == "__main__":
    # Manual run: perform one bilibili login with the project settings.
    import settings

    service = BiliLoginService(settings)
    service.login()
server.py
-
该模块主要通过多线程对Cookie进行检测
import json
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from functools import partial
import redis
# 1. 如何确保每一个网站都会被单独的运行
class CookieServer():
    """Keep a Redis-backed cookie pool filled and validated for registered services.

    Each registered service class gets two endless loops, each on its own
    thread: one that logs in whenever the pool is below its configured size,
    and one that periodically re-checks the stored cookies.
    """

    def __init__(self, settings):
        """Connect to Redis using ``settings.REDIS_HOST`` / ``settings.REDIS_PORT``.

        Args:
            settings: settings module/object; also expected to expose
                ``Accounts[<service name>]`` with ``cookie_key``,
                ``max_cookie_nums`` and ``check_interval``.
        """
        self.redis_cli = redis.Redis(host=settings.REDIS_HOST, port=settings.REDIS_PORT, decode_responses=True)
        self.service_list = []
        self.settings = settings

    def register(self, cls):
        """Register a login service class (not an instance)."""
        self.service_list.append(cls)

    def login_service(self, srv):
        """Loop forever, logging in whenever the cookie pool of ``srv`` is not full.

        Args:
            srv: service class with a class-level ``name``, constructible as
                ``srv(settings)`` and providing ``login()`` returning a dict.

        A service instance is created only when a login is actually needed:
        constructing one may be expensive (e.g. it may start a browser), so
        building one on every pass of the loop would leak resources while
        the pool is full.
        """
        srv_name = srv.name
        while True:
            account = self.settings.Accounts[srv_name]
            cookie_key = account["cookie_key"]
            if self.redis_cli.scard(cookie_key) < account["max_cookie_nums"]:
                cookie_dict = srv(self.settings).login()
                self.redis_cli.sadd(cookie_key, json.dumps(cookie_dict))
            else:
                print("{srv_name} 的cookie池已满,等待10s".format(srv_name=srv_name))
                time.sleep(10)

    # Original author's note: "celery" (presumably a possible alternative
    # scheduler for the periodic check - confirm).
    def check_cookie_service(self, srv):
        """Loop forever, removing cookies that ``check_cookie`` reports as invalid.

        Args:
            srv: service class constructible as ``srv(settings)`` and
                providing ``name`` and ``check_cookie(cookie_dict)``.

        The service instance is created once and reused for every round
        instead of being rebuilt on each pass of the loop.
        """
        srv_cli = srv(self.settings)
        srv_name = srv_cli.name
        while True:
            print("开始检测cookie是否可用")
            account = self.settings.Accounts[srv_name]
            cookie_key = account["cookie_key"]
            all_cookies = self.redis_cli.smembers(cookie_key)
            print("目前可用cookie数量: {}".format(len(all_cookies)))
            for cookie_str in all_cookies:
                print("获取到cookie: {}".format(cookie_str))
                cookie_dict = json.loads(cookie_str)
                if srv_cli.check_cookie(cookie_dict):
                    print("cookie 有效")
                else:
                    print("cookie已经失效, 删除cookie")
                    self.redis_cli.srem(cookie_key, cookie_str)
            # Pause between rounds: checking too often could itself cause
            # cookies that are still valid to be invalidated
            interval = account["check_interval"]
            print("{}s 后重新开始检测cookie".format(interval))
            time.sleep(interval)

    def start(self):
        """Start the login loop and the check loop of every registered service.

        Blocks while the loops run; if a loop raises, the exception is
        re-raised here by ``future.result()``.
        """
        task_list = []
        # The submitted loops never return, so each one occupies a worker
        # forever. Size the pools to the number of services; a fixed size
        # would leave any service beyond that limit queued and never started.
        workers = max(1, len(self.service_list))
        print("启动登录服务")
        login_executor = ThreadPoolExecutor(max_workers=workers)
        for srv in self.service_list:
            task_list.append(login_executor.submit(self.login_service, srv))
        print("启动cookie检测服务")
        check_executor = ThreadPoolExecutor(max_workers=workers)
        for srv in self.service_list:
            task_list.append(check_executor.submit(self.check_cookie_service, srv))
        for future in as_completed(task_list):
            data = future.result()
            print(data)
settings.py
-
该模块用于存放数据库和平台登陆的账号密码等【当然了账号密码的存放方式也不限于当前】
# Chaojiying (captcha-solving platform) credentials
CJY_USERNAME = "【chaojiying】"
CJY_PASSWORD = "【chaojiying】"

# Redis connection settings
REDIS_HOST = "127.0.0.1"
REDIS_PORT = 6379

# Login account information for each site, keyed by service name.
# cookie_key is the Redis key of the site's cookie pool, max_cookie_nums the
# pool size and check_interval the pause (in seconds) between validity checks.
Accounts = {
    "zhihu": {
        "username": "【账户名】",
        "password": "【密码】",
        "cookie_key": "zhihu:cookies",
        "max_cookie_nums": 1,
        "check_interval": 30,
    },
    "bili": {
        "username": "【账户名】",
        "password": "【密码】",
        "cookie_key": "bili:cookies",
        "max_cookie_nums": 1,
        "check_interval": 30,
    },
}
启动文件
from server import CookieServer
from services.zhihu import ZhihuLoginService
import settings
def main():
    """Register the login services and start the cookie pool (blocks while it runs)."""
    srv = CookieServer(settings)
    # Register the services that need to log in
    srv.register(ZhihuLoginService)
    # Start the cookie service
    print("启动cookie池服务")
    srv.start()


# Only start the server when executed as a script, not when imported
if __name__ == "__main__":
    main()
今天的分享就到这里了,大家可以到我的资源库关注后免费下载,即可获取 [CookiePool](https://download.youkuaiyun.com/download/weixin_38640052/19880874)