PythonCookie池细谈 [下集]

PythonCookie池细谈 [下集]

声明:本文只作学习研究,禁止用于非法用途,否则后果自负,如有侵权,请告知删除,谢谢!

引言

本文出自微信公众号【Python三剑客】

作者:阿K

阅读时长:5min

留言:没有精美的文案,没有动情可爱的故事情节,不想随波逐流,暂时只想努力输出好的文章,如果对大家有帮助希望大家能够点赞、收藏、分享一键三连!在此感谢各位读者!

在这里插入图片描述

最近看了咚咚枪的Cookie池,觉得不错,就分享给大家了:无论从模块的复用性还是健壮性来说,都比较好。

目录结构

CookiePool
-services    【平台登录文件夹】
--common     【公共文件夹】
---chaojiying.py  【超级鹰接口文件】
---en_captcha.png 【验证码图片】
--bilibili.py【bilibili登陆文件】
-run.py      【启动文件】
-server.py   【Cookie检测服务】
-settings.py 【平台账号密码、打码平台的账号密码、Redis数据库等配置文件】

上码上码

bilibili.py

  • 该模块主要通过selenium或者post方式请求登录,然后返回cookie_dict

import time
from io import BytesIO
import random

from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
from PIL import Image
from services.common import chaojiying
from services.base_service import BaseService


class BiliLoginService(BaseService):
    """Log in to bilibili with Selenium and return the session cookies.

    The login form is protected by a slider captcha. The service screenshots
    the captcha twice (complete picture and picture with the gap), finds the
    first differing pixel column to locate the gap, and drags the slider
    there along an accelerate-then-decelerate track.
    """

    name = "bili"
    login_url = "https://passport.bilibili.com/login"
    # Fallback chromedriver location, used when ``settings`` does not define
    # ``CHROME_DRIVER_PATH``.
    default_driver_path = "D:/c盘下载/chromedriver.exe"

    def __init__(self, settings):
        """Read the account from ``settings.Accounts[name]`` and start Chrome.

        Args:
            settings: Settings module/object exposing ``Accounts`` and,
                optionally, ``CHROME_DRIVER_PATH`` (path to chromedriver).
        """
        account = settings.Accounts[self.name]
        self.user_name = account["username"]
        self.pass_word = account["password"]
        chrome_options = Options()
        driver_path = getattr(settings, "CHROME_DRIVER_PATH", self.default_driver_path)
        self.browser = webdriver.Chrome(executable_path=driver_path,
                                        options=chrome_options)

    def check_login(self):
        """Return True when the current page shows the '创作中心' entry.

        Any failure while looking the element up is reported as "not logged
        in" (False) rather than raised.
        """
        try:
            self.browser.find_element_by_xpath("//span[contains(text(),'创作中心')]")
            return True
        except Exception:
            return False

    def compare_pixel(self, image1, image2, i, j):
        """Return True if pixel (i, j) is similar in both images.

        Two pixels count as similar when each of the first three channels
        differs by less than 60.
        """
        pixel1 = image1.load()[i, j]
        pixel2 = image2.load()[i, j]
        threshold = 60
        return all(abs(pixel1[c] - pixel2[c]) < threshold for c in range(3))

    def crop_image(self, image_file_name):
        """Screenshot the page, crop the captcha canvas, save and return it.

        Args:
            image_file_name: Path the cropped captcha image is saved to.

        Returns:
            The cropped ``PIL.Image`` of the captcha area.
        """
        # Give the captcha canvas time to (re)render before measuring it.
        time.sleep(2)
        img = self.browser.find_element_by_css_selector(".geetest_canvas_img.geetest_absolute")
        location = img.location
        print("图片的位置: ", location)
        size = img.size

        top = location["y"]
        bottom = location["y"] + size["height"]
        left = location["x"]
        right = location["x"] + size["width"]
        print("验证码截图坐标: ", left, top, bottom, right)
        screen_shot = Image.open(BytesIO(self.browser.get_screenshot_as_png()))
        captcha = screen_shot.crop((int(left), int(top), int(right), int(bottom)))
        captcha.save(image_file_name)
        return captcha

    def _submit_credentials(self):
        """Open the login page, fill in the account and click the login button."""
        self.browser.get(self.login_url)
        username_ele = self.browser.find_element_by_css_selector("#login-username")
        password_ele = self.browser.find_element_by_css_selector("#login-passwd")
        username_ele.send_keys(self.user_name)
        password_ele.send_keys(self.pass_word)

        # Clicking login brings up the slider captcha.
        login_btn = self.browser.find_element_by_css_selector(".btn.btn-login")
        login_btn.click()

        # Wait for the slider captcha to appear.
        time.sleep(5)

    def _capture_captcha_pair(self):
        """Return (complete image, image with gap) screenshots of the captcha."""
        # Clearing the inline style of the 4th canvas reveals the complete picture.
        self.browser.execute_script('document.querySelectorAll("canvas")[3].style=""')
        image1 = self.crop_image("captcha1.png")

        # Hiding that canvas again shows the picture with the gap -- the key step.
        self.browser.execute_script('document.querySelectorAll("canvas")[3].style="display: none;"')
        image2 = self.crop_image("captcha2.png")
        return image1, image2

    def _find_gap_offset(self, image1, image2):
        """Return the x offset to drag: first differing column minus 6.

        Scanning starts at column 60; when no column differs, 60 is used as
        the column.
        """
        left = 60
        for i in range(60, image1.size[0]):
            if any(not self.compare_pixel(image1, image2, i, j)
                   for j in range(image1.size[1])):
                left = i
                break
        return left - 6

    def _build_track(self, distance):
        """Build per-step x offsets that accelerate, then decelerate.

        The first three quarters of ``distance`` use a random acceleration of
        2-3, the rest a random deceleration of 6-7. Because every step is
        rounded on its own, a final corrective step is appended so the steps
        sum exactly to ``distance``.
        """
        track = []
        current = 0          # distance covered so far (unrounded)
        mid = distance * 3 / 4   # switch from speeding up to slowing down here
        t = 0.1              # duration of one step
        v = 0                # current speed
        while current < distance:
            if current < mid:
                a = random.randint(2, 3)
            else:
                a = - random.randint(6, 7)
            v0 = v
            v = v0 + a * t
            move = v0 * t + 1 / 2 * a * t * t
            current += move
            track.append(round(move))

        # Rounding each step separately lets the total drift from the target.
        correction = round(distance) - sum(track)
        if correction:
            track.append(correction)
        return track

    def _drag_slider(self, track):
        """Hold the slider button, move it along ``track`` and release it."""
        slider = self.browser.find_element_by_css_selector(".geetest_slider_button")
        ActionChains(self.browser).click_and_hold(slider).perform()
        for x in track:
            ActionChains(self.browser).move_by_offset(xoffset=x, yoffset=0).perform()
        time.sleep(0.5)
        ActionChains(self.browser).release().perform()
        # Give the page time to validate the captcha and finish logging in.
        time.sleep(5)

    def login(self):
        """Log in, solving the slider captcha, and return the cookies.

        The login attempt is repeated until ``check_login`` succeeds. The
        browser session is always shut down before returning or raising.

        Returns:
            dict mapping cookie name to cookie value.
        """
        try:
            try:
                # Maximize the window so element positions match the screenshot.
                self.browser.maximize_window()
            except Exception:
                # Best effort: keep going with the default window size.
                pass

            while not self.check_login():
                self._submit_credentials()
                image1, image2 = self._capture_captcha_pair()
                left = self._find_gap_offset(image1, image2)
                print(left)
                self._drag_slider(self._build_track(left))

            Cookies = self.browser.get_cookies()
            print(Cookies)
            return {cookie['name']: cookie['value'] for cookie in Cookies}
        finally:
            # quit() ends the whole driver session; close() would only close
            # the window and leave the chromedriver process running.
            self.browser.quit()

    def check_cookie(self, cookie_dict):
        """Check whether ``cookie_dict`` is still valid (not implemented).

        NOTE(review): this is a stub and returns None, so a caller that tests
        the result for truthiness will treat every cookie as invalid.
        """
        pass


if __name__ == "__main__":
    # Manual smoke test: run one bilibili login using the project settings.
    import settings

    BiliLoginService(settings).login()


server.py

  • 该模块主要通过多线程对Cookie进行检测

import json
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from functools import partial

import redis


# 1. 如何确保每一个网站都会被单独的运行
class CookieServer():
    """Keep a per-site cookie pool in Redis filled and validated.

    Each registered service class gets two endless worker tasks: one that
    logs in whenever the pool holds fewer cookies than configured, and one
    that periodically re-checks the stored cookies and removes invalid ones.
    """

    def __init__(self, settings):
        """Connect to Redis using ``settings.REDIS_HOST`` / ``REDIS_PORT``."""
        self.redis_cli = redis.Redis(host=settings.REDIS_HOST, port=settings.REDIS_PORT, decode_responses=True)
        self.service_list = []
        self.settings = settings

    def register(self, cls):
        """Register a service class; it is instantiated as ``cls(settings)``."""
        self.service_list.append(cls)

    def _service_name(self, srv):
        """Return the service's ``name`` without building an instance if possible."""
        name = getattr(srv, "name", None)
        if not isinstance(name, str):
            # The name is not a plain class attribute: ask an instance instead.
            name = srv(self.settings).name
        return name

    def login_service(self, srv):
        """Endlessly top up the cookie pool of service class ``srv``.

        The pool size is checked first and the service is only instantiated
        when a login is actually needed, so a full pool does not create a
        new service object (e.g. a browser) on every poll.
        """
        srv_name = self._service_name(srv)
        while 1:
            account = self.settings.Accounts[srv_name]
            cookie_nums = self.redis_cli.scard(account["cookie_key"])
            if cookie_nums < account["max_cookie_nums"]:
                cookie_dict = srv(self.settings).login()
                self.redis_cli.sadd(account["cookie_key"], json.dumps(cookie_dict))
            else:
                print("{srv_name} 的cookie池已满,等待10s".format(srv_name=srv_name))
                time.sleep(10)

    # celery
    def check_cookie_service(self, srv):
        """Endlessly validate the stored cookies of service class ``srv``.

        One service instance is created up front and reused for every round.
        Cookies for which ``check_cookie`` returns a falsy value are removed
        from the Redis set.
        """
        srv_cli = srv(self.settings)
        srv_name = srv_cli.name
        while 1:
            print("开始检测cookie是否可用")
            account = self.settings.Accounts[srv_name]
            all_cookies = self.redis_cli.smembers(account["cookie_key"])
            print("目前可用cookie数量: {}".format(len(all_cookies)))
            for cookie_str in all_cookies:
                print("获取到cookie: {}".format(cookie_str))
                cookie_dict = json.loads(cookie_str)
                if srv_cli.check_cookie(cookie_dict):
                    print("cookie 有效")
                else:
                    print("cookie已经失效, 删除cookie")
                    self.redis_cli.srem(account["cookie_key"], cookie_str)
            # Pause between rounds: checking too often could itself get a
            # still-valid cookie invalidated.
            interval = account["check_interval"]
            print("{}s 后重新开始检测cookie".format(interval))
            time.sleep(interval)

    def start(self):
        """Start the login and cookie-check workers and block on them.

        Every worker loops forever, so each pool gets one thread per
        registered service; a fixed pool size would leave the services beyond
        that size queued and never started.
        """
        task_list = []
        worker_count = max(1, len(self.service_list))

        print("启动登录服务")
        login_executor = ThreadPoolExecutor(max_workers=worker_count)
        for srv in self.service_list:
            task_list.append(login_executor.submit(self.login_service, srv))

        print("启动cookie检测服务")
        check_executor = ThreadPoolExecutor(max_workers=worker_count)
        for srv in self.service_list:
            task_list.append(check_executor.submit(self.check_cookie_service, srv))

        # Workers only finish by raising; result() re-raises that error here.
        for future in as_completed(task_list):
            data = future.result()
            print(data)

settings.py

  • 该模块用于存放数据库和平台登陆的账号密码等【当然了账号密码的存放方式也不限于当前】

# Chaojiying (captcha-solving platform) credentials.
CJY_USERNAME = "【chaojiying】"
CJY_PASSWORD = "【chaojiying】"

# Redis connection settings.
REDIS_HOST = "127.0.0.1"
REDIS_PORT = 6379

# Login account and cookie-pool parameters for each site, keyed by service name.
#   cookie_key      - Redis set key the site's cookies are stored under
#   max_cookie_nums - pool size below which a new login is performed
#   check_interval  - pause, in seconds, between two cookie validation rounds
Accounts = {
    "zhihu": {
        "username": "【账户名】",
        "password": "【密码】",
        "cookie_key": "zhihu:cookies",
        "max_cookie_nums": 1,
        "check_interval": 30,
    },
    "bili": {
        "username": "【账户名】",
        "password": "【密码】",
        "cookie_key": "bili:cookies",
        "max_cookie_nums": 1,
        "check_interval": 30,
    },
}

启动文件

from server import CookieServer
from services.zhihu import ZhihuLoginService

import settings

# Build the cookie pool server from the project settings.
srv = CookieServer(settings)

# Register every login service the pool should maintain.
for service_cls in (ZhihuLoginService,):
    srv.register(service_cls)

# Start the cookie pool workers.
print("启动cookie池服务")
srv.start()

今天的分享就到这里了,大家可以到我的资源库关注后免费下载,即可获取[CookiePool](https://download.youkuaiyun.com/download/weixin_38640052/19880874)

评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值