告别Redis依赖:手把手教你为Proxy Pool添加自定义存储后端
你是否在使用Proxy Pool时受限于Redis或SSDB存储?是否需要将代理数据存储到公司内部数据库?本文将带你从零开始实现自定义存储后端,5个步骤让Proxy Pool支持任意数据库,轻松应对企业级部署需求。
读完本文你将掌握:
- 理解Proxy Pool存储架构设计
- 编写符合规范的存储后端类
- 注册并配置新的存储引擎
- 完整的功能测试与错误排查
- 生产环境部署最佳实践
存储架构解析:Proxy Pool如何管理代理数据
Proxy Pool采用工厂模式设计存储层,通过db/dbClient.py统一管理不同存储后端。系统默认提供Redis和SSDB两种实现,其核心架构如下:工厂类根据配置中的数据库类型(如SSDB、REDIS)动态导入对应的客户端模块(ssdbClient、redisClient),并用连接参数实例化其中的XxxClient类。
存储后端需要实现10个核心方法,其中get()、put()和update()是最常用的操作。所有存储实现都遵循相同的接口规范,这使得扩展新存储变得简单可控。
步骤1:创建存储后端类文件
在db目录下创建新的存储实现文件,命名格式为[数据库名]Client.py。以MySQL为例,创建db/mysqlClient.py文件,基础结构如下:
# -*- coding: utf-8 -*-
"""
-------------------------------------------------
File Name: mysqlClient.py
Description : MySQL存储后端实现
Author : Your Name
date: 2025/10/18
-------------------------------------------------
"""
from handler.logHandler import LogHandler
import json
import pymysql
class MysqlClient(object):
    """
    MySQL storage backend for Proxy Pool.

    Proxies are stored one row per proxy in a table whose name is selected
    with changeTable(). Each row holds ``proxy`` (primary key) and
    ``json_data`` (the serialized proxy), plus creation/update timestamps.
    """

    def __init__(self, **kwargs):
        """
        Open the MySQL connection.

        Keyword Args:
            host: server host name or address.
            port: server port; defaults to 3306 when the key is absent.
            username: login user.
            password: login password.
            db: database (schema) name.
        """
        self.name = ""  # table name; stays empty until changeTable() is called
        self.logger = LogHandler('mysql_client')
        self.__conn = pymysql.connect(
            host=kwargs.get('host'),
            port=kwargs.get('port', 3306),
            user=kwargs.get('username'),
            password=kwargs.get('password'),
            db=kwargs.get('db'),
            charset='utf8mb4'
        )
        # No table is created here: the table name is still empty at this
        # point, and CREATE TABLE with an empty identifier is rejected by
        # MySQL. changeTable() creates the table once the name is known.

    def changeTable(self, name):
        """
        Select the table that subsequent operations work on.

        The table is created on first use so callers never hit a missing
        table after switching.

        Args:
            name: table name to operate on.
        """
        self.name = name
        self.__create_table()

    def __create_table(self):
        """Create the currently selected table if it does not exist yet.

        Does nothing while no table has been selected (empty name).
        """
        if not self.name:
            return
        sql = f"""
            CREATE TABLE IF NOT EXISTS `{self.name}` (
                `proxy` VARCHAR(50) NOT NULL PRIMARY KEY,
                `json_data` TEXT NOT NULL,
                `create_time` TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                `update_time` TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP
            ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
        """
        with self.__conn.cursor() as cursor:
            cursor.execute(sql)
        self.__conn.commit()
步骤2:实现核心接口方法
参照db/redisClient.py和db/ssdbClient.py的实现,完成10个核心方法。以下是关键方法示例,其余方法(例如步骤5测试中用到的changeTable()和clear())需按相同方式自行实现:
实现代理添加与查询
def put(self, proxy_obj):
    """
    Insert a proxy, or overwrite its stored JSON if the proxy already exists.

    Args:
        proxy_obj: object exposing ``proxy`` (the primary-key string) and
            ``to_json`` (the serialized proxy); both are read as attributes.

    Returns:
        bool: True once the row is committed, False if any step failed
        (the error is logged and a rollback is attempted).
    """
    try:
        sql = f"INSERT INTO `{self.name}` (proxy, json_data) VALUES (%s, %s) ON DUPLICATE KEY UPDATE json_data=%s"
        payload = proxy_obj.to_json
        with self.__conn.cursor() as cursor:
            cursor.execute(sql, (proxy_obj.proxy, payload, payload))
        self.__conn.commit()
        return True
    except Exception as e:
        self.logger.error(f"存储代理失败: {str(e)}")
        try:
            self.__conn.rollback()
        except Exception as rollback_error:
            # Rolling back needs a usable connection; if the connection is
            # what failed, the rollback raises too. Log it so the method
            # still honours its "return False on failure" contract.
            self.logger.error(f"回滚失败: {str(rollback_error)}")
        return False
def get(self, https):
    """
    Return the stored JSON of one randomly chosen proxy.

    Args:
        https: when truthy, only rows whose JSON text contains
            ``"https": true`` are considered.

    Returns:
        The ``json_data`` value of the selected row, or None when the table
        has no matching row or the query failed (the error is logged).
    """
    try:
        if https:
            sql = f"SELECT json_data FROM `{self.name}` WHERE json_data LIKE '%\"https\": true%' ORDER BY RAND() LIMIT 1"
        else:
            sql = f"SELECT json_data FROM `{self.name}` ORDER BY RAND() LIMIT 1"
        with self.__conn.cursor() as cursor:
            # No args are passed, so the driver does not %-format the query
            # and the LIKE wildcards reach the server untouched.
            cursor.execute(sql)
            row = cursor.fetchone()
        # pymysql connections do not autocommit by default. Under InnoDB's
        # default REPEATABLE READ isolation an unfinished read transaction
        # keeps its first snapshot, so a process that only reads would never
        # see rows written by other connections. Committing ends the read
        # transaction so the next call starts from fresh data.
        self.__conn.commit()
        return row[0] if row else None
    except Exception as e:
        self.logger.error(f"获取代理失败: {str(e)}")
        return None
实现代理更新与删除
def update(self, proxy_obj):
    """
    Overwrite the stored JSON of an existing proxy.

    Args:
        proxy_obj: object exposing ``proxy`` (the primary-key string) and
            ``to_json`` (the serialized proxy); both are read as attributes.

    Returns:
        bool: True when the statement reported at least one affected row,
        False when it reported none or when any step failed (the error is
        logged and a rollback is attempted).

    NOTE(review): MySQL normally counts only rows whose values actually
    changed, so this may return False for an existing proxy whose JSON is
    identical to the stored one - confirm whether callers rely on the result.
    """
    try:
        sql = f"UPDATE `{self.name}` SET json_data=%s WHERE proxy=%s"
        with self.__conn.cursor() as cursor:
            cursor.execute(sql, (proxy_obj.to_json, proxy_obj.proxy))
            affected = cursor.rowcount
        self.__conn.commit()
        return affected > 0
    except Exception as e:
        self.logger.error(f"更新代理失败: {str(e)}")
        try:
            self.__conn.rollback()
        except Exception as rollback_error:
            # If the connection itself is broken the rollback raises as
            # well; log it so the method still returns False instead of
            # propagating from inside the error handler.
            self.logger.error(f"回滚失败: {str(rollback_error)}")
        return False
def delete(self, proxy_str):
    """
    Remove one proxy from the current table.

    Args:
        proxy_str: primary-key value of the row to delete.

    Returns:
        bool: True when a row was deleted, False when no row matched or any
        step failed (the error is logged and a rollback is attempted).
    """
    try:
        sql = f"DELETE FROM `{self.name}` WHERE proxy=%s"
        with self.__conn.cursor() as cursor:
            cursor.execute(sql, (proxy_str,))
            affected = cursor.rowcount
        self.__conn.commit()
        return affected > 0
    except Exception as e:
        self.logger.error(f"删除代理失败: {str(e)}")
        try:
            self.__conn.rollback()
        except Exception as rollback_error:
            # If the connection itself is broken the rollback raises as
            # well; log it so the method still returns False instead of
            # propagating from inside the error handler.
            self.logger.error(f"回滚失败: {str(rollback_error)}")
        return False
步骤3:注册新的存储后端
修改db/dbClient.py,在工厂类中注册新的存储后端:
def __initDbClient(self):
    """
    Build the storage client that matches ``self.db_type``.

    The module to import is looked up from the database type; the client
    class inside it is named ``<Type>Client`` where ``<Type>`` is the
    title-cased database type. The instance is stored on ``self.client``.

    Raises:
        AssertionError: when ``self.db_type`` is not a supported type.
    """
    modules_by_type = {
        "SSDB": "ssdbClient",
        "REDIS": "redisClient",
        "MYSQL": "mysqlClient",  # MySQL support
    }
    module_name = modules_by_type.get(self.db_type)
    assert module_name, '不支持的数据库类型: {}'.format(self.db_type)
    client_cls = getattr(__import__(module_name), "%sClient" % self.db_type.title())
    self.client = client_cls(
        host=self.db_host,
        port=self.db_port,
        username=self.db_user,
        password=self.db_pwd,
        db=self.db_name,
    )
步骤4:配置与使用新存储后端
修改setting.py中的数据库连接字符串,格式遵循URL规范:
# Before: Redis connection
# DB_CONN = 'redis://:pwd@127.0.0.1:6379/0'
# After: MySQL connection, written as mysql://user:password@host:port/database
DB_CONN = 'mysql://user:password@127.0.0.1:3306/proxy_db'
步骤5:测试与验证
创建测试用例文件test/testMysqlClient.py,验证所有功能:
import unittest
from db.mysqlClient import MysqlClient
class TestMysqlClient(unittest.TestCase):
    """Integration checks for MysqlClient; connects to MySQL at 127.0.0.1:3306."""

    def setUp(self):
        """Connect and switch to a dedicated test table."""
        conn_args = dict(
            host='127.0.0.1',
            port=3306,
            username='user',
            password='password',
            db='proxy_db',
        )
        self.client = MysqlClient(**conn_args)
        self.client.changeTable('test_proxy')

    def tearDown(self):
        """Empty the test table so each test starts clean."""
        self.client.clear()

    def test_put_and_get(self):
        """A stored HTTPS proxy can be fetched back with https=True."""
        from helper.proxy import Proxy
        https_proxy = Proxy('127.0.0.1:8080')
        https_proxy.https = True
        stored = self.client.put(https_proxy)
        self.assertTrue(stored)
        fetched = self.client.get(https=True)
        self.assertIn('127.0.0.1:8080', fetched)
if __name__ == '__main__':
unittest.main()
部署与扩展建议
- 连接池优化:对于MySQL等关系型数据库,建议添加连接池管理,参考RedisClient的BlockingConnectionPool实现
- 批量操作:实现批量插入和批量查询方法,优化性能:
def batch_put(self, proxy_list):
    """
    Insert many proxies at once, overwriting the JSON of existing ones.

    Args:
        proxy_list: iterable of objects exposing ``proxy`` and ``to_json``
            attributes.

    Returns:
        bool: True when the batch was committed or ``proxy_list`` is empty,
        False if any step failed (the error is logged and a rollback is
        attempted).
    """
    if not proxy_list:
        return True
    try:
        # The update clause uses VALUES(json_data) instead of a third %s.
        # pymysql's executemany rewrites INSERT ... VALUES into one bulk
        # statement and formats only the "(%s, %s)" group per row, so a
        # placeholder after ON DUPLICATE KEY UPDATE makes every row fail
        # with "not all arguments converted".
        # NOTE(review): newer MySQL 8.0 releases deprecate VALUES() here in
        # favour of a row alias - confirm against the deployed server.
        sql = (
            f"INSERT INTO `{self.name}` (proxy, json_data) VALUES (%s, %s) "
            "ON DUPLICATE KEY UPDATE json_data=VALUES(json_data)"
        )
        rows = [(p.proxy, p.to_json) for p in proxy_list]
        with self.__conn.cursor() as cursor:
            cursor.executemany(sql, rows)
        self.__conn.commit()
        return True
    except Exception as e:
        self.logger.error(f"批量存储代理失败: {str(e)}")
        try:
            self.__conn.rollback()
        except Exception as rollback_error:
            # If the connection itself is broken the rollback raises as
            # well; log it so the method still returns False instead of
            # propagating from inside the error handler.
            self.logger.error(f"回滚失败: {str(rollback_error)}")
        return False
- 索引优化:为常用查询条件添加索引。注意get()中以通配符开头的 LIKE '%...%' 查询无法利用索引;如需提升HTTPS代理查询速度,可为json_data字段添加全文索引并改用 MATCH ... AGAINST 查询,或将https单独存为一个带索引的列
通过以上步骤,你已经成功为Proxy Pool添加了MySQL存储后端。这一模式可以推广到任何数据库,只需实现相同的接口方法。完整代码示例可参考项目中的Redis和SSDB实现,祝你的Proxy Pool更加灵活强大!
如果觉得本文对你有帮助,别忘了点赞收藏,关注作者获取更多Proxy Pool高级用法!
创作声明:本文部分内容由AI辅助生成(AIGC),仅供参考



