from DrissionPage import Chromium
import pymysql # 使用 PyMySQL 连接 MySQL 数据库
# 初始化浏览器相关
def initialize_browser():
    """Create a ``Chromium`` browser object and return its latest tab."""
    return Chromium().latest_tab
def visit_page(tab, url):
    """Navigate the given tab to a page.

    Args:
        tab: Tab object exposing a ``get(url)`` method.
        url: Address handed to ``tab.get``.

    Returns:
        None. Whatever ``tab.get`` returns is discarded.
    """
    tab.get(url)
def url_list(tab):
    """Return the tab's current URL split on ``/``.

    ``tab.url`` is converted with ``str()`` first, so the result is always a
    list of strings (empty segments, such as the one after the scheme's
    ``//``, are kept).
    """
    current_url = str(tab.url)
    return current_url.split("/")
def start_listening(tab, graphql_url):
    """Start the tab's listener for the GraphQL request target.

    Args:
        tab: Tab object whose ``listen`` attribute exposes ``start()``.
        graphql_url: Value passed unchanged to ``tab.listen.start``.

    Returns:
        None.
    """
    listener = tab.listen
    listener.start(graphql_url)
def refresh_page(tab):
    """Reload the tab so that the page issues its requests again.

    Args:
        tab: Tab object exposing a ``refresh()`` method.

    Returns:
        None.
    """
    tab.refresh()
# 数据提取相关
def extract_product_info(response_body, sex, product_type):
    """Extract product rows from a GraphQL response body.

    The products are read from
    ``response_body["data"]["categoryByUrl"]["products"]["nodes"]``.

    Args:
        response_body: Decoded response; anything that is not a ``dict``
            yields an empty result.
        sex: Value stored as the first element of every row.
        product_type: Value stored as the last element of every row.

    Returns:
        A list of 9-tuples ``(sex, id, name, subHeader, price, masterId,
        colorName, preview, product_type)``. The list is empty when the
        response is not a dict, reports errors, or contains no products.
        Nodes that are not dicts or have no usable ``variantProduct`` are
        skipped.
    """
    if not isinstance(response_body, dict):
        print("响应内容不是字典类型")
        return []

    # A non-empty "errors" entry means the request failed.
    errors = response_body.get("errors")
    if errors:
        print("GraphQL 请求出错:", errors)
        return []

    # Walk the nested path one level at a time. A key may be present with a
    # null value, in which case a chained ``.get(key, {})`` would return None
    # and the next ``.get`` would raise AttributeError.
    products = response_body
    for key in ("data", "categoryByUrl", "products"):
        products = products.get(key) if isinstance(products, dict) else None
    if not isinstance(products, dict) or not products:
        print("未找到 data 字段")
        return []

    nodes = products.get("nodes")
    if not isinstance(nodes, (list, tuple)) or not nodes:
        print("未找到产品信息")
        return []

    product_info_list = []
    for product in nodes:
        # Null or malformed nodes are treated like nodes without a variant.
        variant_product = (
            product.get("variantProduct") if isinstance(product, dict) else None
        )
        if not isinstance(variant_product, dict) or not variant_product:
            print("跳过无效产品:variantProduct 为空")
            continue

        product_info_list.append(
            (
                sex,
                product.get("id"),
                variant_product.get("name"),
                variant_product.get("subHeader"),
                variant_product.get("price"),
                product.get("masterId"),
                variant_product.get("colorName"),
                variant_product.get("preview"),
                product_type,
            )
        )
    return product_info_list
# 数据库操作相关
def create_db_connection():
    """Open a PyMySQL connection to the ``bm`` database on 127.0.0.1.

    Returns:
        The connection object, or ``None`` when ``pymysql.connect`` raises
        ``pymysql.MySQLError`` (the error is printed).
    """
    # NOTE(review): credentials are hard-coded here; consider loading them
    # from configuration or the environment.
    settings = {
        "host": "127.0.0.1",
        "user": "root",
        "password": "123456",
        "database": "bm",
        "charset": 'utf8mb4',
    }
    try:
        return pymysql.connect(**settings)
    except pymysql.MySQLError as err:
        print(f"数据库连接失败:{err}")
    return None
def insert_into_database(connection, product_info_list):
    """Bulk-insert product rows into the ``products`` table.

    Args:
        connection: Open database connection; a falsy value makes the
            function print a message and return without doing anything.
        product_info_list: Sequence of 9-item rows matching the column order
            ``sex, id, name, sub_header, price, masterId, colorName, preview,
            type``.

    Returns:
        None. On ``pymysql.MySQLError`` the error is printed and the
        transaction is rolled back.
    """
    if not connection:
        print("数据库连接无效")
        return

    # One placeholder per column; the driver fills them in for every row.
    insert_query = (
        "\n"
        "INSERT INTO products (\n"
        "sex, id, name, sub_header, price, masterId, colorName, preview, type\n"
        ") VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)\n"
    )

    try:
        with connection.cursor() as cursor:
            cursor.executemany(insert_query, product_info_list)
            connection.commit()
            print(f"成功插入 {len(product_info_list)} 条记录")
    except pymysql.MySQLError as err:
        print(f"数据库插入失败:{err}")
        # Undo the partially applied batch.
        connection.rollback()
# 数据处理与主流程
def process_packets(tab):
    """Store the products found in each packet captured by the tab's listener.

    For every packet yielded by ``tab.listen.steps()`` the response body is
    passed to ``extract_product_info`` together with two segments of the
    tab's current URL. Non-empty results are printed and written to the
    database over a connection opened for that packet.

    Args:
        tab: Tab object whose listener has already been started.
    """
    for packet in tab.listen.steps():
        segments = url_list(tab)
        response_body = packet.response.body
        # NOTE(review): index 5 assumes the page URL splits into at least six
        # segments; a shorter URL raises IndexError here. Confirm against the
        # target site's URL layout.
        product_info_list = extract_product_info(
            response_body, segments[5], segments[-1]
        )
        if not product_info_list:
            continue

        print("提取的产品信息:", product_info_list)
        connection = create_db_connection()
        try:
            insert_into_database(connection, product_info_list)
        finally:
            # Close the connection even if the insert raises an exception
            # that insert_into_database does not handle itself.
            if connection:
                connection.close()
def main():
    """Run the workflow: open the page, listen, refresh, and store products."""
    tab = initialize_browser()

    # NOTE(review): the target page URL is an empty placeholder in this file;
    # presumably it must be filled in before running.
    url = ''
    visit_page(tab, url)

    # NOTE(review): the listener target is likewise an empty placeholder.
    graphql_url = ''
    # start_listening returns nothing, so its result is not bound to a name.
    start_listening(tab, graphql_url)

    # Reload so the page issues its requests while the listener is active.
    refresh_page(tab)

    process_packets(tab)
# Run the workflow only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()