手搓全自动文章多平台发布系统:4、脚本方式加载不同的站点

前面,我们已经可以方便地读取站点,并完成了相关浏览器的测试。下面的重点是传入一个数字,让程序通过这个数字自动打开对应的网页。然后我们会根据情况,判断账号是否已经登录。

main.py的代码如下:

import sys
from PyQt6.QtWidgets import QApplication
from modules.ui_components import TableManager  # 导入界面类

if __name__ == "__main__":
    # Create the Qt application object from the command-line arguments.
    qt_app = QApplication(sys.argv)

    # Build the main window and make it visible.
    main_window = TableManager()
    main_window.show()

    # Run the Qt event loop; its return value becomes the process exit code.
    exit_code = qt_app.exec()
    sys.exit(exit_code)

修改playwright_manager.py中的内容,使其可以根据传入的数字进行工作,即由原来的main.py修改而来。

 代码如下:

# playwright_manager.py
import os
from playwright.async_api import async_playwright
import sys
import asyncio
from pathlib import Path
# 动态添加项目根目录到路径
current_dir = Path(__file__).parent
project_root = current_dir.parent
sys.path.append(str(project_root))
from config.config import ConfigManager

# The Browser class below manages the browser instance.
# USER_DATA_DIR is the "User Data" folder under the project root; it is passed
# to Playwright as the user_data_dir of the persistent context.
USER_DATA_DIR = os.path.join(project_root, "User Data")

class Browser:
    """Own one Playwright driver and one persistent Chromium context.

    The context is launched with ``USER_DATA_DIR`` as its profile directory.
    """

    def __init__(self, headless=False):
        """Remember the launch mode; nothing is started until ``start()``.

        Args:
            headless: Passed straight through to Playwright. ``False`` (the
                default) shows a browser window, ``True`` runs without one.
        """
        self._playwright = None
        self._context = None  # BrowserContext returned by launch_persistent_context
        self._headless = headless

    async def start(self):
        """Start Playwright and launch the persistent context.

        Calling ``start()`` again while a context is already open returns the
        existing context instead of starting a second driver.

        Returns:
            The launched (or already running) browser context.

        Raises:
            Any exception raised by Playwright while starting. If the launch
            itself fails, the driver is stopped before the error is re-raised.
        """
        if self._context is not None:
            return self._context

        self._playwright = await async_playwright().start()
        try:
            self._context = await self._playwright.chromium.launch_persistent_context(
                user_data_dir=USER_DATA_DIR,
                headless=self._headless,
            )
        except BaseException:
            # Do not leave an orphaned driver process behind a failed launch.
            await self._playwright.stop()
            self._playwright = None
            raise
        print("上下文: started")
        return self._context

    async def close(self):
        """Close the context and stop the driver; safe to call repeatedly."""
        # Detach the handles first so a second close() is a no-op.
        context, self._context = self._context, None
        playwright, self._playwright = self._playwright, None
        try:
            if context:
                await context.close()
        finally:
            # Stop the driver even if closing the context raised.
            if playwright:
                await playwright.stop()

class Page:
    """Open the configured URL of one site row in a new tab of a context."""

    def __init__(self, context, site_row=0):
        """Store the context and the row to look up.

        Args:
            context: Browser context used to create the page.
            site_row: Row passed to ``ConfigManager.get_site_url``.
        """
        self.context = context
        self._page = None
        self.site_row = site_row
        self.config = ConfigManager()

    async def open_url(self):
        """Create a page, navigate to the site URL and wait for network idle.

        The network-idle wait is best effort: a timeout there is reported and
        the page is still returned. Errors from ``goto`` itself propagate.

        Returns:
            The page that was opened.
        """
        # Playwright raises its own TimeoutError, which does not derive from
        # the builtin one, so it must be named explicitly in the handler.
        from playwright.async_api import TimeoutError as PlaywrightTimeoutError

        url = self.config.get_site_url(self.site_row)
        print(f"Opening URL: {url}")
        # New tab inside the context supplied by the caller.
        self._page = await self.context.new_page()
        await self._page.goto(url)
        try:
            await self._page.wait_for_load_state("networkidle", timeout=10000)
            print("网页加载完成》》》")
        except (PlaywrightTimeoutError, TimeoutError):
            print("页面加载超时!")
        return self._page

    async def close(self):
        """Close the page if one is open; further calls do nothing."""
        if self._page:
            # Drop the reference first so the page is never closed twice.
            page, self._page = self._page, None
            await page.close()
            print("Page closed")

async def browser_main(idx: int):
    """Open the site for index ``idx`` and keep it open until Enter is pressed.

    Errors are printed rather than raised. The page and the browser are
    closed on every exit path.

    Args:
        idx: Site index supplied by the caller.
    """
    browser = Browser(headless=False)  # headless=False: show a browser window
    page = None
    try:
        context = await browser.start()  # persistent context
        # NOTE(review): the config row used is idx + 1, while the UI code looks
        # rows up with the plain index - confirm which convention is intended.
        page = Page(context, site_row=idx+1)
        await page.open_url()

        # input() blocks the calling thread. Running it in a worker thread
        # keeps the event loop free while waiting for the user.
        loop = asyncio.get_running_loop()
        await loop.run_in_executor(None, input, "按任意键关闭浏览器...")
    except Exception as e:
        print(f"Error occurred: {e}")
    finally:
        # Close the page first, then the browser; report cleanup failures
        # instead of letting one of them skip the remaining cleanup.
        for resource in (page, browser):
            if resource is None:
                continue
            try:
                await resource.close()
            except Exception as e:
                print(f"Error occurred: {e}")


if __name__ == "__main__":
    asyncio.run(browser_main(0))

以下代码为ui_components.py,目前可以得到选择框的内容及行号

如下:

from PyQt6.QtCore import Qt
from PyQt6.QtGui import QIcon
from PyQt6.QtWidgets import *
import sys
from pathlib import Path

# 动态添加项目根目录到路径
current_dir = Path(__file__).parent
project_root = current_dir.parent
sys.path.append(str(project_root))

from config.config import ConfigManager


# Button panel
class ButtonPanel(QWidget):
    """Vertical column of action buttons."""

    def __init__(self, parent=None):
        """Keep a reference to the parent widget and build the buttons."""
        super().__init__(parent)
        self.parent_widget = parent
        self.init_ui()

    def init_ui(self):
        """Create the top-aligned layout and one button per entry."""
        # (label, click handler, icon path), listed top to bottom.
        button_specs = (
            ("读取配置", self.load_config_clicked, "icons/load.png"),
            ("发布文章", self.placeholder, "icons/publish.png"),
            ("批量操作", self.placeholder, "icons/batch.png"),
            ("设置", self.placeholder, "icons/settings.png"),
        )

        column = QVBoxLayout()
        column.setSpacing(10)
        column.setAlignment(Qt.AlignmentFlag.AlignTop)

        for label, handler, icon_path in button_specs:
            button = QPushButton(label)
            button.setIcon(QIcon(icon_path))
            button.setStyleSheet("padding: 10px; font-size: 14px;")
            button.clicked.connect(handler)
            column.addWidget(button)

        self.setLayout(column)

    def placeholder(self):
        """Show an information box for features that are not implemented yet."""
        QMessageBox.information(
            self,
            "提示",
            "该功能正在开发中...",
            QMessageBox.StandardButton.Ok,
        )

    def load_config_clicked(self):
        """Forward the click to the parent's ``load_config`` when a parent exists."""
        owner = self.parent_widget
        if not owner:
            return
        owner.load_config()


# Table panel
class TablePanel(QWidget):
    """Holds the four-column site table and fills it from the configuration."""

    def __init__(self, config: ConfigManager):
        """Store the configuration and set up an empty table."""
        super().__init__()
        self.config = config
        self.table = QTableWidget()
        self.init_table()

    def init_table(self):
        """Configure headers, column widths and row appearance."""
        grid = self.table
        grid.setColumnCount(4)
        grid.setHorizontalHeaderLabels(['选择', '网站', '网址', '状态'])
        # Column 0 is 50 wide so the checkbox stays visible.
        for column, width in enumerate((50, 200, 200, 150)):
            grid.setColumnWidth(column, width)
        grid.verticalHeader().setDefaultSectionSize(30)
        grid.setAlternatingRowColors(True)

    def load_config(self):
        """Empty the table, then add one row per entry in ``config.sites``."""
        self.table.setRowCount(0)
        site_count = len(self.config.sites)
        for row_idx in range(site_count):
            print(f"当前行号:{row_idx}")
            self.add_row(row_idx)

    def on_checkbox_toggled(self, checked: bool, row: int):
        """Print the row index, the checkbox state and that row's URL."""
        site_url = self.config.get_site_url(row)
        print(f"当前行号:{row},复选框状态:{checked}, 网址:{site_url}")

    def add_row(self, row_idx):
        """Append a table row for configuration index ``row_idx``."""
        new_row = self.table.rowCount()
        self.table.insertRow(new_row)

        # Checkbox: the default argument freezes row_idx for this row, so each
        # checkbox reports its own index when clicked.
        def _report_click(checked, current_row=row_idx):
            self.on_checkbox_toggled(checked, current_row)

        toggle = QCheckBox()
        toggle.clicked.connect(_report_click)
        self.table.setCellWidget(new_row, 0, toggle)

        # Site name, centred.
        name_item = QTableWidgetItem(self.config.get_site_name(row_idx))
        name_item.setTextAlignment(Qt.AlignmentFlag.AlignCenter)
        self.table.setItem(new_row, 1, name_item)

        # Site URL.
        self.table.setItem(new_row, 2, QTableWidgetItem(self.config.get_site_url(row_idx)))

        # Status; every row starts with the same fixed text.
        self.table.setItem(new_row, 3, QTableWidgetItem("未登录"))


# Main window
class TableManager(QWidget):
    """Top-level window combining the button panel and the site table."""

    def __init__(self):
        """Load the configuration and build the window."""
        super().__init__()
        self.config = ConfigManager()
        self.init_ui()

    def init_ui(self):
        """Create the child components and arrange them side by side."""
        # Components
        self.button_panel = ButtonPanel(self)
        self.table_panel = TablePanel(self.config)

        # Layout: empty spacer on the left, then buttons, then the table,
        # with stretch factors 1 : 1 : 4.
        row = QHBoxLayout()
        row.setContentsMargins(20, 20, 20, 20)
        for widget, stretch in (
            (QWidget(), 1),
            (self.button_panel, 1),
            (self.table_panel.table, 4),
        ):
            row.addWidget(widget, stretch)
        self.setLayout(row)

        # Fixed-size window without a maximize button.
        self.setFixedSize(1000, 800)
        self.setWindowFlag(Qt.WindowType.WindowMaximizeButtonHint, False)
        self.setWindowTitle("文本发布助手")

    def load_config(self):
        """Delegate to the table panel's ``load_config``."""
        self.table_panel.load_config()


if __name__ == '__main__':
    # Stand-alone entry point: show the main window and run the Qt event loop.
    qt_app = QApplication(sys.argv)
    main_window = TableManager()
    main_window.show()
    exit_code = qt_app.exec()
    sys.exit(exit_code)

要了解清楚browser、context、page三者的区别及各自所占的资源。

单个上下文 的测试

import os
from playwright.async_api import async_playwright
import sys
import asyncio
from pathlib import Path

# 动态添加项目根目录到路径
current_dir = Path(__file__).parent
project_root = current_dir.parent
sys.path.append(str(project_root))
from config.config import ConfigManager


class BrowserManager:
    """Singleton owning one persistent context and one cached page per row index."""

    _instance = None
    _browser = None  # holds the started Playwright driver object
    _context = None
    _pages = {}  # {row_idx: page}
    # The locks are created lazily inside the running event loop (see _locks):
    # building asyncio.Lock() at class-definition time happens before
    # asyncio.run() has created its loop, which breaks on Python < 3.10.
    _lock = None
    _init_lock = None
    _lock_loop = None  # event loop the two locks above were created in
    user_data_dir = os.path.join(project_root, "User Data")

    def __new__(cls):
        """Return the single shared instance, creating it on first use."""
        if not cls._instance:
            print("创建新实例new")
            cls._instance = super().__new__(cls)
        return cls._instance

    def _locks(self):
        """Return ``(page_lock, init_lock)`` belonging to the running loop."""
        loop = asyncio.get_running_loop()
        cls = type(self)
        if cls._lock_loop is not loop:
            cls._lock = asyncio.Lock()
            cls._init_lock = asyncio.Lock()
            cls._lock_loop = loop
        return cls._lock, cls._init_lock

    async def initialize(self):
        """Launch the persistent context once and return it.

        Returns:
            The shared browser context.

        Raises:
            Any exception raised by Playwright while starting. If the launch
            fails, the driver is stopped before the error is re-raised.
        """
        _, init_lock = self._locks()
        async with init_lock:
            if not self._context:
                print("无上下文,开始初始化")
                playwright = await async_playwright().start()
                try:
                    self._context = await playwright.chromium.launch_persistent_context(
                        user_data_dir=self.user_data_dir,
                        headless=False
                    )
                except BaseException:
                    # Do not leave an orphaned driver behind a failed launch.
                    await playwright.stop()
                    raise
                self._browser = playwright
            print("有上下文已初始化")
            return self._context

    async def get_page(self, row_idx: int):
        """Return the cached page for ``row_idx``, creating it if needed.

        The context is initialized on demand, so callers need not call
        ``initialize()`` first.
        """
        page_lock, _ = self._locks()
        async with page_lock:
            if row_idx not in self._pages:
                print(f"创建新页面 {row_idx}")
                context = self._context or await self.initialize()
                page = await context.new_page()
                self._pages[row_idx] = page
            print(f"已经有页面 {row_idx}")
            return self._pages[row_idx]

    async def close_page(self, row_idx: int):
        """Close the page of the given row and remove it from the cache."""
        page = self._pages.pop(row_idx, None)
        if page is not None:
            await page.close()
            print(f"页面 {row_idx} 已关闭")

    async def close_all(self):
        """Close every page, the context and the driver, and reset the state.

        After this call the manager can be initialized again.
        """
        # Detach everything first so no stale handle survives a failure below.
        pages = list(self._pages.values())
        self._pages.clear()
        context, self._context = self._context, None
        playwright, self._browser = self._browser, None
        try:
            for page in pages:
                await page.close()
            if context:
                await context.close()
        finally:
            # Stop the driver even if closing a page or the context raised.
            if playwright:
                await playwright.stop()


async def browser_main(idx: int):
    """Load the configured URL for ``idx`` in its page, with up to 3 attempts.

    Failures are printed, not raised; after the last failed attempt the
    function simply returns.
    """
    manager = BrowserManager()
    await manager.initialize()
    page = await manager.get_page(idx)

    url = ConfigManager().get_site_url(idx)

    max_retries = 3
    final_attempt = max_retries - 1
    for retry in range(max_retries):
        try:
            # Wait for the "load" state.
            await page.goto(url, wait_until="load")
            print(f"页面 {idx} 加载完成")
            break
        except Exception as e:
            if retry >= final_attempt:
                print(f"页面 {idx} 加载失败,已达到最大重试次数: {e}")
            else:
                print(f"页面 {idx} 加载失败,重试第 {retry + 1} 次: {e}")



async def main():
    """Open several sites concurrently, print the counts, then reuse one page.

    Index 0 is requested twice; the printed page count shows how many pages
    were actually created. The function then idles until it is cancelled and
    closes all browser resources on the way out.
    """
    manager = BrowserManager()
    try:
        tasks = [browser_main(0), browser_main(0), browser_main(3), browser_main(4)]
        await asyncio.gather(*tasks)

        context_count = 1 if manager._context else 0
        page_count = len(manager._pages)
        print(f"当前上下文数量: {context_count}")
        print(f"当前页面数量: {page_count}")
        print("当前页面索引:", list(manager._pages.keys()))

        # Reuse the page cached for target_idx to open another URL. The log
        # messages are built from target_idx so they always name the index
        # that is really used.
        target_idx = 3
        target_url = "https://www.douban.com/gallery"
        page = manager._pages.get(target_idx)
        if page is not None:
            try:
                await page.goto(target_url, wait_until="load")
                print(f"使用传入值为 {target_idx} 对应的页面打开了网址: {target_url}")
            except Exception as e:
                print(f"打开网址 {target_url} 时出错: {e}")
        else:
            print(f"未找到索引为 {target_idx} 的页面")

        # Idle forever so the program does not exit.
        while True:
            await asyncio.sleep(1)
    finally:
        # Reached on cancellation (e.g. Ctrl+C): release pages, context, driver.
        await manager.close_all()


if __name__ == "__main__":
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        # Ctrl+C is the expected way to stop the idle loop; exit quietly.
        pass

多个上下文

#多个上下文 和 页面管理
import os
from playwright.async_api import async_playwright
import sys
import asyncio
from pathlib import Path

# 动态添加项目根目录到路径
current_dir = Path(__file__).parent
project_root = current_dir.parent
sys.path.append(str(project_root))
from config.config import ConfigManager


class BrowserManager:
    """Singleton keeping one persistent context and one page per row index.

    Every row index gets its own profile directory under ``user_data_dir``.
    All contexts are launched from a single shared Playwright driver.
    """

    _instance = None
    _browser = None  # holds the shared Playwright driver object
    _contexts = {}  # {row_idx: context}
    _pages = {}  # {row_idx: page}
    # The locks are created lazily inside the running event loop (see _locks):
    # building asyncio.Lock() at class-definition time happens before
    # asyncio.run() has created its loop, which breaks on Python < 3.10.
    _lock = None
    _init_lock = None
    _lock_loop = None  # event loop the two locks above were created in
    user_data_dir = os.path.join(project_root, "User Data")

    def __new__(cls):
        """Return the single shared instance, creating it on first use."""
        if not cls._instance:
            print("创建新实例new")
            cls._instance = super().__new__(cls)
        return cls._instance

    def _locks(self):
        """Return ``(page_lock, init_lock)`` belonging to the running loop."""
        loop = asyncio.get_running_loop()
        cls = type(self)
        if cls._lock_loop is not loop:
            cls._lock = asyncio.Lock()
            cls._init_lock = asyncio.Lock()
            cls._lock_loop = loop
        return cls._lock, cls._init_lock

    async def initialize(self, row_idx):
        """Return the context for ``row_idx``, launching it on first use.

        The Playwright driver is started once and reused for every context,
        so ``close_all()`` can stop it; starting a new driver per context
        would leave all but the last one running.
        """
        _, init_lock = self._locks()
        async with init_lock:
            if row_idx not in self._contexts:
                print(f"为索引 {row_idx} 创建新上下文")
                if self._browser is None:
                    self._browser = await async_playwright().start()
                context = await self._browser.chromium.launch_persistent_context(
                    user_data_dir=os.path.join(self.user_data_dir, str(row_idx)),
                    headless=False
                )
                self._contexts[row_idx] = context
            print(f"索引 {row_idx} 的上下文已初始化")
            return self._contexts[row_idx]

    async def get_page(self, row_idx: int):
        """Return the cached page for ``row_idx``, creating context and page if needed."""
        page_lock, _ = self._locks()
        async with page_lock:
            if row_idx not in self._pages:
                print(f"为索引 {row_idx} 创建新页面")
                context = await self.initialize(row_idx)
                page = await context.new_page()
                self._pages[row_idx] = page
            print(f"索引 {row_idx} 已有页面")
            return self._pages[row_idx]

    async def close_page(self, row_idx: int):
        """Close the page of the given row and remove it from the cache.

        The row's context stays open and is reused by a later ``get_page``.
        """
        page = self._pages.pop(row_idx, None)
        if page is not None:
            await page.close()
            print(f"索引 {row_idx} 的页面已关闭")

    async def close_all(self):
        """Close all pages and contexts, stop the driver and reset the state."""
        # Detach everything first so no stale handle survives a failure below.
        pages = list(self._pages.values())
        self._pages.clear()
        contexts = list(self._contexts.values())
        self._contexts.clear()
        playwright, self._browser = self._browser, None
        try:
            for page in pages:
                await page.close()
            for context in contexts:
                await context.close()
        finally:
            # Stop the driver even if closing a page or a context raised.
            if playwright:
                await playwright.stop()


async def browser_main(idx: int):
    """Load the configured URL for ``idx`` in its page, with up to 3 attempts.

    Failures are printed, not raised; after the last failed attempt the
    function simply returns.
    """
    page = await BrowserManager().get_page(idx)
    url = ConfigManager().get_site_url(idx)

    max_retries = 3
    final_attempt = max_retries - 1
    for retry in range(max_retries):
        try:
            # Wait for the "load" state.
            await page.goto(url, wait_until="load")
            print(f"索引 {idx} 的页面加载完成")
            break
        except Exception as e:
            if retry >= final_attempt:
                print(f"索引 {idx} 的页面加载失败,已达到最大重试次数: {e}")
            else:
                print(f"索引 {idx} 的页面加载失败,重试第 {retry + 1} 次: {e}")


async def main():
    """Open several sites concurrently, print the counts, then reuse page 0.

    Index 0 is requested twice; the printed counts show how many contexts and
    pages were actually created. The function then idles until it is
    cancelled and closes all browser resources on the way out.
    """
    manager = BrowserManager()
    try:
        tasks = [browser_main(0), browser_main(0), browser_main(3), browser_main(4)]
        await asyncio.gather(*tasks)

        context_count = len(manager._contexts)
        page_count = len(manager._pages)
        print(f"当前上下文数量: {context_count}")
        print(f"当前页面数量: {page_count}")
        print("当前页面索引:", list(manager._pages.keys()))

        # Reuse the page cached for index 0 to open another URL.
        page = manager._pages.get(0)
        if page is not None:
            target_url = "https://www.douban.com/gallery"
            try:
                await page.goto(target_url, wait_until="load")
                print(f"使用传入值为 0 对应的页面打开了网址: {target_url}")
            except Exception as e:
                print(f"打开网址 {target_url} 时出错: {e}")

        # Idle forever so the program does not exit.
        while True:
            await asyncio.sleep(1)
    finally:
        # Reached on cancellation (e.g. Ctrl+C): release pages, contexts, driver.
        await manager.close_all()


if __name__ == "__main__":
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        # Ctrl+C is the expected way to stop the idle loop; exit quietly.
        pass

天黑了。今天下午听了一下午课。时间浪费了。

代码修改说明

  1. _contexts 字典:在 BrowserManager 类中添加了 _contexts 字典,用于存储每个索引对应的上下文。
  2. initialize 方法:修改 initialize 方法,使其根据 row_idx 创建独立的上下文,并将其存储在 _contexts 字典中。每个上下文使用不同的 user_data_dir,以确保用户数据独立。
  3. get_page 方法:在创建新页面时,先调用 initialize 方法获取对应的上下文,然后在该上下文中创建页面。
 

通过这些修改,每个页面都有自己独立的上下文,从而可以实现不同的登录状态。

 明天继续

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

PyAIGCMaster

1毛钱也是爱

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值