Agent-S单元测试:测试用例编写与执行

Agent-S单元测试:测试用例编写与执行

【免费下载链接】Agent-S —— Agent S: an open agentic framework that uses computers like a human。项目地址: https://gitcode.com/GitHub_Trending/ag/Agent-S

概述

Agent-S是一个开源的Agent-Computer Interface(ACI,代理-计算机接口)框架,旨在让AI代理能够像人类一样使用计算机。随着项目规模的扩大,建立完善的单元测试体系对于保证代码质量、防止回归错误至关重要。本文将详细介绍Agent-S项目的单元测试编写与执行方法。

测试环境搭建

依赖安装

首先需要安装测试相关的依赖包:

pip install pytest pytest-cov pytest-mock pytest-asyncio
# unittest 属于 Python 3 标准库,无需额外安装(unittest2 是已停止维护的 Python 2 回移植包);pytest 可直接运行 unittest 风格的用例

测试目录结构

建议创建以下测试目录结构:

tests/
├── unit/
│   ├── test_core/
│   │   ├── test_engine.py
│   │   ├── test_module.py
│   │   └── test_mllm.py
│   ├── test_agents/
│   │   ├── test_agent_s.py
│   │   ├── test_grounding.py
│   │   └── test_worker.py
│   └── test_utils/
│       ├── test_common_utils.py
│       └── conftest.py
├── integration/
└── fixtures/

核心模块单元测试

Engine模块测试

Engine模块负责与各种LLM API交互,需要测试不同提供商的支持情况:

import pytest
from unittest.mock import Mock, patch
from gui_agents.s2_5.core.engine import OpenAIClient, AnthropicClient, GeminiClient

class TestEngineClients:
    """Unit tests for the LLM client wrappers exposed by the engine module."""

    @pytest.fixture
    def mock_openai_response(self):
        """Return a stand-in for an OpenAI chat-completion response.

        The OpenAI v1 SDK (the one that exposes ``openai.OpenAI``) returns
        response objects that are read through attributes, i.e.
        ``response.choices[0].message.content``, not plain dicts. The stub
        therefore mirrors that attribute shape with nested ``Mock`` objects.
        """
        message = Mock()
        message.content = "测试响应内容"
        choice = Mock()
        choice.message = message
        response = Mock()
        response.choices = [choice]
        return response

    def test_openai_client_initialization(self):
        """Check that the constructor stores ``base_url`` and ``model``."""
        client = OpenAIClient(
            base_url="https://api.openai.com/v1",
            api_key="test_key",
            model="gpt-4"
        )
        assert client.base_url == "https://api.openai.com/v1"
        assert client.model == "gpt-4"

    # NOTE(review): ``patch`` replaces a name where it is looked up. If the
    # engine module does ``from openai import OpenAI``, the target probably
    # needs to be ``gui_agents.s2_5.core.engine.OpenAI`` -- confirm against
    # the engine module's imports.
    @patch('openai.OpenAI')
    def test_openai_generate_success(self, mock_openai, mock_openai_response):
        """Check the success path of ``generate`` against a mocked SDK client.

        The test expects ``generate`` to return the message content of the
        first choice and to call ``chat.completions.create`` exactly once.
        """
        mock_client = Mock()
        mock_client.chat.completions.create.return_value = mock_openai_response
        mock_openai.return_value = mock_client

        client = OpenAIClient(api_key="test_key")
        messages = [{"role": "user", "content": "测试消息"}]
        response = client.generate(messages)

        assert response == "测试响应内容"
        mock_client.chat.completions.create.assert_called_once()

MLLM模块测试

MLLM(Multi-Modal Large Language Model)模块处理多模态输入输出:

import base64
from io import BytesIO
from PIL import Image
from gui_agents.s2_5.core.mllm import MLLMAgent

class TestMLLMAgent:
    """Tests for ``MLLMAgent`` image encoding and message bookkeeping."""

    @pytest.fixture
    def sample_image(self):
        """Provide the PNG bytes of a 100x100 solid red RGB image."""
        png_buffer = BytesIO()
        Image.new('RGB', (100, 100), color='red').save(png_buffer, format="PNG")
        return png_buffer.getvalue()

    def test_encode_image_base64(self, sample_image):
        """Check that ``encode_image`` yields a PNG data URL with a decodable payload."""
        data_url = MLLMAgent().encode_image(sample_image)

        # The result must carry the PNG data-URL prefix.
        assert data_url.startswith('data:image/png;base64,')
        # The part after the first comma is the base64 payload; it must
        # decode to a non-empty byte string.
        payload = data_url.split(',')[1]
        assert len(base64.b64decode(payload)) > 0

    def test_message_management(self):
        """Check adding a system prompt, adding a user message, and removing a message."""
        agent = MLLMAgent()

        # After adding the system prompt exactly one message is stored.
        agent.add_system_prompt("你是一个有帮助的AI助手")
        assert len(agent.messages) == 1

        # The user message is stored second and keeps its role.
        agent.add_message("用户消息", role="user")
        assert len(agent.messages) == 2
        second_message = agent.messages[1]
        assert second_message["role"] == "user"

        # Removing index 0 leaves a single message behind.
        agent.remove_message_at(0)
        assert len(agent.messages) == 1

Agent模块测试策略

AgentS2_5核心功能测试

from unittest.mock import MagicMock
from gui_agents.s2_5.agents.agent_s import AgentS2_5
from gui_agents.s2_5.agents.grounding import OSWorldACI

class TestAgentS2_5:
    """Tests for constructing ``AgentS2_5`` and for its ``predict`` method."""

    @pytest.fixture
    def sample_image(self):
        """Return PNG bytes of a 100x100 solid red image used as a screenshot.

        Defined on this class because fixtures declared inside another test
        class are not visible here; without it ``sample_observation`` fails
        with "fixture 'sample_image' not found". Move it to a shared
        ``conftest.py`` if several test modules need it.
        """
        # Local imports keep this fixture self-contained.
        from io import BytesIO

        from PIL import Image

        buffered = BytesIO()
        Image.new('RGB', (100, 100), color='red').save(buffered, format="PNG")
        return buffered.getvalue()

    @pytest.fixture
    def mock_grounding_agent(self):
        """Return a ``MagicMock`` specced on ``OSWorldACI``.

        ``assign_coordinates`` is stubbed to return a fixed pyautogui click
        command string.
        """
        mock_agent = MagicMock(spec=OSWorldACI)
        mock_agent.assign_coordinates.return_value = "pyautogui.click(100, 200)"
        return mock_agent

    @pytest.fixture
    def mock_engine_params(self):
        """Return a dummy engine-parameter dict (engine type, model, API key)."""
        return {
            "engine_type": "openai",
            "model": "gpt-4",
            "api_key": "test_key"
        }

    @pytest.fixture
    def sample_observation(self, sample_image):
        """Return an observation dict holding a screenshot and a timestamp."""
        return {
            "screenshot": sample_image,
            "timestamp": "2024-01-01T00:00:00"
        }

    def test_agent_initialization(self, mock_engine_params, mock_grounding_agent):
        """Check that the agent keeps the grounding agent and exposes ``worker`` and ``manager``."""
        agent = AgentS2_5(mock_engine_params, mock_grounding_agent)

        assert agent.grounding_agent == mock_grounding_agent
        assert hasattr(agent, 'worker')
        assert hasattr(agent, 'manager')

    @patch('gui_agents.s2_5.agents.agent_s.Worker')
    def test_predict_method(self, mock_worker, mock_engine_params,
                           mock_grounding_agent, sample_observation):
        """Check ``predict`` with the ``Worker`` class replaced by a mock.

        The test expects ``predict`` to return an ``(info, actions)`` pair
        where ``info`` is a dict and ``actions`` is a non-empty list, and the
        worker's ``generate_next_action`` to be called exactly once.
        """
        # Make every Worker() built inside the agent return this stub.
        mock_worker_instance = MagicMock()
        mock_worker_instance.generate_next_action.return_value = (
            {"action": "test"}, ["pyautogui.click(100, 200)"]
        )
        mock_worker.return_value = mock_worker_instance

        agent = AgentS2_5(mock_engine_params, mock_grounding_agent)
        info, actions = agent.predict("打开浏览器", sample_observation)

        assert isinstance(info, dict)
        assert isinstance(actions, list)
        assert len(actions) > 0
        mock_worker_instance.generate_next_action.assert_called_once()

Grounding模块测试

Grounding模块负责将自然语言指令转换为可执行的GUI操作:

from gui_agents.s2_5.agents.grounding import OSWorldACI

class TestOSWorldACI:
    """Tests for ``OSWorldACI`` coordinate generation and argument parsing."""

    @pytest.fixture
    def sample_image(self):
        """Return PNG bytes of a 100x100 solid red image used as a screenshot.

        Defined on this class because fixtures declared inside another test
        class are not visible here; without it ``test_coordinate_generation``
        fails with "fixture 'sample_image' not found". Move it to a shared
        ``conftest.py`` if several test modules need it.
        """
        # Local imports keep this fixture self-contained.
        from io import BytesIO

        from PIL import Image

        buffered = BytesIO()
        Image.new('RGB', (100, 100), color='red').save(buffered, format="PNG")
        return buffered.getvalue()

    @pytest.fixture
    def grounding_agent(self):
        """Build an ``OSWorldACI`` for "linux" with separate generation and grounding engine params."""
        engine_params_gen = {
            "engine_type": "openai",
            "model": "gpt-4"
        }
        engine_params_ground = {
            "engine_type": "huggingface",
            "model": "ui-tars-1.5-7b"
        }
        return OSWorldACI("linux", engine_params_gen, engine_params_ground)

    def test_coordinate_generation(self, grounding_agent, sample_image):
        """Check ``generate_coords`` with the grounding engine's ``generate`` patched.

        The patched engine returns the string "[100, 200]"; the test expects
        ``generate_coords`` to return the list ``[100, 200]`` and to call the
        engine exactly once.
        """
        obs = {"screenshot": sample_image}

        # Replace the grounding engine call so no model is contacted.
        with patch.object(grounding_agent.engine_for_grounding, 'generate') as mock_generate:
            mock_generate.return_value = "[100, 200]"
            coords = grounding_agent.generate_coords("按钮", obs)

            assert coords == [100, 200]
            mock_generate.assert_called_once()

    def test_action_decorator_parsing(self):
        """Check that ``parse_function_args`` extracts keyword arguments from a call string."""
        test_function = "click(element_description='按钮', num_clicks=2)"
        parsed_args = OSWorldACI.parse_function_args(test_function)

        assert "element_description" in parsed_args
        assert "num_clicks" in parsed_args
        assert parsed_args["element_description"] == "按钮"
        assert parsed_args["num_clicks"] == 2

工具函数测试

Common Utils测试

from gui_agents.s2_5.utils.common_utils import (
    split_thinking_response,
    parse_single_code_from_string,
    sanitize_code
)

class TestCommonUtils:
    """Tests for the helper functions imported from ``common_utils``."""

    def test_split_thinking_response(self):
        """Check how a response is split into a thinking part and a final part."""
        # A response with a thinking part, separated by a blank line.
        reasoning, answer = split_thinking_response("让我思考一下...\n\n最终答案是42")
        assert reasoning == "让我思考一下..."
        assert answer == "最终答案是42"

        # A response without a thinking part: the first element is expected
        # to be empty and the second to be the whole text.
        reasoning, answer = split_thinking_response("直接回答")
        assert reasoning == ""
        assert answer == "直接回答"

    def test_code_parsing(self):
        """Check extraction of a single fenced code block from surrounding text."""
        # Text that contains one fenced Python block.
        fenced_text = """
        一些文本
        ```python
        print("Hello World")
        ```
        更多文本
        """
        assert parse_single_code_from_string(fenced_text) == 'print("Hello World")'

        # Text with no fenced block: the test expects ``None``.
        extracted = parse_single_code_from_string("纯文本内容")
        assert extracted is None

    def test_code_sanitization(self):
        """Check that sanitizing drops the ``os.system`` call but keeps ``print``."""
        untrusted_source = """
        import os
        os.system("rm -rf /")
        print("安全代码")
        """

        cleaned = sanitize_code(untrusted_source)
        assert "os.system" not in cleaned
        assert "print" in cleaned

测试配置与执行

pytest配置

创建pytest.ini配置文件:

[pytest]
testpaths = tests/unit
python_files = test_*.py
python_classes = Test*
python_functions = test_*
addopts = -v --cov=gui_agents --cov-report=html --cov-report=term-missing

测试覆盖率报告

生成详细的测试覆盖率报告:

# 运行所有测试并生成覆盖率报告
pytest --cov=gui_agents --cov-report=html

# 只运行特定模块的测试
pytest tests/unit/test_core/ -v

# 运行特定测试类
pytest tests/unit/test_core/test_engine.py::TestEngineClients -v

持续集成配置

创建GitHub Actions工作流(.github/workflows/test.yml):

name: Agent-S Unit Tests

on:
  push:
    branches: [ main, develop ]
  pull_request:
    branches: [ main ]

jobs:
  test:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Versions must be quoted: an unquoted 3.10 is a YAML float and is
        # read as 3.1, which selects the wrong interpreter.
        python-version: ["3.8", "3.9", "3.10"]

    steps:
    - uses: actions/checkout@v3
    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v4
      with:
        python-version: ${{ matrix.python-version }}

    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install -r requirements.txt
        pip install pytest pytest-cov

    - name: Run unit tests
      run: |
        pytest tests/unit/ --cov=gui_agents --cov-report=xml

    - name: Upload coverage to Codecov
      uses: codecov/codecov-action@v3
      with:
        file: ./coverage.xml

高级测试技巧

Mock策略

对于外部依赖,使用适当的mock策略:

from unittest.mock import patch, MagicMock

class TestAdvancedMocking:
    """Examples of mocking patterns for external dependencies."""

    def test_multiple_api_calls(self):
        """Show a patched ``generate`` returning a different value on each call.

        The original example asserted ``call_count == 3`` without ever
        calling the mock, so it could not pass. The mock is now invoked three
        times and the returned sequence is checked as well.
        """
        expected_responses = ["第一次响应", "第二次响应", "第三次响应"]
        with patch('gui_agents.s2_5.core.engine.OpenAIClient.generate') as mock_generate:
            # An iterable side_effect yields one item per call, in order.
            mock_generate.side_effect = expected_responses

            # Stand-in for the code under test, which would call generate
            # three times; replace with the real logic being tested.
            responses = [mock_generate() for _ in range(3)]

            assert responses == expected_responses
            assert mock_generate.call_count == 3

    def test_context_manager_mocking(self):
        """Show how to mock an object that is used as a context manager."""
        mock_response = MagicMock()
        # ``with mock_response as value`` binds value to this string.
        mock_response.__enter__.return_value = "模拟响应"
        # A falsy __exit__ result lets exceptions from the with-body propagate.
        mock_response.__exit__.return_value = None

        # NOTE(review): 'some_module.APIClient' is a placeholder target and
        # is not importable as written; substitute the real module path.
        with patch('some_module.APIClient', return_value=mock_response):
            # Exercise the code that uses the client as a context manager here.
            pass

参数化测试

使用参数化测试覆盖多种场景:

import pytest

@pytest.mark.parametrize("input_text,expected", [
    ("简单文本", "简单文本"),
    ("带有\n换行", "带有换行"),
    ("  前后空格  ", "前后空格"),
    ("", ""),
    (None, ""),
])
def test_text_normalization(input_text, expected):
    """Check ``normalize_text`` against each (input, expected) pair above."""
    # NOTE(review): normalize_text is not imported in the visible source --
    # confirm where it is defined.
    assert normalize_text(input_text) == expected

@pytest.mark.parametrize("platform", ["linux", "darwin", "windows"])
def test_platform_specific_behavior(platform):
    """Check that an agent built for a platform reports that same platform."""
    # NOTE(review): create_agent_for_platform is not imported in the visible
    # source -- confirm where it is defined.
    assert create_agent_for_platform(platform).platform == platform

测试最佳实践

1. 测试金字塔原则

(原文此处为 mermaid 图表,未能正常显示。)测试金字塔:底层是数量最多、运行最快的单元测试,中间是集成测试,顶层是数量最少的端到端测试。

2. 测试命名规范

| 测试类型 | 命名模式 | 示例 |
| --- | --- | --- |
| 单元测试 | test_<method>_<scenario> | test_generate_coords_success |
| 集成测试 | test_<feature>_integration | test_agent_workflow_integration |
| 性能测试 | test_<component>_performance | test_response_time_performance |

3. 测试数据管理

使用fixture工厂模式创建测试数据:

@pytest.fixture
def agent_factory():
    """Fixture that hands tests a callable for building ``AgentS2_5`` instances."""
    def _make_agent(platform="linux", model="gpt-4"):
        # The same generation-engine dict is given to both the grounding
        # agent and the AgentS2_5 constructor.
        generation_params = dict(
            engine_type="openai",
            model=model,
            api_key="test_key",
        )
        grounding_params = dict(
            engine_type="huggingface",
            model="ui-tars-1.5-7b",
        )
        aci = OSWorldACI(platform, generation_params, grounding_params)
        return AgentS2_5(generation_params, aci)
    return _make_agent

def test_agent_with_different_platforms(agent_factory):
    """Build agents for linux and windows via the factory and check ``platform`` on each."""
    # Build both agents first, then verify them in the same order.
    agents = {
        name: agent_factory(platform=name)
        for name in ("linux", "windows")
    }

    for name, agent in agents.items():
        assert agent.platform == name

总结

建立完善的单元测试体系对于Agent-S这样的复杂AI框架至关重要。通过本文介绍的测试策略、示例代码和最佳实践,您可以:

  1. 快速搭建测试环境 - 配置pytest和覆盖率工具
  2. 编写高质量测试用例 - 覆盖核心模块和边界情况
  3. 实施持续集成 - 自动化测试执行和报告生成
  4. 遵循最佳实践 - 确保测试的可维护性和可靠性

定期运行测试套件,保持高测试覆盖率,将显著提高Agent-S项目的代码质量和开发效率。

【免费下载链接】Agent-S —— Agent S: an open agentic framework that uses computers like a human。项目地址: https://gitcode.com/GitHub_Trending/ag/Agent-S

创作声明:本文部分内容由AI辅助生成(AIGC),仅供参考

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值