Agent-S单元测试:测试用例编写与执行
概述
Agent-S是一个开源的Agent-Computer Interface(ACI,代理-计算机接口)框架,旨在让AI代理能够像人类一样使用计算机。随着项目规模的扩大,建立完善的单元测试体系对于保证代码质量、防止回归错误至关重要。本文将详细介绍Agent-S项目的单元测试编写与执行方法。
测试环境搭建
依赖安装
首先需要安装测试相关的依赖包:
# Test tooling: pytest plus its coverage, mocking and asyncio plugins
pip install pytest pytest-cov pytest-mock pytest-asyncio
# NOTE(review): unittest ships with the Python standard library; confirm this legacy backport is really needed
pip install unittest2 # only if unittest compatibility is needed
测试目录结构
建议创建以下测试目录结构:
tests/
├── unit/
│ ├── test_core/
│ │ ├── test_engine.py
│ │ ├── test_module.py
│ │ └── test_mllm.py
│ ├── test_agents/
│ │ ├── test_agent_s.py
│ │ ├── test_grounding.py
│ │ └── test_worker.py
│ └── test_utils/
│ ├── test_common_utils.py
│ └── conftest.py
├── integration/
└── fixtures/
核心模块单元测试
Engine模块测试
Engine模块负责与各种LLM API交互,需要测试不同提供商的支持情况:
import pytest
from unittest.mock import Mock, patch
from gui_agents.s2_5.core.engine import OpenAIClient, AnthropicClient, GeminiClient
class TestEngineClients:
    """Unit tests for the LLM client wrappers imported from the engine module."""

    @pytest.fixture
    def mock_openai_response(self):
        """Return a canned chat-completion payload holding a single choice."""
        message = {"content": "测试响应内容"}
        return {"choices": [{"message": message}]}

    def test_openai_client_initialization(self):
        """Check that base_url and model passed to the constructor are exposed."""
        openai_client = OpenAIClient(
            base_url="https://api.openai.com/v1",
            api_key="test_key",
            model="gpt-4",
        )
        assert openai_client.base_url == "https://api.openai.com/v1"
        assert openai_client.model == "gpt-4"

    # NOTE(review): patch() replaces the name where it is looked up; if the
    # engine module does `from openai import OpenAI`, the target should be
    # that module's attribute instead -- confirm against the engine source.
    @patch('openai.OpenAI')
    def test_openai_generate_success(self, mock_openai, mock_openai_response):
        """Check that generate() returns the content of the mocked completion."""
        # Every OpenAI() construction yields a stub whose completion call
        # returns the canned payload.
        sdk_stub = Mock()
        sdk_stub.chat.completions.create.return_value = mock_openai_response
        mock_openai.return_value = sdk_stub

        openai_client = OpenAIClient(api_key="test_key")
        reply = openai_client.generate([{"role": "user", "content": "测试消息"}])

        assert reply == "测试响应内容"
        sdk_stub.chat.completions.create.assert_called_once()
MLLM模块测试
MLLM(Multi-Modal Large Language Model)模块处理多模态输入输出:
import base64
from io import BytesIO
from PIL import Image
from gui_agents.s2_5.core.mllm import MLLMAgent
class TestMLLMAgent:
    """Unit tests for MLLMAgent image encoding and message bookkeeping."""

    @pytest.fixture
    def sample_image(self):
        """Return the PNG-encoded bytes of a 100x100 solid red image."""
        png_buffer = BytesIO()
        Image.new('RGB', (100, 100), color='red').save(png_buffer, format="PNG")
        return png_buffer.getvalue()

    def test_encode_image_base64(self, sample_image):
        """Check that encode_image() yields a PNG data URI with a decodable payload."""
        data_uri = MLLMAgent().encode_image(sample_image)
        # The result must carry the PNG data-URI prefix.
        assert data_uri.startswith('data:image/png;base64,')
        # The segment after the first comma is the base64 payload; it must
        # decode to a non-empty byte string.
        payload = data_uri.split(',')[1]
        assert len(base64.b64decode(payload)) > 0

    def test_message_management(self):
        """Check that adding and removing messages updates agent.messages."""
        mllm = MLLMAgent()

        # The system prompt becomes the first stored message.
        mllm.add_system_prompt("你是一个有帮助的AI助手")
        assert len(mllm.messages) == 1

        # A user message is appended after it and keeps its role.
        mllm.add_message("用户消息", role="user")
        assert len(mllm.messages) == 2
        assert mllm.messages[1]["role"] == "user"

        # Removing index 0 leaves a single message.
        mllm.remove_message_at(0)
        assert len(mllm.messages) == 1
Agent模块测试策略
AgentS2_5核心功能测试
from unittest.mock import MagicMock
from gui_agents.s2_5.agents.agent_s import AgentS2_5
from gui_agents.s2_5.agents.grounding import OSWorldACI
class TestAgentS2_5:
    """Unit tests for AgentS2_5 construction and its predict() entry point."""

    @pytest.fixture
    def sample_image(self):
        """Return the PNG-encoded bytes of a 100x100 solid red image.

        Declared in this class because a fixture defined as a method of a
        test class is only available to tests of that class; without a
        fixture of this name in scope, ``sample_observation`` fails with
        "fixture 'sample_image' not found".
        """
        # Local imports keep this fixture independent of what the
        # surrounding module happens to import.
        from io import BytesIO

        from PIL import Image

        png_buffer = BytesIO()
        Image.new('RGB', (100, 100), color='red').save(png_buffer, format="PNG")
        return png_buffer.getvalue()

    @pytest.fixture
    def mock_grounding_agent(self):
        """Return a MagicMock constrained to the OSWorldACI interface."""
        mock_agent = MagicMock(spec=OSWorldACI)
        mock_agent.assign_coordinates.return_value = "pyautogui.click(100, 200)"
        return mock_agent

    @pytest.fixture
    def mock_engine_params(self):
        """Return engine parameters with a dummy API key."""
        return {
            "engine_type": "openai",
            "model": "gpt-4",
            "api_key": "test_key",
        }

    @pytest.fixture
    def sample_observation(self, sample_image):
        """Return an observation dict carrying a screenshot and a timestamp."""
        return {
            "screenshot": sample_image,
            "timestamp": "2024-01-01T00:00:00",
        }

    def test_agent_initialization(self, mock_engine_params, mock_grounding_agent):
        """Check the agent keeps the grounding agent and has worker/manager attributes."""
        agent = AgentS2_5(mock_engine_params, mock_grounding_agent)
        assert agent.grounding_agent == mock_grounding_agent
        assert hasattr(agent, 'worker')
        assert hasattr(agent, 'manager')

    @patch('gui_agents.s2_5.agents.agent_s.Worker')
    def test_predict_method(self, mock_worker, mock_engine_params,
                            mock_grounding_agent, sample_observation):
        """Check predict() returns the (info, actions) pair produced by the worker."""
        # Replace the Worker class so that the agent's worker is a stub
        # returning a fixed (info, actions) tuple.
        mock_worker_instance = MagicMock()
        mock_worker_instance.generate_next_action.return_value = (
            {"action": "test"}, ["pyautogui.click(100, 200)"]
        )
        mock_worker.return_value = mock_worker_instance

        agent = AgentS2_5(mock_engine_params, mock_grounding_agent)
        info, actions = agent.predict("打开浏览器", sample_observation)

        assert isinstance(info, dict)
        assert isinstance(actions, list)
        assert len(actions) > 0
        mock_worker_instance.generate_next_action.assert_called_once()
Grounding模块测试
Grounding模块负责将自然语言指令转换为可执行的GUI操作:
from gui_agents.s2_5.agents.grounding import OSWorldACI
class TestOSWorldACI:
    """Unit tests for OSWorldACI coordinate generation and argument parsing."""

    @pytest.fixture
    def sample_image(self):
        """Return the PNG-encoded bytes of a 100x100 solid red image.

        Declared in this class because a fixture defined as a method of a
        test class is only available to tests of that class; without a
        fixture of this name in scope, ``test_coordinate_generation`` fails
        with "fixture 'sample_image' not found".
        """
        # Local imports keep this fixture independent of what the
        # surrounding module happens to import.
        from io import BytesIO

        from PIL import Image

        png_buffer = BytesIO()
        Image.new('RGB', (100, 100), color='red').save(png_buffer, format="PNG")
        return png_buffer.getvalue()

    @pytest.fixture
    def grounding_agent(self):
        """Build an OSWorldACI for "linux" with separate generation and grounding params."""
        engine_params_gen = {
            "engine_type": "openai",
            "model": "gpt-4",
        }
        engine_params_ground = {
            "engine_type": "huggingface",
            "model": "ui-tars-1.5-7b",
        }
        return OSWorldACI("linux", engine_params_gen, engine_params_ground)

    def test_coordinate_generation(self, grounding_agent, sample_image):
        """Check generate_coords() turns the grounding engine's reply into a coordinate list."""
        obs = {"screenshot": sample_image}
        # Stub the grounding engine so no model is actually queried.
        with patch.object(grounding_agent.engine_for_grounding, 'generate') as mock_generate:
            mock_generate.return_value = "[100, 200]"
            coords = grounding_agent.generate_coords("按钮", obs)
            assert coords == [100, 200]
            mock_generate.assert_called_once()

    def test_action_decorator_parsing(self):
        """Check parse_function_args() extracts keyword arguments from a call string."""
        test_function = "click(element_description='按钮', num_clicks=2)"
        parsed_args = OSWorldACI.parse_function_args(test_function)
        assert "element_description" in parsed_args
        assert "num_clicks" in parsed_args
        assert parsed_args["element_description"] == "按钮"
        assert parsed_args["num_clicks"] == 2
工具函数测试
Common Utils测试
from gui_agents.s2_5.utils.common_utils import (
split_thinking_response,
parse_single_code_from_string,
sanitize_code
)
class TestCommonUtils:
    """Unit tests for the helpers imported from common_utils."""

    def test_split_thinking_response(self):
        """Check splitting of a response into its thinking part and final answer."""
        # A response that contains a thinking section.
        thinking, final = split_thinking_response("让我思考一下...\n\n最终答案是42")
        assert thinking == "让我思考一下..."
        assert final == "最终答案是42"

        # A response without a thinking section yields an empty thinking part.
        thinking, final = split_thinking_response("直接回答")
        assert thinking == ""
        assert final == "直接回答"

    def test_code_parsing(self):
        """Check extraction of a single fenced code block from surrounding text."""
        # Text with one fenced Python block between two prose lines.
        fenced_text = (
            "\n一些文本\n"
            "```python\n"
            'print("Hello World")\n'
            "```\n"
            "更多文本\n"
        )
        assert parse_single_code_from_string(fenced_text) == 'print("Hello World")'

        # Plain text without any code block gives None.
        assert parse_single_code_from_string("纯文本内容") is None

    def test_code_sanitization(self):
        """Check that sanitize_code() drops os.system while keeping print."""
        risky_source = (
            "\nimport os\n"
            'os.system("rm -rf /")\n'
            'print("安全代码")\n'
        )
        cleaned = sanitize_code(risky_source)
        assert "os.system" not in cleaned
        assert "print" in cleaned
测试配置与执行
pytest配置
创建pytest.ini配置文件:
# pytest settings: where tests live, how they are named, and default options.
[pytest]
# Collect tests from the unit-test tree only.
testpaths = tests/unit
# Name patterns for test files, classes and functions.
python_files = test_*.py
python_classes = Test*
python_functions = test_*
# Always run verbosely and report coverage of gui_agents (HTML report plus missing lines in the terminal).
addopts = -v --cov=gui_agents --cov-report=html --cov-report=term-missing
测试覆盖率报告
生成详细的测试覆盖率报告:
# Run all tests and generate an HTML coverage report
pytest --cov=gui_agents --cov-report=html
# Run only the tests of one module directory
pytest tests/unit/test_core/ -v
# Run a single test class
pytest tests/unit/test_core/test_engine.py::TestEngineClients -v
持续集成配置
创建GitHub Actions工作流(.github/workflows/test.yml):
# CI workflow: run the unit tests with coverage on each Python version in the matrix.
name: Agent-S Unit Tests
on:
  push:
    branches: [ main, develop ]
  pull_request:
    branches: [ main ]
jobs:
  test:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Versions are quoted because YAML reads a bare 3.10 as the float
        # 3.1, which would make setup-python request the wrong version.
        python-version: ["3.8", "3.9", "3.10"]
    steps:
      - uses: actions/checkout@v3
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
          pip install pytest pytest-cov
      - name: Run unit tests
        run: |
          pytest tests/unit/ --cov=gui_agents --cov-report=xml
      - name: Upload coverage to Codecov
        uses: codecov/codecov-action@v3
        with:
          file: ./coverage.xml
高级测试技巧
Mock策略
对于外部依赖,使用适当的mock策略:
from unittest.mock import patch, MagicMock
class TestAdvancedMocking:
    """Examples of mocking patterns for external dependencies."""

    def test_multiple_api_calls(self):
        """Show that a ``side_effect`` list yields one value per successive call."""
        with patch('gui_agents.s2_5.core.engine.OpenAIClient.generate') as mock_generate:
            # Each call to the mock returns the next item of this sequence.
            expected = ["第一次响应", "第二次响应", "第三次响应"]
            mock_generate.side_effect = list(expected)

            # Stand-in for the code under test, which would invoke generate()
            # three times. Without these calls the call-count assertion
            # below cannot pass.
            responses = [mock_generate() for _ in range(3)]

            assert responses == expected
            assert mock_generate.call_count == 3

    def test_context_manager_mocking(self):
        """Show how to mock an object that is used as a context manager."""
        mock_response = MagicMock()
        # ``with obj as value`` binds the return value of __enter__; a falsy
        # __exit__ result means exceptions raised in the body propagate.
        mock_response.__enter__.return_value = "模拟响应"
        mock_response.__exit__.return_value = None
        # 'some_module.APIClient' is a placeholder patch target.
        with patch('some_module.APIClient', return_value=mock_response):
            # Exercise the code that uses the client as a context manager here.
            pass
参数化测试
使用参数化测试覆盖多种场景:
import pytest
@pytest.mark.parametrize(
    "input_text,expected",
    [
        ("简单文本", "简单文本"),
        ("带有\n换行", "带有换行"),
        (" 前后空格 ", "前后空格"),
        ("", ""),
        (None, ""),
    ],
)
def test_text_normalization(input_text, expected):
    """Check normalize_text() against each (input, expected) pair."""
    assert normalize_text(input_text) == expected
@pytest.mark.parametrize(
    "platform",
    ["linux", "darwin", "windows"],
)
def test_platform_specific_behavior(platform):
    """Check that an agent created for a platform reports that platform."""
    platform_agent = create_agent_for_platform(platform)
    assert platform_agent.platform == platform
测试最佳实践
1. 测试金字塔原则
遵循测试金字塔:以数量最多、运行最快的单元测试为底层,辅以适量的集成测试,顶层只保留少量端到端测试。
2. 测试命名规范
| 测试类型 | 命名模式 | 示例 |
|---|---|---|
| 单元测试 | test_<method>_<scenario> | test_generate_coords_success |
| 集成测试 | test_<feature>_integration | test_agent_workflow_integration |
| 性能测试 | test_<component>_performance | test_response_time_performance |
3. 测试数据管理
使用fixture工厂模式创建测试数据:
@pytest.fixture
def agent_factory():
    """Provide a callable that builds AgentS2_5 instances on demand."""

    def _build_agent(platform="linux", model="gpt-4"):
        """Create an agent for ``platform`` whose generation engine uses ``model``."""
        generation_params = {
            "engine_type": "openai",
            "model": model,
            "api_key": "test_key",
        }
        grounding_params = {
            "engine_type": "huggingface",
            "model": "ui-tars-1.5-7b",
        }
        # The generation params are shared by the ACI and the agent itself.
        aci = OSWorldACI(platform, generation_params, grounding_params)
        return AgentS2_5(generation_params, aci)

    return _build_agent
def test_agent_with_different_platforms(agent_factory):
    """Check that factory-built agents report the platform they were built for."""
    # Build both agents first, then verify each one.
    built = {
        name: agent_factory(platform=name)
        for name in ("linux", "windows")
    }
    for name, agent in built.items():
        assert agent.platform == name
总结
建立完善的单元测试体系对于Agent-S这样的复杂AI框架至关重要。通过本文介绍的测试策略、示例代码和最佳实践,您可以:
- 快速搭建测试环境 - 配置pytest和覆盖率工具
- 编写高质量测试用例 - 覆盖核心模块和边界情况
- 实施持续集成 - 自动化测试执行和报告生成
- 遵循最佳实践 - 确保测试的可维护性和可靠性
定期运行测试套件,保持高测试覆盖率,将显著提高Agent-S项目的代码质量和开发效率。
创作声明:本文部分内容由AI辅助生成(AIGC),仅供参考



