import asyncio
import aiohttp
import json
import time
import os
import random
from typing import List, Dict, Any
import pandas as pd
from datetime import datetime
import asyncio
from asyncio import Semaphore
import statistics
import csv
# ===================== 配置 =====================
VLLM_URL = os.getenv("VLLM_URL", "http://localhost:8001/v1/chat/completions")
VLLM_MODEL = os.getenv("VLLM_MODEL", "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B")
API_BASE_URL = "http://localhost:8001" # 你的FastAPI服务地址
# 压力测试结果
pressure_test_results = []
# 详细回答记录
detailed_answers = []
# ===================== 读取测试问题 =====================
def load_test_questions(file_path: str = "questions.txt", num_questions: int = 20) -> List[str]:
"""从文件中读取测试问题"""
try:
with open(file_path, 'r', encoding='utf-8') as f:
questions = [line.strip() for line in f if line.strip()]
if len(questions) == 0:
print("⚠️ 问题文件为空,使用默认问题集")
return [
"电视电话会议和国际会议分别按什么管理?",
"正高级技术职称专业人员和局级领导干部的师资费如何计算?",
"公司会议费用报销需要哪些材料?",
"差旅费报销标准是什么?",
"项目经费使用有哪些规定?"
] * 4 # 复制成20个问题
if len(questions) <= num_questions:
return questions
else:
return random.sample(questions, num_questions)
except FileNotFoundError:
print(f"❌ 问题文件 {file_path} 未找到,使用默认问题集")
return [
"电视电话会议和国际会议分别按什么管理?",
"正高级技术职称专业人员和局级领导干部的师资费如何计算?",
"公司会议费用报销需要哪些材料?",
] * 7 # 复制成21个问题
fileIDS = [] # 此处省略
# ===================== 处理流式响应 =====================
async def process_stream_response(response) -> Dict[str, Any]:
"""处理流式响应并提取完整回答"""
full_response = ""
metadata_info = {}
status_messages = []
error_info = None
async for line in response.content:
if line:
decoded_line = line.decode('utf-8').strip()
if decoded_line.startswith('data: '):
try:
data = json.loads(decoded_line[6:])
event_type = data.get('event')
if event_type == 'metadata':
metadata_info = data
elif event_type == 'status':
status_messages.append(data.get('status', ''))
elif event_type == 'answer' and 'text' in data:
full_response += data['text']
elif event_type == 'end':
break
elif event_type == 'error':
error_info = data.get('error', '未知错误')
break
except json.JSONDecodeError as e:
print(f"JSON解析错误: {e}, 原始数据: {decoded_line}")
continue
return {
'full_response': full_response,
'metadata': metadata_info,
'status_messages': status_messages,
'error': error_info,
'success': error_info is None and full_response != ""
}
# ===================== 单个用户请求模拟 =====================
async def simulate_single_user(user_id: int, questions: List[str], request_semaphore: Semaphore) -> Dict[str, Any]:
"""模拟单个用户的请求行为"""
user_results = []
# 每个用户随机问1-3个问题
num_questions = random.randint(1, 3)
user_questions = random.sample(questions, num_questions)
print(f"👤 用户{user_id:03d} 开始提问,共{num_questions}个问题")
for i, question in enumerate(user_questions, 1):
async with request_semaphore:
try:
request_start_time = time.time()
# 构造请求数据
request_data = {
"system_id": "lXI5C2kB2Zv5Ezs9Mmzbhmcuz9ekghDm", # 需要替换为实际的system_id
"fileID": fileIDS, # 使用固定的测试文件
"type": 0,
"category": "全部",
"query": question,
"model": "deepseek"
}
# 发送请求
async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=300)) as session:
async with session.post(
f"{API_BASE_URL}/llm/stream/answer",
json=request_data
) as response:
if response.status != 200:
error_msg = f"HTTP错误: {response.status}"
user_results.append({
'user_id': user_id,
'question': question,
'success': False,
'response_time': time.time() - request_start_time,
'error': error_msg,
'answer_length': 0,
'full_answer': "",
'metadata': {},
'status_messages': []
})
continue
# 处理流式响应
stream_result = await process_stream_response(response)
response_time = time.time() - request_start_time
result_data = {
'user_id': user_id,
'question': question,
'success': stream_result['success'],
'response_time': response_time,
'answer_length': len(stream_result['full_response']),
'full_answer': stream_result['full_response'],
'answer_preview': stream_result['full_response'][:100] + "..." if stream_result[
'full_response'] else "",
'metadata': stream_result['metadata'],
'status_messages': stream_result['status_messages'],
'error': stream_result['error']
}
user_results.append(result_data)
if stream_result['success']:
print(
f" ✅ 用户{user_id:03d} 问题{i} 完成 - 耗时: {response_time:.2f}s, 回答长度: {len(stream_result['full_response'])}")
else:
print(f" ❌ 用户{user_id:03d} 问题{i} 失败 - 错误: {stream_result['error']}")
except asyncio.TimeoutError:
error_msg = "请求超时"
user_results.append({
'user_id': user_id,
'question': question,
'success': False,
'response_time': time.time() - request_start_time,
'error': error_msg,
'answer_length': 0,
'full_answer': "",
'metadata': {},
'status_messages': []
})
print(f" ❌ 用户{user_id:03d} 问题{i} 超时")
except Exception as e:
error_msg = f"请求异常: {str(e)}"
user_results.append({
'user_id': user_id,
'question': question,
'success': False,
'response_time': time.time() - request_start_time,
'error': error_msg,
'answer_length': 0,
'full_answer': "",
'metadata': {},
'status_messages': []
})
print(f" ❌ 用户{user_id:03d} 问题{i} 异常: {str(e)}")
return {
'user_id': user_id,
'total_questions': num_questions,
'results': user_results
}
# ===================== 并发压力测试 =====================
async def run_pressure_test(concurrent_users: int, questions: List[str], max_concurrent_requests: int = 50):
"""运行指定并发用户数的压力测试"""
print(f"\n{'🚀' * 20}")
print(f"🔥 开始压力测试 - 并发用户数: {concurrent_users}")
print(f"{'🚀' * 20}")
test_start_time = time.time()
# 限制并发请求数,避免过度压垮服务器
request_semaphore = Semaphore(max_concurrent_requests)
# 创建所有用户任务
user_tasks = []
for user_id in range(1, concurrent_users + 1):
task = simulate_single_user(user_id, questions, request_semaphore)
user_tasks.append(task)
# 同时启动所有用户
print(f"🎬 启动 {concurrent_users} 个并发用户...")
user_results = await asyncio.gather(*user_tasks, return_exceptions=True)
test_duration = time.time() - test_start_time
# 统计结果
successful_requests = 0
failed_requests = 0
response_times = []
total_questions = 0
all_request_results = []
for user_result in user_results:
if isinstance(user_result, Exception):
print(f"❌ 用户任务异常: {user_result}")
continue
for request_result in user_result['results']:
all_request_results.append(request_result)
total_questions += 1
if request_result['success']:
successful_requests += 1
response_times.append(request_result['response_time'])
# 保存详细回答到全局列表
detailed_answers.append({
'concurrent_users': concurrent_users,
'user_id': request_result['user_id'],
'question': request_result['question'],
'response_time': round(request_result['response_time'], 3),
'answer_length': request_result['answer_length'],
'full_answer': request_result['full_answer'],
'answer_preview': request_result['answer_preview'],
'metadata': json.dumps(request_result['metadata'], ensure_ascii=False) if request_result[
'metadata'] else "",
'status_messages': ' | '.join(request_result['status_messages']),
'success': True,
'test_time': datetime.now().strftime('%Y-%m-%d %H:%M:%S')
})
else:
failed_requests += 1
# 保存失败记录
detailed_answers.append({
'concurrent_users': concurrent_users,
'user_id': request_result['user_id'],
'question': request_result['question'],
'response_time': round(request_result['response_time'], 3),
'answer_length': 0,
'full_answer': "",
'answer_preview': "",
'metadata': json.dumps(request_result['metadata'], ensure_ascii=False) if request_result.get(
'metadata') else "",
'status_messages': ' | '.join(request_result.get('status_messages', [])),
'success': False,
'error': request_result.get('error', '未知错误'),
'test_time': datetime.now().strftime('%Y-%m-%d %H:%M:%S')
})
# 计算统计指标
if response_times:
avg_response_time = statistics.mean(response_times)
min_response_time = min(response_times)
max_response_time = max(response_times)
p95_response_time = statistics.quantiles(response_times, n=20)[18] if len(response_times) >= 20 else max(
response_times)
success_rate = (successful_requests / total_questions) * 100
requests_per_second = total_questions / test_duration
else:
avg_response_time = min_response_time = max_response_time = p95_response_time = 0
success_rate = 0
requests_per_second = 0
# 记录测试结果
test_result = {
'并发用户数': concurrent_users,
'总请求数': total_questions,
'成功请求数': successful_requests,
'失败请求数': failed_requests,
'成功率(%)': round(success_rate, 2),
'测试总时长(秒)': round(test_duration, 2),
'平均响应时间(秒)': round(avg_response_time, 3),
'最小响应时间(秒)': round(min_response_time, 3),
'最大响应时间(秒)': round(max_response_time, 3),
'P95响应时间(秒)': round(p95_response_time, 3),
'吞吐量(请求/秒)': round(requests_per_second, 2),
'测试时间': datetime.now().strftime('%Y-%m-%d %H:%M:%S')
}
pressure_test_results.append(test_result)
# 输出测试摘要
print(f"\n📊 压力测试结果摘要 - {concurrent_users}并发用户:")
print(f" ✅ 成功请求: {successful_requests}/{total_questions} ({success_rate:.1f}%)")
print(f" ⏱️ 平均响应时间: {avg_response_time:.3f}s")
print(f" 📈 P95响应时间: {p95_response_time:.3f}s")
print(f" 🚀 吞吐量: {requests_per_second:.2f} 请求/秒")
print(f" ⏰ 测试时长: {test_duration:.2f}s")
return test_result
# ===================== 保存详细回答到CSV =====================
def save_detailed_answers_to_csv(filename: str = None):
"""保存详细回答结果到CSV文件"""
if not detailed_answers:
print("❌ 没有详细回答数据可保存")
return None
if not filename:
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
filename = f'detailed_answers_{timestamp}.csv'
# 定义CSV列顺序
fieldnames = [
'concurrent_users', 'user_id', 'question', 'response_time',
'answer_length', 'full_answer', 'answer_preview', 'metadata',
'status_messages', 'success', 'error', 'test_time'
]
try:
with open(filename, 'w', newline='', encoding='utf-8-sig') as csvfile:
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
writer.writeheader()
for answer in detailed_answers:
writer.writerow(answer)
print(f"💾 详细回答已保存到: {filename}")
print(f"📊 共保存 {len(detailed_answers)} 条回答记录")
# 打印一些成功回答的预览
successful_answers = [a for a in detailed_answers if a.get('success')]
if successful_answers:
print(f"✅ 成功回答数量: {len(successful_answers)}")
for i, answer in enumerate(successful_answers[:3]): # 显示前3个成功回答的预览
print(f" 示例{i + 1}: {answer.get('answer_preview', '')}")
return filename
except Exception as e:
print(f"❌ 保存CSV文件失败: {str(e)}")
return None
# ===================== 保存详细回答到Excel =====================
def save_detailed_answers_to_excel(filename: str = None):
"""保存详细回答结果到Excel文件"""
if not detailed_answers:
print("❌ 没有详细回答数据可保存")
return None
if not filename:
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
filename = f'detailed_answers_{timestamp}.xlsx'
try:
df = pd.DataFrame(detailed_answers)
# 重新排列列的顺序
column_order = [
'concurrent_users', 'user_id', 'question', 'response_time',
'answer_length', 'answer_preview', 'full_answer', 'metadata',
'status_messages', 'success', 'error', 'test_time'
]
# 只保留存在的列
existing_columns = [col for col in column_order if col in df.columns]
df = df[existing_columns]
# 重命名列名为中文
column_mapping = {
'concurrent_users': '并发用户数',
'user_id': '用户ID',
'question': '问题内容',
'response_time': '响应时间(秒)',
'answer_length': '回答长度',
'answer_preview': '回答预览',
'full_answer': '完整回答',
'metadata': '元数据',
'status_messages': '状态消息',
'success': '是否成功',
'error': '错误信息',
'test_time': '测试时间'
}
# 只重命名存在的列
column_mapping = {k: v for k, v in column_mapping.items() if k in df.columns}
df.rename(columns=column_mapping, inplace=True)
df.to_excel(filename, index=False, engine='openpyxl')
print(f"💾 详细回答已保存到Excel: {filename}")
print(f"📊 共保存 {len(detailed_answers)} 条回答记录")
return filename
except Exception as e:
print(f"❌ 保存Excel文件失败: {str(e)}")
return None
# ===================== 保存完整回答到单独文件 =====================
def save_full_answers_to_text(filename: str = None):
"""将完整回答保存到文本文件,便于阅读"""
if not detailed_answers:
print("❌ 没有详细回答数据可保存")
return None
if not filename:
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
filename = f'full_answers_{timestamp}.txt'
try:
with open(filename, 'w', encoding='utf-8') as f:
for i, answer in enumerate(detailed_answers):
f.write(f"=== 记录 {i + 1} ===\n")
f.write(f"并发用户数: {answer.get('concurrent_users', 'N/A')}\n")
f.write(f"用户ID: {answer.get('user_id', 'N/A')}\n")
f.write(f"问题: {answer.get('question', 'N/A')}\n")
f.write(f"响应时间: {answer.get('response_time', 'N/A')}秒\n")
f.write(f"是否成功: {'是' if answer.get('success') else '否'}\n")
if answer.get('success'):
f.write(f"回答长度: {answer.get('answer_length', 0)} 字符\n")
f.write(f"完整回答:\n{answer.get('full_answer', '')}\n")
else:
f.write(f"错误信息: {answer.get('error', '未知错误')}\n")
f.write(f"测试时间: {answer.get('test_time', 'N/A')}\n")
f.write("=" * 50 + "\n\n")
print(f"💾 完整回答已保存到文本文件: {filename}")
return filename
except Exception as e:
print(f"❌ 保存文本文件失败: {str(e)}")
return None
# ===================== 生成压力测试报告 =====================
def generate_pressure_test_report():
"""生成压力测试报告"""
if not pressure_test_results:
print("❌ 没有测试数据可生成报告")
return
# 保存汇总报告
df_summary = pd.DataFrame(pressure_test_results)
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
excel_filename = f'pressure_test_summary_{timestamp}.xlsx'
df_summary.to_excel(excel_filename, index=False, engine='openpyxl')
# 保存详细回答
csv_filename = save_detailed_answers_to_csv()
excel_detailed_filename = save_detailed_answers_to_excel()
text_filename = save_full_answers_to_text()
# 生成摘要报告
print(f"\n{'📈' * 20}")
print(f"🎯 压力测试最终报告")
print(f"{'📈' * 20}")
for result in pressure_test_results:
print(f"\n👥 并发用户数: {result['并发用户数']}")
print(f" ✅ 成功率: {result['成功率(%)']}%")
print(f" ⏱️ 平均响应: {result['平均响应时间(秒)']}s")
print(f" 📊 P95响应: {result['P95响应时间(秒)']}s")
print(f" 🚀 吞吐量: {result['吞吐量(请求/秒)']} req/s")
print(f" ⏰ 测试时长: {result['测试总时长(秒)']}s")
# 找出性能拐点
if len(pressure_test_results) > 1:
print(f"\n🔍 性能分析:")
# 计算性能下降点
best_throughput = max(r['吞吐量(请求/秒)'] for r in pressure_test_results)
best_concurrency = next(
r['并发用户数'] for r in pressure_test_results if r['吞吐量(请求/秒)'] == best_throughput)
print(f" 🏆 最佳吞吐量: {best_throughput:.2f} 请求/秒 (在{best_concurrency}并发时)")
# 找出响应时间开始显著上升的点
base_response_time = pressure_test_results[0]['平均响应时间(秒)']
for result in pressure_test_results:
if result['平均响应时间(秒)'] > base_response_time * 3:
print(f" ⚠️ 响应时间显著上升点: {result['并发用户数']} 并发用户")
break
print(f"\n💾 报告文件:")
print(f" 📊 汇总报告: {excel_filename}")
if csv_filename:
print(f" 📝 详细回答(CSV): {csv_filename}")
if excel_detailed_filename:
print(f" 📝 详细回答(Excel): {excel_detailed_filename}")
if text_filename:
print(f" 📄 完整回答(文本): {text_filename}")
return {
'summary': excel_filename,
'detailed_csv': csv_filename,
'detailed_excel': excel_detailed_filename,
'full_text': text_filename
}
# ===================== 健康检查 =====================
async def health_check() -> bool:
"""检查服务是否健康"""
try:
async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=10)) as session:
async with session.get(f"{API_BASE_URL}/health") as response:
if response.status == 200:
data = await response.json()
print(f"✅ 服务健康检查通过: {data.get('status', 'unknown')}")
return True
else:
print(f"❌ 服务健康检查失败: HTTP {response.status}")
return False
except Exception as e:
print(f"❌ 服务健康检查异常: {str(e)}")
return False
# ===================== 主函数 =====================
async def main():
"""主压力测试函数"""
print("🎯 RAG系统压力测试开始")
# 健康检查
if not await health_check():
print("❌ 服务不可用,终止测试")
return
# 加载测试问题
questions = load_test_questions("questions.txt", 50) # 加载50个问题用于测试
print(f"📚 加载了 {len(questions)} 个测试问题")
# 执行渐进式压力测试
await progressive_pressure_test(questions)
# 生成报告
report_files = generate_pressure_test_report()
print(f"\n🎉 压力测试完成!")
if report_files.get('full_text'):
print(f"📄 查看完整回答: {report_files['full_text']}")
# ===================== 渐进式压力测试 =====================
async def progressive_pressure_test(questions: List[str]):
"""渐进式压力测试 - 从低并发到高并发"""
print("🎯 开始渐进式压力测试")
print(f"📝 测试问题库: {len(questions)} 个问题")
print(f"🌐 API地址: {API_BASE_URL}")
# 定义并发用户数梯度
concurrency_levels = [1, 5, 10, 20, 30, 50, 80, 100]
# 根据服务器性能调整最大并发请求限制
max_concurrent_requests = 100 # 同时最多100个请求在处理中
for concurrent_users in concurrency_levels:
try:
# 在进入下一级别前等待一下,让服务器恢复
if concurrent_users > 10:
print(f"⏳ 等待10秒让服务器恢复...")
await asyncio.sleep(10)
result = await run_pressure_test(concurrent_users, questions, max_concurrent_requests)
# 如果成功率低于80%或平均响应时间超过30秒,停止测试
if result['成功率(%)'] < 80 or result['平均响应时间(秒)'] > 30:
print(f"🚨 系统性能达到极限,停止测试")
break
except Exception as e:
print(f"❌ 并发级别 {concurrent_users} 测试失败: {str(e)}")
break
# ===================== 快速测试函数 =====================
async def quick_test():
"""快速测试 - 用于验证环境"""
print("🔧 执行快速环境验证测试...")
if not await health_check():
return
questions = [
"电视电话会议和国际会议分别按什么管理?",
"正高级技术职称专业人员和局级领导干部的师资费如何计算?"
]
# 只测试5个并发用户
await run_pressure_test(5, questions, 10)
# 保存结果
report_files = generate_pressure_test_report()
print(f"🔧 快速测试完成! 查看回答: {report_files.get('full_text', 'N/A')}")
if __name__ == '__main__':
# 使用完整测试
# asyncio.run(main())
# 或者使用快速测试
asyncio.run(quick_test()) 帮我看一下这段代码为什么会报错👤 用户005 开始提问,共2个问题
❌ 用户003 问题1 失败 - 错误: None
❌ 用户005 问题1 失败 - 错误: None
❌ 用户002 问题1 失败 - 错误: None
❌ 用户005 问题2 失败 - 错误: None
❌ 用户003 问题2 失败 - 错误: None
❌ 用户任务异常: Sample larger than population or is negative
❌ 用户任务异常: Sample larger than population or is negative