Gemini与Cloud Pub/Sub集成:generative-ai项目消息队列应用指南
引言:为什么需要消息队列集成?
在构建生成式AI应用时,您是否遇到过以下挑战:
- 长时间运行的文本生成任务阻塞用户界面
- 并发请求导致API调用频率超限
- 分布式系统中各组件间的异步通信需求
- 任务执行状态的可靠追踪与重试机制
Cloud Pub/Sub(发布/订阅)是Google Cloud的全托管消息队列服务,可以有效应对这些问题。本文将详细介绍如何在generative-ai项目中集成Gemini模型与Cloud Pub/Sub,构建高可用、可扩展的AI应用架构。
核心概念与架构设计
关键组件解析
| 组件 | 作用 | 技术特性 |
|---|---|---|
| Gemini模型 | 生成式AI能力提供者 | 多模态支持、函数调用、上下文理解 |
| Cloud Pub/Sub | 异步消息传递中间件 | 消息持久化、默认至少一次交付(可选启用恰好一次)、水平扩展 |
| 发布者(Publisher) | 发送任务请求到队列 | 支持批量发送、异步发送 |
| 订阅者(Subscriber) | 处理队列中的任务 | 支持拉取/推送模式、死信队列 |
| 回调服务(Callback) | 处理任务结果 | 结果存储、通知、后续处理 |
系统架构流程
整体流程为:客户端将生成任务发布到Pub/Sub主题,订阅者(Worker)拉取消息并调用Gemini模型生成内容,结果写入数据库并触发回调或通知,任务状态可全程跟踪。
环境准备与依赖安装
前提条件
- Google Cloud账号及项目
- 已启用Vertex AI API和Cloud Pub/Sub API
- Python 3.9+环境
- 适当的IAM权限(Pub/Sub管理员、Vertex AI用户)
安装必要依赖
# 克隆项目仓库
git clone https://gitcode.com/GitHub_Trending/ge/generative-ai
cd generative-ai
# 安装依赖包
pip install --upgrade google-cloud-pubsub google-cloud-aiplatform google-cloud-firestore google-cloud-monitoring python-dotenv
配置环境变量
创建.env文件并添加以下内容:
# Google Cloud配置
PROJECT_ID=your-project-id
LOCATION=us-central1
TOPIC_ID=gemini-task-queue
SUBSCRIPTION_ID=gemini-task-subscription
# Gemini模型配置
MODEL_ID=gemini-2.0-flash
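在正式接入之前,可以先用一个简单的脚本校验配置是否齐全。下面是一个最小化的示意脚本(假设 .env 位于当前工作目录,变量名与上文一致),用于在服务启动前尽早发现缺失的配置:
# check_config.py —— 启动前校验必需的环境变量(示意代码)
import os
from dotenv import load_dotenv

REQUIRED_VARS = ["PROJECT_ID", "LOCATION", "TOPIC_ID", "SUBSCRIPTION_ID", "MODEL_ID"]

def validate_config():
    """读取.env并检查必需变量,缺失时抛出异常"""
    load_dotenv()
    missing = [name for name in REQUIRED_VARS if not os.getenv(name)]
    if missing:
        raise RuntimeError(f"缺少必需的环境变量: {', '.join(missing)}")
    return {name: os.getenv(name) for name in REQUIRED_VARS}

if __name__ == "__main__":
    print(validate_config())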
实现步骤:从消息发布到结果处理
步骤1:创建Pub/Sub主题和订阅
from google.cloud import pubsub_v1
import os
from dotenv import load_dotenv
load_dotenv()
project_id = os.getenv("PROJECT_ID")
topic_id = os.getenv("TOPIC_ID")
subscription_id = os.getenv("SUBSCRIPTION_ID")
# 创建主题
publisher = pubsub_v1.PublisherClient()
topic_path = publisher.topic_path(project_id, topic_id)
topic = publisher.create_topic(request={"name": topic_path})
print(f"创建主题: {topic.name}")
# 创建订阅
subscriber = pubsub_v1.SubscriberClient()
subscription_path = subscriber.subscription_path(project_id, subscription_id)
subscription = subscriber.create_subscription(
request={
"name": subscription_path,
"topic": topic_path,
"ack_deadline_seconds": 300, # 5分钟处理超时
}
)
print(f"创建订阅: {subscription.name}")
步骤2:实现任务发布者(生产者)
import json
import time
from google.cloud import pubsub_v1
import os
from dotenv import load_dotenv
load_dotenv()
project_id = os.getenv("PROJECT_ID")
topic_id = os.getenv("TOPIC_ID")
publisher = pubsub_v1.PublisherClient()
topic_path = publisher.topic_path(project_id, topic_id)
def publish_gemini_task(prompt, task_id=None, priority="normal", metadata=None):
"""
发布Gemini生成任务到Pub/Sub队列
参数:
prompt: Gemini模型的输入提示
task_id: 可选,任务唯一标识符
priority: 任务优先级 (high/normal/low)
metadata: 可选,任务元数据
"""
# 生成唯一任务ID(如果未提供)
task_id = task_id or f"gemini-task-{int(time.time())}-{os.urandom(4).hex()}"
# 构建任务消息
message = {
"task_id": task_id,
"prompt": prompt,
"priority": priority,
"timestamp": time.time(),
"metadata": metadata or {}
}
# 序列化为JSON并编码为字节
data = json.dumps(message).encode("utf-8")
# 发布消息
future = publisher.publish(topic_path, data=data)
# 等待发布完成并返回结果
try:
message_id = future.result()
print(f"任务 {task_id} 已发布,消息ID: {message_id}")
return {
"success": True,
"task_id": task_id,
"message_id": message_id
}
except Exception as e:
print(f"发布任务失败: {str(e)}")
return {
"success": False,
"error": str(e)
}
# 示例使用
if __name__ == "__main__":
# 发布示例任务
result = publish_gemini_task(
prompt="写一篇关于人工智能在医疗领域应用的500字文章",
metadata={"category": "article", "word_count": 500}
)
if result["success"]:
print(f"任务已成功提交,任务ID: {result['task_id']}")
else:
print(f"任务提交失败: {result['error']}")
步骤3:实现任务订阅者(消费者)
import json
import os
import time
from google.cloud import pubsub_v1
import vertexai
from vertexai.generative_models import GenerativeModel
from dotenv import load_dotenv
load_dotenv()
project_id = os.getenv("PROJECT_ID")
location = os.getenv("LOCATION")
subscription_id = os.getenv("SUBSCRIPTION_ID")
model_id = os.getenv("MODEL_ID")
# 初始化Vertex AI
vertexai.init(project=project_id, location=location)
# 初始化Gemini模型
model = GenerativeModel(model_id)
def process_gemini_task(message):
"""处理单个Gemini生成任务"""
try:
# 解析消息数据
task_data = json.loads(message.data.decode("utf-8"))
task_id = task_data["task_id"]
prompt = task_data["prompt"]
metadata = task_data.get("metadata", {})
print(f"开始处理任务 {task_id}")
# 调用Gemini模型生成内容
response = model.generate_content(prompt)
# 处理生成结果(这里可以保存到数据库、发送通知等)
result = {
"task_id": task_id,
"success": True,
"content": response.text,
"prompt": prompt,
"metadata": metadata,
"timestamp": {
"processed_at": time.time()
}
}
# 保存结果(示例:这里只是打印,实际应用中应保存到数据库)
print(f"任务 {task_id} 处理完成,结果长度: {len(result['content'])}")
# 手动确认消息已处理
message.ack()
return result
except Exception as e:
print(f"处理任务时出错: {str(e)}")
# 可以根据错误类型决定是否重试:调用message.nack()会让消息尽快重新投递(受订阅的重试策略约束)
# message.nack()
raise e
def start_subscriber():
"""启动订阅者服务,持续监听并处理任务"""
subscriber = pubsub_v1.SubscriberClient()
subscription_path = subscriber.subscription_path(project_id, subscription_id)
print(f"开始监听订阅: {subscription_path}")
# 定义消息回调函数
def callback(message):
try:
process_gemini_task(message)
except Exception as e:
print(f"处理消息时发生错误: {str(e)}")
# 标记消息为未处理,以便稍后重试
message.nack()
# 开始监听
streaming_pull_future = subscriber.subscribe(
subscription_path,
callback=callback,
flow_control=pubsub_v1.types.FlowControl(max_messages=10) # 控制并发处理数量
)
print(f"订阅者已启动,按Ctrl+C停止")
# 保持订阅者运行
try:
streaming_pull_future.result()
except KeyboardInterrupt:
streaming_pull_future.cancel()
streaming_pull_future.result()
finally:
subscriber.close()
# 启动订阅者
if __name__ == "__main__":
start_subscriber()
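流式拉取(streaming pull)适合常驻的Worker进程;如果任务改由定时作业或Cloud Run Jobs批量处理,也可以改用同步pull。下面是一个示意草图(沿用上文的 project_id、subscription_id 配置与已初始化的 model),每次最多拉取10条消息并统一确认:
# 同步pull的替代方案:批量拉取、处理并统一确认(示意代码)
def pull_and_process_once(max_messages=10):
    subscriber = pubsub_v1.SubscriberClient()
    subscription_path = subscriber.subscription_path(project_id, subscription_id)
    response = subscriber.pull(
        request={"subscription": subscription_path, "max_messages": max_messages}
    )
    ack_ids = []
    for received in response.received_messages:
        task = json.loads(received.message.data.decode("utf-8"))
        result = model.generate_content(task["prompt"])
        print(f"任务 {task['task_id']} 完成,输出 {len(result.text)} 字符")
        ack_ids.append(received.ack_id)
    if ack_ids:
        subscriber.acknowledge(
            request={"subscription": subscription_path, "ack_ids": ack_ids}
        )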
步骤4:实现结果回调与状态跟踪
import json
import time
import os
from google.cloud import firestore
from dotenv import load_dotenv
load_dotenv()
# 初始化Firestore客户端(用于存储任务状态和结果)
db = firestore.Client()
tasks_collection = db.collection("gemini_tasks")
def update_task_status(task_id, status, result=None, error=None):
"""更新任务状态到数据库"""
try:
task_ref = tasks_collection.document(task_id)
# 获取当前任务数据(如果存在)
task_data = {}
if task_ref.get().exists:
task_data = task_ref.get().to_dict()
# 更新状态和时间戳
update_data = {
"status": status,
f"{status}_at": time.time()
}
# 如果有结果,添加结果
if result:
update_data["result"] = result
# 如果有错误,添加错误信息
if error:
update_data["error"] = str(error)
update_data["failed_attempts"] = task_data.get("failed_attempts", 0) + 1
# 合并更新数据
task_data.update(update_data)
# 保存更新
task_ref.set(task_data)
print(f"任务 {task_id} 状态更新为: {status}")
return True
except Exception as e:
print(f"更新任务状态失败: {str(e)}")
return False
def task_completion_callback(task_id, result):
"""任务完成回调函数"""
# 更新任务状态为已完成
update_task_status(task_id, "completed", result=result)
# 这里可以添加额外的后续处理,例如:
# 1. 发送通知(邮件、短信、推送等)
# 2. 触发其他工作流
# 3. 更新相关系统数据
metadata = result.get("metadata", {})
if metadata.get("notify_user"):
user_email = metadata["notify_user"]
# send_notification(user_email, task_id, result)
print(f"通知已发送到 {user_email} 关于任务 {task_id} 的完成情况")
# 在任务处理函数中集成状态更新和回调
def enhanced_process_gemini_task(message):
"""增强版任务处理函数,包含状态跟踪和结果回调"""
task_id = "unknown"  # 预先初始化,避免解析失败时except分支引用未定义变量
try:
# 解析消息数据
task_data = json.loads(message.data.decode("utf-8"))
task_id = task_data["task_id"]
prompt = task_data["prompt"]
metadata = task_data.get("metadata", {})
# 初始化任务记录
update_task_status(
task_id,
"received",
result={"prompt": prompt, "metadata": metadata}
)
print(f"开始处理任务 {task_id}")
update_task_status(task_id, "processing")
# 调用Gemini模型生成内容
response = model.generate_content(prompt)
# 准备结果数据
result = {
"task_id": task_id,
"content": response.text,
"usage": {
"input_tokens": response.usage_metadata.input_token_count,
"output_tokens": response.usage_metadata.output_token_count
}
}
# 更新任务状态为已完成
task_completion_callback(task_id, result)
# 确认消息已处理
message.ack()
return result
except Exception as e:
error_msg = str(e)
print(f"处理任务时出错: {error_msg}")
# 更新任务状态为失败
update_task_status(task_id, "failed", error=error_msg)
# 标记消息为未处理,以便重试
message.nack()
raise e
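调用方通常需要查询任务进度。下面是一个基于上文 Firestore 集合 gemini_tasks 的状态查询草图(沿用上文的 tasks_collection),可直接包装成HTTP接口供前端轮询:
# 查询任务状态与结果(示意代码)
def get_task_status(task_id):
    snapshot = tasks_collection.document(task_id).get()
    if not snapshot.exists:
        return {"task_id": task_id, "status": "not_found"}
    data = snapshot.to_dict()
    return {
        "task_id": task_id,
        "status": data.get("status", "unknown"),
        "result": data.get("result"),
        "error": data.get("error"),
    }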
高级特性与最佳实践
批量处理与流量控制
def start_batched_subscriber():
"""启动支持批量处理的订阅者"""
subscriber = pubsub_v1.SubscriberClient()
subscription_path = subscriber.subscription_path(project_id, subscription_id)
# 配置批量处理参数
flow_control = pubsub_v1.types.FlowControl(
max_messages=100, # 最大并发处理消息数
max_bytes=10 * 1024 * 1024, # 最大接收字节数
max_lease_duration=300, # 消息最长租赁时间(秒)
)
# 配置自定义调度器:用线程池控制回调执行的并发线程数
from concurrent import futures
from google.cloud.pubsub_v1.subscriber.scheduler import ThreadScheduler
scheduler = ThreadScheduler(executor=futures.ThreadPoolExecutor(max_workers=50))
def callback(message):
try:
process_gemini_task(message)
except Exception as e:
print(f"处理消息时发生错误: {str(e)}")
message.nack()
print(f"启动批量处理订阅者,监听 {subscription_path}")
streaming_pull_future = subscriber.subscribe(
subscription_path,
callback=callback,
flow_control=flow_control,
scheduler=scheduler
)
try:
streaming_pull_future.result()
except KeyboardInterrupt:
streaming_pull_future.cancel()
streaming_pull_future.result()
finally:
subscriber.close()
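需要说明的是,Pub/Sub的消息批量合并是在发布端通过 BatchSettings 配置的:客户端会把短时间内的多条消息合并为一次请求发送,从而减少网络往返。下面是一个示意草图(参数取值仅供参考):
# 发布端批量设置:满足任一阈值即发送一批(示意代码)
from google.cloud import pubsub_v1

batch_settings = pubsub_v1.types.BatchSettings(
    max_messages=100,           # 每批最多100条消息
    max_bytes=1 * 1024 * 1024,  # 每批最多约1MB
    max_latency=0.05,           # 最多等待50毫秒即发送
)
batched_publisher = pubsub_v1.PublisherClient(batch_settings=batch_settings)
# 之后用 batched_publisher.publish(...) 发布的消息会按上述设置自动合批发送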
优先级队列实现
def create_priority_topics_and_subscriptions():
"""创建不同优先级的主题和订阅"""
from google.api_core.exceptions import AlreadyExists # "资源已存在"异常,用于幂等创建
publisher = pubsub_v1.PublisherClient()
# 优先级列表
priorities = ["high", "normal", "low"]
for priority in priorities:
# 创建主题
topic_id = f"gemini-task-queue-{priority}"
topic_path = publisher.topic_path(project_id, topic_id)
try:
topic = publisher.create_topic(request={"name": topic_path})
print(f"创建优先级主题: {topic.name}")
except AlreadyExists:
print(f"优先级主题已存在: {topic_path}")
# 创建订阅
subscriber = pubsub_v1.SubscriberClient()
subscription_id = f"gemini-task-subscription-{priority}"
subscription_path = subscriber.subscription_path(project_id, subscription_id)
# 高优先级订阅可以设置更短的重试退避;RetryPolicy的字段是Duration类型而非整数
from google.protobuf import duration_pb2
retry_policy = pubsub_v1.types.RetryPolicy(
minimum_backoff=duration_pb2.Duration(seconds=30 if priority == "high" else 60), # 高优先级30秒,其他60秒
maximum_backoff=duration_pb2.Duration(seconds=300 if priority == "high" else 600) # 高优先级5分钟,其他10分钟
)
try:
subscription = subscriber.create_subscription(
request={
"name": subscription_path,
"topic": topic_path,
"ack_deadline_seconds": 300,
"retry_policy": retry_policy,
}
)
print(f"创建优先级订阅: {subscription.name}")
except AlreadyExists:
print(f"优先级订阅已存在: {subscription_path}")
def publish_priority_task(prompt, priority="normal", **kwargs):
"""发布到对应优先级的主题"""
# 验证优先级
if priority not in ["high", "normal", "low"]:
raise ValueError("优先级必须是 'high', 'normal' 或 'low'")
# 获取对应优先级的主题
topic_id = f"gemini-task-queue-{priority}"
publisher = pubsub_v1.PublisherClient()
topic_path = publisher.topic_path(project_id, topic_id)
# 构建任务消息
task_id = kwargs.get("task_id") or f"gemini-task-{int(time.time())}-{os.urandom(4).hex()}"
message = {
"task_id": task_id,
"prompt": prompt,
"priority": priority,
"timestamp": time.time(),
"metadata": kwargs.get("metadata", {})
}
# 序列化为JSON并编码为字节
data = json.dumps(message).encode("utf-8")
# 发布消息
future = publisher.publish(topic_path, data=data)
try:
message_id = future.result()
print(f"优先级 {priority} 任务 {task_id} 已发布")
return {
"success": True,
"task_id": task_id,
"message_id": message_id
}
except Exception as e:
print(f"发布优先级 {priority} 任务失败: {str(e)}")
return {
"success": False,
"error": str(e)
}
错误处理与重试机制
def process_with_retry(message, max_retries=3):
"""带重试机制的消息处理"""
retries = 0
while retries < max_retries:
try:
return process_gemini_task(message)
except Exception as e:
retries += 1
if retries >= max_retries:
print(f"达到最大重试次数 ({max_retries}),将消息发送到死信队列")
# 这里可以实现将消息发送到死信队列的逻辑
message.ack() # 确认消息,避免无限重试
# send_to_dead_letter_queue(message, str(e))
return None
delay = 2 ** retries # 指数退避重试延迟
print(f"处理失败,重试 {retries}/{max_retries},延迟 {delay} 秒: {str(e)}")
time.sleep(delay)
return None
def create_dead_letter_topic_and_subscription():
"""创建死信队列主题和订阅"""
from google.api_core.exceptions import AlreadyExists # "资源已存在"异常,用于幂等创建
publisher = pubsub_v1.PublisherClient()
subscriber = pubsub_v1.SubscriberClient()
# 创建死信队列主题
dlq_topic_id = "gemini-task-dlq"
dlq_topic_path = publisher.topic_path(project_id, dlq_topic_id)
try:
dlq_topic = publisher.create_topic(request={"name": dlq_topic_path})
print(f"创建死信队列主题: {dlq_topic.name}")
except AlreadyExists:
print(f"死信队列主题已存在: {dlq_topic_path}")
# 创建死信队列订阅
dlq_subscription_id = "gemini-task-dlq-subscription"
dlq_subscription_path = subscriber.subscription_path(project_id, dlq_subscription_id)
try:
dlq_subscription = subscriber.create_subscription(
request={
"name": dlq_subscription_path,
"topic": dlq_topic_path,
"ack_deadline_seconds": 600, # 为死信消息的人工排查留出更长的处理时间
}
)
print(f"创建死信队列订阅: {dlq_subscription.name}")
except AlreadyExists:
print(f"死信队列订阅已存在: {dlq_subscription_path}")
return dlq_topic_path, dlq_subscription_path
def send_to_dead_letter_queue(message, error):
"""将失败的消息发送到死信队列"""
dlq_topic_path, _ = create_dead_letter_topic_and_subscription()
try:
# 解析原始消息
original_data = json.loads(message.data.decode("utf-8"))
# 添加错误信息
failed_data = {
"original_message": original_data,
"error": error,
"failed_at": time.time(),
"delivery_attempt": message.delivery_attempt
}
# 发送到死信队列
publisher = pubsub_v1.PublisherClient()
future = publisher.publish(dlq_topic_path, data=json.dumps(failed_data).encode("utf-8"))
message_id = future.result()
print(f"消息已发送到死信队列,消息ID: {message_id}")
return True
except Exception as e:
print(f"发送到死信队列失败: {str(e)}")
return False
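除了上面这种在代码里手动转发的方式,Pub/Sub也支持在订阅上直接配置 DeadLetterPolicy:当消息投递失败达到上限后,由服务端自动将其转发到死信主题(需为Pub/Sub服务代理授予相应IAM权限,可参考后文Terraform配置部分的说明)。下面是一个示意草图:
# 在订阅上配置原生死信策略(示意代码,沿用上文的主题/订阅路径变量)
def create_subscription_with_dlq(subscription_path, topic_path, dlq_topic_path):
    subscriber = pubsub_v1.SubscriberClient()
    dead_letter_policy = pubsub_v1.types.DeadLetterPolicy(
        dead_letter_topic=dlq_topic_path,
        max_delivery_attempts=5,  # 投递失败5次后自动转入死信主题
    )
    return subscriber.create_subscription(
        request={
            "name": subscription_path,
            "topic": topic_path,
            "ack_deadline_seconds": 300,
            "dead_letter_policy": dead_letter_policy,
        }
    )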
性能优化与监控
性能优化策略
1. 批量处理优化
   - 实现消息批量处理,减少API调用次数
   - 根据消息大小和处理时间动态调整批处理大小
2. 并发控制
   - 根据系统资源和Gemini API配额调整并发处理数
   - 使用优先级队列确保重要任务优先处理
3. 缓存策略
   - 缓存常见任务的提示模板
   - 对重复请求进行去重处理(缓存/去重示例见下文)
4. 资源管理
   - 实现连接池复用,减少连接建立开销
   - 根据负载自动扩缩容订阅者数量
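针对上面提到的缓存与去重策略,下面给出一个基于提示哈希的示意草图(沿用上文的 db 与 model;缓存集合名 prompt_cache 为本文假设):
# 基于提示哈希的结果缓存/去重(示意代码)
import hashlib

cache_collection = db.collection("prompt_cache")  # 假设的缓存集合名

def generate_with_cache(prompt):
    key = hashlib.sha256(prompt.encode("utf-8")).hexdigest()
    cached = cache_collection.document(key).get()
    if cached.exists:
        return cached.to_dict()["content"]  # 命中缓存,直接复用已有结果
    content = model.generate_content(prompt).text
    cache_collection.document(key).set({"prompt": prompt, "content": content})
    return content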
监控与日志实现
import logging
import time
from google.cloud import monitoring_v3
# 配置日志
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
handlers=[
logging.FileHandler("gemini_worker.log"),
logging.StreamHandler()
]
)
logger = logging.getLogger("gemini-pubsub-worker")
def record_metric(metric_name, value, labels=None):
"""记录自定义指标到Cloud Monitoring"""
try:
client = monitoring_v3.MetricServiceClient()
project_name = f"projects/{project_id}"
# 定义指标标签
labels = labels or {}
labels["worker"] = "gemini-task-processor"
# 创建指标系列
series = monitoring_v3.TimeSeries()
series.metric.type = f"custom.googleapis.com/gemini/{metric_name}"
# 添加标签(注意循环变量不要覆盖参数value)
for key, label_value in labels.items():
    series.metric.labels[key] = label_value
series.resource.type = "global"
# 构造数据点,以当前时间作为区间结束时间
now = time.time()
interval = monitoring_v3.TimeInterval(
{"end_time": {"seconds": int(now), "nanos": int((now % 1) * 1e9)}}
)
point = monitoring_v3.Point(
{"interval": interval, "value": {"double_value": float(value)}}
)
series.points = [point]
# 发送指标
client.create_time_series(name=project_name, time_series=[series])
return True
except Exception as e:
logger.error(f"记录指标失败: {str(e)}")
return False
# 在任务处理函数中添加监控
def monitored_process_gemini_task(message):
"""添加了监控的任务处理函数"""
start_time = time.time()
task_id = "unknown"
try:
# 解析消息数据获取task_id
task_data = json.loads(message.data.decode("utf-8"))
task_id = task_data.get("task_id", "unknown")
priority = task_data.get("priority", "normal")
# 记录任务开始指标
record_metric("tasks_started", 1, {"priority": priority})
# 处理任务
result = process_gemini_task(message)
# 记录处理时间
processing_time = time.time() - start_time
record_metric(
"processing_time_seconds",
processing_time,
{"priority": priority, "status": "success"}
)
# 记录成功指标
record_metric("tasks_completed", 1, {"priority": priority})
logger.info(f"任务 {task_id} 处理完成,耗时: {processing_time:.2f}秒")
return result
except Exception as e:
# 记录失败指标
processing_time = time.time() - start_time
record_metric(
"processing_time_seconds",
processing_time,
{"priority": "unknown", "status": "error"}
)
record_metric("tasks_failed", 1, {"error_type": str(type(e))})
logger.error(f"任务 {task_id} 处理失败: {str(e)}", exc_info=True)
raise e
完整示例:文章生成服务
以下是一个完整的基于Gemini和Cloud Pub/Sub的文章生成服务实现,整合了上述所有功能:
# gemini_article_generator_service.py
import json
import time
import os
import logging
from google.api_core.exceptions import AlreadyExists
from google.cloud import pubsub_v1
from google.cloud import firestore
import vertexai
from vertexai.generative_models import GenerativeModel
from dotenv import load_dotenv
# 配置日志
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger("gemini-article-generator")
# 加载环境变量
load_dotenv()
# 配置参数
PROJECT_ID = os.getenv("PROJECT_ID")
LOCATION = os.getenv("LOCATION", "us-central1")
MODEL_ID = os.getenv("MODEL_ID", "gemini-2.0-flash")
# 初始化Vertex AI与Firestore客户端
vertexai.init(project=PROJECT_ID, location=LOCATION)
db = firestore.Client(project=PROJECT_ID)
tasks_collection = db.collection("article_generation_tasks")
class GeminiArticleGenerator:
def __init__(self):
"""初始化文章生成器"""
self.model = GenerativeModel(MODEL_ID)
self.publisher = pubsub_v1.PublisherClient()
self.subscriber = pubsub_v1.SubscriberClient()
# 创建必要的主题和订阅
self._create_topics_and_subscriptions()
def _create_topics_and_subscriptions(self):
"""创建必要的Pub/Sub主题和订阅"""
# 创建优先级主题
for priority in ["high", "normal", "low"]:
topic_id = f"article-generation-queue-{priority}"
topic_path = self.publisher.topic_path(PROJECT_ID, topic_id)
try:
self.publisher.create_topic(request={"name": topic_path})
logger.info(f"创建优先级主题: {topic_path}")
except AlreadyExists:
pass # 主题已存在,无需处理
except Exception as e:
logger.warning(f"创建主题失败: {str(e)}")
# 创建死信队列
dlq_topic_id = "article-generation-dlq"
self.dlq_topic_path = self.publisher.topic_path(PROJECT_ID, dlq_topic_id)
try:
self.publisher.create_topic(request={"name": self.dlq_topic_path})
logger.info(f"创建死信队列主题: {self.dlq_topic_path}")
except AlreadyExists:
pass # 死信队列主题已存在,无需处理
except Exception as e:
logger.warning(f"创建死信队列失败: {str(e)}")
def submit_article_request(self, topic, word_count=500, tone="neutral", priority="normal", user_id=None):
"""
提交文章生成请求
参数:
topic: 文章主题
word_count: 目标字数
tone: 文章语气 (neutral/professional/casual/technical)
priority: 优先级 (high/normal/low)
user_id: 请求用户ID
返回:
包含任务ID的字典
"""
# 验证优先级
if priority not in ["high", "normal", "low"]:
raise ValueError("优先级必须是 'high', 'normal' 或 'low'")
# 生成任务ID
task_id = f"article-task-{int(time.time())}-{os.urandom(4).hex()}"
# 创建提示模板
prompt = f"""写一篇关于"{topic}"的文章,要求:
- 大约{word_count}字
- 语气{tone}
- 结构清晰,包含引言、正文和结论
- 内容准确,信息丰富
- 适合目标读者理解
请确保内容原创,逻辑连贯,并提供有价值的见解。"""
# 创建任务数据
task_data = {
"task_id": task_id,
"topic": topic,
"word_count": word_count,
"tone": tone,
"priority": priority,
"user_id": user_id,
"status": "pending",
"created_at": time.time()
}
# 保存任务记录
tasks_collection.document(task_id).set(task_data)
# 发布到Pub/Sub
topic_id = f"article-generation-queue-{priority}"
topic_path = self.publisher.topic_path(PROJECT_ID, topic_id)
message_data = {
"task_id": task_id,
"prompt": prompt,
"metadata": {
"topic": topic,
"word_count": word_count,
"tone": tone,
"user_id": user_id
},
"priority": priority,
"timestamp": time.time()
}
try:
# 发布消息
future = self.publisher.publish(
topic_path,
data=json.dumps(message_data).encode("utf-8")
)
message_id = future.result()
logger.info(f"文章生成任务 {task_id} 已提交,消息ID: {message_id}")
# 更新任务状态
tasks_collection.document(task_id).update({
"status": "submitted",
"message_id": message_id
})
return {
"success": True,
"task_id": task_id,
"message": "文章生成任务已提交"
}
except Exception as e:
error_msg = str(e)
logger.error(f"提交任务失败: {error_msg}")
# 更新任务状态为失败
tasks_collection.document(task_id).update({
"status": "failed",
"error": error_msg
})
return {
"success": False,
"error": error_msg,
"task_id": task_id
}
def start_processing_articles(self, priority="normal", max_concurrent=5):
"""
开始处理文章生成任务
参数:
priority: 要处理的优先级队列
max_concurrent: 最大并发处理数
"""
if priority not in ["high", "normal", "low"]:
raise ValueError("优先级必须是 'high', 'normal' 或 'low'")
subscription_id = f"article-worker-{priority}-subscription"
topic_id = f"article-generation-queue-{priority}"
topic_path = self.publisher.topic_path(PROJECT_ID, topic_id)
subscription_path = self.subscriber.subscription_path(PROJECT_ID, subscription_id)
# 创建或获取订阅(注意:流控属于客户端subscribe调用的参数,不是订阅资源的字段)
try:
self.subscriber.create_subscription(
request={
"name": subscription_path,
"topic": topic_path,
"ack_deadline_seconds": 300,
}
)
logger.info(f"创建订阅: {subscription_path}")
except AlreadyExists:
logger.info(f"使用现有订阅: {subscription_path}")
except Exception as e:
logger.error(f"创建订阅失败: {str(e)}")
return
# 定义消息处理回调
def process_message(message):
try:
start_time = time.time()
message_data = json.loads(message.data.decode("utf-8"))
task_id = message_data["task_id"]
prompt = message_data["prompt"]
metadata = message_data.get("metadata", {})
logger.info(f"处理文章任务: {task_id}")
# 更新任务状态
tasks_collection.document(task_id).update({
"status": "processing",
"started_at": time.time()
})
# 调用Gemini生成文章
response = self.model.generate_content(prompt)
article_content = response.text
# 估算字数
actual_word_count = len(article_content.split())
# 更新任务结果
tasks_collection.document(task_id).update({
"status": "completed",
"completed_at": time.time(),
"article_content": article_content,
"actual_word_count": actual_word_count,
"token_usage": {
"input_tokens": response.usage_metadata.input_token_count,
"output_tokens": response.usage_metadata.output_token_count
}
})
# 记录处理时间
processing_time = time.time() - start_time
logger.info(f"任务 {task_id} 处理完成,耗时 {processing_time:.2f}秒")
# 确认消息
message.ack()
except Exception as e:
error_msg = str(e)
logger.error(f"处理任务失败: {error_msg}", exc_info=True)
# 更新任务状态
try:
tasks_collection.document(task_id).update({
"status": "failed",
"error": error_msg,
"failed_at": time.time()
})
except:
pass
# 将消息发送到死信队列
try:
dlq_message = {
"original_message": message_data,
"error": error_msg,
"failed_at": time.time(),
"delivery_attempt": message.delivery_attempt
}
self.publisher.publish(
self.dlq_topic_path,
data=json.dumps(dlq_message).encode("utf-8")
)
logger.info(f"任务 {task_id} 已发送到死信队列")
except Exception as dlq_e:
logger.error(f"发送到死信队列失败: {str(dlq_e)}")
# 确认消息,避免无限重试
message.ack()
# 开始监听消息,通过flow_control限制并发处理数
logger.info(f"开始处理 {priority} 优先级文章任务...")
streaming_pull_future = self.subscriber.subscribe(
subscription_path,
callback=process_message,
flow_control=pubsub_v1.types.FlowControl(max_messages=max_concurrent)
)
# 保持运行
try:
streaming_pull_future.result()
except KeyboardInterrupt:
streaming_pull_future.cancel()
streaming_pull_future.result()
finally:
self.subscriber.close()
# 服务使用示例
if __name__ == "__main__":
# 创建生成器实例
article_generator = GeminiArticleGenerator()
# 示例:提交文章请求
if os.environ.get("SUBMIT_EXAMPLE", "false").lower() == "true":
result = article_generator.submit_article_request(
topic="人工智能在环境保护中的应用",
word_count=800,
tone="technical",
priority="high",
user_id="example-user-123"
)
if result["success"]:
print(f"文章请求已提交,任务ID: {result['task_id']}")
else:
print(f"提交失败: {result['error']}")
# 启动处理(默认处理normal优先级)
else:
priority = os.environ.get("PROCESS_PRIORITY", "normal")
article_generator.start_processing_articles(priority=priority)
部署与扩展
使用Cloud Run部署订阅者服务
# cloudrun-subscriber.yaml
apiVersion: serving.knative.dev/v1
kind: Service
metadata:
name: gemini-pubsub-worker
namespace: default
spec:
template:
spec:
containers:
- image: gcr.io/[PROJECT_ID]/gemini-pubsub-worker:latest
ports:
- containerPort: 8080
env:
- name: PROJECT_ID
value: "[PROJECT_ID]"
- name: LOCATION
value: "us-central1"
- name: MODEL_ID
value: "gemini-2.0-flash"
- name: PROCESS_PRIORITY
value: "normal"
resources:
limits:
cpu: "2"
memory: "4Gi"
requests:
cpu: "1"
memory: "2Gi"
serviceAccountName: gemini-worker@[PROJECT_ID].iam.gserviceaccount.com
traffic:
- percent: 100
latestRevision: true
使用Terraform配置基础设施
# main.tf - Pub/Sub和Gemini基础设施配置
provider "google" {
project = var.project_id
region = var.region
}
# 启用必要API
resource "google_project_service" "pubsub" {
service = "pubsub.googleapis.com"
}
resource "google_project_service" "aiplatform" {
service = "aiplatform.googleapis.com"
}
# 创建Pub/Sub主题
resource "google_pubsub_topic" "high_priority" {
name = "gemini-task-queue-high"
}
resource "google_pubsub_topic" "normal_priority" {
name = "gemini-task-queue-normal"
}
resource "google_pubsub_topic" "low_priority" {
name = "gemini-task-queue-low"
}
# 创建死信队列
# 注意:要让死信转发生效,还需为Pub/Sub服务代理(service-{PROJECT_NUMBER}@gcp-sa-pubsub.iam.gserviceaccount.com)
# 授予该DLQ主题的roles/pubsub.publisher,以及各源订阅的roles/pubsub.subscriber权限
resource "google_pubsub_topic" "dlq" {
name = "gemini-task-dlq"
}
# 创建订阅
resource "google_pubsub_subscription" "high_sub" {
name = "gemini-task-sub-high"
topic = google_pubsub_topic.high_priority.name
ack_deadline_seconds = 300
retry_policy {
minimum_backoff = "30s"
maximum_backoff = "300s"
}
dead_letter_policy {
dead_letter_topic = google_pubsub_topic.dlq.id
max_delivery_attempts = 5
}
}
resource "google_pubsub_subscription" "normal_sub" {
name = "gemini-task-sub-normal"
topic = google_pubsub_topic.normal_priority.name
ack_deadline_seconds = 300
retry_policy {
minimum_backoff = "60s"
maximum_backoff = "600s"
}
dead_letter_policy {
dead_letter_topic = google_pubsub_topic.dlq.id
max_delivery_attempts = 5
}
}
resource "google_pubsub_subscription" "low_sub" {
name = "gemini-task-sub-low"
topic = google_pubsub_topic.low_priority.name
ack_deadline_seconds = 300
retry_policy {
minimum_backoff = "120s"
maximum_backoff = "600s" # Pub/Sub允许的最大退避上限为600秒
}
dead_letter_policy {
dead_letter_topic = google_pubsub_topic.dlq.id
max_delivery_attempts = 5
}
}
# IAM权限配置
resource "google_pubsub_topic_iam_binding" "publisher" {
topic = google_pubsub_topic.normal_priority.name
role = "roles/pubsub.publisher"
members = [
"serviceAccount:gemini-publisher@${var.project_id}.iam.gserviceaccount.com",
]
}
resource "google_pubsub_subscription_iam_binding" "subscriber" {
subscription = google_pubsub_subscription.normal_sub.name
role = "roles/pubsub.subscriber"
members = [
"serviceAccount:gemini-worker@${var.project_id}.iam.gserviceaccount.com",
]
}
总结与最佳实践
关键要点回顾
1. 架构优势
   - 异步处理提高系统响应性和容错能力
   - 解耦生成任务提交与处理,提高系统弹性
   - 优先级队列确保重要任务优先处理
2. 可靠性保障
   - 消息持久化防止任务丢失
   - 重试机制处理临时故障
   - 死信队列隔离无法处理的消息
   - 状态跟踪提供任务全生命周期可见性
3. 性能优化
   - 批量处理减少API调用开销
   - 并发控制平衡资源利用与API配额
   - 优先级调度确保资源合理分配
生产环境检查清单
- 已配置适当的IAM权限和安全控制
- 实现了消息验证和输入净化
- 配置了监控、日志和告警
- 设置了合理的重试策略和死信队列
- 实现了任务状态跟踪和结果存储
- 进行了负载测试和性能优化
- 准备了扩展策略应对流量增长
- 制定了灾难恢复计划
后续学习与扩展方向
1. 高级功能
   - 实现消息压缩减少网络传输
   - 添加消息加密增强安全性
   - 实现基于内容的去重机制
2. 系统扩展
   - 多区域部署提高可用性
   - 实现自动扩缩容应对负载变化
   - 集成更复杂的工作流管理
3. 功能增强
   - 添加任务取消和优先级调整功能
   - 实现更精细的任务进度跟踪
   - 集成用户反馈和内容评级系统
通过Gemini与Cloud Pub/Sub的集成,您可以构建一个高效、可靠、可扩展的生成式AI应用系统,为用户提供流畅的体验同时确保系统稳定性和资源效率。无论是处理批量文档生成、实时内容创建还是复杂的多步骤AI工作流,这种架构都能为您的应用提供坚实的基础。
创作声明:本文部分内容由AI辅助生成(AIGC),仅供参考



