Technical Analysis of Retrieving the Source of User @ Messages in the bilibili-api Project
Introduction: Why Accurately Retrieve @ Message Sources?
In Bilibili's social ecosystem, @ (at) messages are an important form of user interaction. Whether it is an @ reminder in a video's comment section or an @ notification in a dynamic (feed post), these messages carry rich social information. For developers, accurately retrieving the source of @ messages not only enables smart message reminders but also supports building personalized user-interaction analytics.
This article walks through how the bilibili-api project retrieves the source of a user's @ messages, from the API interface design to practical application scenarios, providing a complete technical solution.
Core API Interfaces for Fetching @ Messages
1. The Main Interface for Fetching @ Messages
In the bilibili-api project, the core functionality for fetching @ messages lives in the get_at method of the session module:
async def get_at(
    credential: Credential,
    last_uid: int = None,
    at_time: int = None
) -> dict:
    """
    Fetch received AT (@) messages.

    Args:
        credential (Credential): Credential object carrying the user's authentication info
        last_uid (int, optional): Pagination cursor taken from the previous page, sent as the "id" query parameter
        at_time (int, optional): Timestamp of the last AT message, used for pagination

    Returns:
        dict: Response data containing the list of AT messages
    """
    api = API["session"]["at"]
    params = {"id": last_uid, "at_time": at_time}
    return await Api(**api, credential=credential).update_params(**params).result
2. API Interface Configuration Details
In the session.json configuration file, the @ message endpoint is defined as follows:
{
    "at": {
        "url": "https://api.bilibili.com/x/msgfeed/at",
        "method": "GET",
        "verify": true,
        "comment": "Fetch unread AT messages"
    }
}
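Because "verify" is true, the endpoint only responds to a logged-in session. As a sanity check independent of the library wrapper, the endpoint can also be called directly. The sketch below is illustrative, not bilibili-api code: it uses aiohttp (my choice), assumes only that a valid SESSDATA cookie is available, and takes the query parameter names from the configuration above.

import asyncio
import aiohttp

AT_URL = "https://api.bilibili.com/x/msgfeed/at"

async def fetch_at_raw(sessdata: str, cursor_id: int = None, at_time: int = None) -> dict:
    """Minimal raw call to the AT feed endpoint (illustrative sketch, not the library's code)."""
    params = {}
    if cursor_id is not None:
        params["id"] = str(cursor_id)        # pagination cursor (see next_offset in the response format below)
    if at_time is not None:
        params["at_time"] = str(at_time)     # timestamp cursor (see next_offset in the response format below)
    headers = {"User-Agent": "Mozilla/5.0", "Referer": "https://www.bilibili.com"}
    cookies = {"SESSDATA": sessdata}         # "verify": true -> a logged-in cookie is required
    async with aiohttp.ClientSession(cookies=cookies, headers=headers) as http:
        async with http.get(AT_URL, params=params) as resp:
            return await resp.json()

# Example: asyncio.run(fetch_at_raw("your SESSDATA"))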
@ Message Data Structure
Typical @ message response format
{
    "code": 0,
    "message": "success",
    "ttl": 1,
    "data": {
        "items": [
            {
                "id": 123456789,          // message ID
                "at_time": 1640995200,    // timestamp of the @ message
                "user": {
                    "uid": 10086,                          // UID of the user who mentioned you
                    "uname": "Example User",               // username of the user who mentioned you
                    "face": "https://example.com/face.jpg" // user avatar
                },
                "item": {
                    "subject_id": 12345,                           // ID of the source content (video / dynamic ID)
                    "source_type": 1,                              // source type: 1 = video, 2 = dynamic
                    "source_content": "This is sample content"     // excerpt of the source content
                },
                "business_id": "video:12345",                      // business identifier
                "uri": "https://www.bilibili.com/video/BV1xxx"     // link to the source content
            }
        ],
        "has_more": true,      // whether more data is available
        "next_offset": {       // pagination cursor for the next page
            "id": 123456788,
            "at_time": 1640995199
        }
    }
}
Message Source Type Mapping
| Source type code | Source type | business_id prefix | Description |
|---|---|---|---|
| 1 | Video comment | video: | A user mentioned you in a video's comment section |
| 2 | Dynamic (feed post) | dynamic: | A user mentioned you in a dynamic |
| 3 | Article | article: | A user mentioned you in an article |
| 4 | Audio | audio: | A user mentioned you in an audio comment section |
| 5 | Live | live: | A user mentioned you during a live stream |
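To make the mapping above executable, the following sketch parses the business_id field (e.g. "video:12345") into a type label and the source content ID. The prefix-to-type mapping is taken directly from the table; the helper itself is illustrative and not part of bilibili-api.

# Prefix-to-type mapping, taken from the table above (illustrative helper, not part of bilibili-api)
SOURCE_TYPES = {
    "video": "video comment",
    "dynamic": "dynamic",
    "article": "article",
    "audio": "audio",
    "live": "live",
}

def parse_business_id(business_id: str) -> tuple:
    """Split a business_id such as 'video:12345' into (type label, source content id)."""
    prefix, _, raw_id = business_id.partition(":")
    return SOURCE_TYPES.get(prefix, "unknown"), raw_id

# Example: parse_business_id("video:12345") -> ("video comment", "12345")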
Hands-On: Building an @ Message Monitoring System
1. Basic @ Message Fetching Example
import asyncio
from bilibili_api import session, Credential

async def monitor_at_messages():
    # Initialize the credential (requires the SESSDATA of a logged-in session)
    credential = Credential(sessdata="your SESSDATA")
    try:
        # Fetch the latest @ messages
        at_messages = await session.get_at(credential)
        if at_messages["code"] == 0:
            for item in at_messages["data"]["items"]:
                print(f"📧 @ received from {item['user']['uname']}")
                print(f"⏰ Time: {item['at_time']}")
                print(f"📝 Source: {item['item']['source_content'][:50]}...")
                print(f"🔗 Link: {item['uri']}")
                print("-" * 50)
    except Exception as e:
        print(f"Failed to fetch @ messages: {e}")

# Run the monitor
asyncio.run(monitor_at_messages())
2. Paginating Through the Full @ Message History
async def get_all_at_messages(credential, max_pages=10):
    """Fetch all @ messages (with pagination)."""
    all_messages = []
    last_id = None
    last_time = None
    for page in range(max_pages):
        messages = await session.get_at(
            credential,
            last_uid=last_id,
            at_time=last_time
        )
        if messages["code"] != 0 or not messages["data"]["items"]:
            break
        all_messages.extend(messages["data"]["items"])
        # Advance the pagination cursor using next_offset (see the response format above)
        if messages["data"]["has_more"]:
            offset = messages["data"]["next_offset"]
            last_id = offset["id"]
            last_time = offset["at_time"]
        else:
            break
    return all_messages
3. Analyzing the Distribution of @ Message Sources
from collections import defaultdict
from datetime import datetime

def analyze_at_messages(messages):
    """Analyze the distribution of @ message sources."""
    analysis = {
        'by_source_type': defaultdict(int),
        'by_user': defaultdict(int),
        'by_hour': defaultdict(int),
        'total_count': len(messages)
    }
    for msg in messages:
        # Count by source type
        source_type = msg['item']['source_type']
        analysis['by_source_type'][source_type] += 1
        # Count by sender
        uid = msg['user']['uid']
        analysis['by_user'][uid] += 1
        # Count by hour of day
        hour = datetime.fromtimestamp(msg['at_time']).hour
        analysis['by_hour'][hour] += 1
    return analysis
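The two helpers above compose naturally. The snippet below is a minimal usage sketch (the credential value is a placeholder and report_top_senders is a name chosen here) that fetches the history and prints the most frequent senders.

async def report_top_senders(credential, top_n=5):
    """Usage sketch: fetch the @ history and print the most frequent senders."""
    messages = await get_all_at_messages(credential, max_pages=10)
    analysis = analyze_at_messages(messages)
    print(f"Total @ messages: {analysis['total_count']}")
    top_senders = sorted(analysis['by_user'].items(), key=lambda kv: kv[1], reverse=True)[:top_n]
    for uid, count in top_senders:
        print(f"UID {uid}: {count} mentions")

# asyncio.run(report_top_senders(Credential(sessdata="your SESSDATA")))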
Advanced Application Scenarios
Scenario 1: Intelligent Message Classification and Routing
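Messages from different origins usually deserve different handling: a mention in a video comment section may warrant an auto-reply, while a mention during a live stream may only need logging. Building on the source-type mapping table above, the sketch below shows one possible router shape; the handler names and the decorator-based registry are assumptions of this article, not part of bilibili-api.

# Illustrative router: dispatch @ messages to handlers by source_type (handler names are assumptions)
SOURCE_HANDLERS = {}

def route(source_type):
    """Register a handler for a given source_type code (1 = video, 2 = dynamic, ...)."""
    def decorator(func):
        SOURCE_HANDLERS[source_type] = func
        return func
    return decorator

@route(1)
async def handle_video_mention(msg):
    print(f"Video comment mention: {msg['uri']}")

@route(2)
async def handle_dynamic_mention(msg):
    print(f"Dynamic mention: {msg['uri']}")

async def dispatch(msg):
    handler = SOURCE_HANDLERS.get(msg['item']['source_type'])
    if handler is None:
        print(f"Unrouted source type: {msg['item']['source_type']}")
        return
    await handler(msg)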
Scenario 2: Real-Time @ Message Monitoring and Alerting
import asyncio
import time
from bilibili_api import session

class AtMessageMonitor:
    def __init__(self, credential, check_interval=60):
        self.credential = credential
        self.check_interval = check_interval
        self.last_check_time = int(time.time())
        self.important_users = {12345, 67890}  # UIDs of important users

    async def start_monitoring(self):
        while True:
            try:
                new_messages = await self.get_new_messages()
                await self.process_messages(new_messages)
                await asyncio.sleep(self.check_interval)
            except Exception as e:
                print(f"Monitoring error: {e}")
                await asyncio.sleep(300)  # Wait 5 minutes after an error

    async def get_new_messages(self):
        messages = await session.get_at(self.credential)
        new_messages = [
            msg for msg in messages.get('data', {}).get('items', [])
            if msg['at_time'] > self.last_check_time
        ]
        self.last_check_time = int(time.time())
        return new_messages

    async def process_messages(self, messages):
        for msg in messages:
            if msg['user']['uid'] in self.important_users:
                await self.send_alert(msg)
            await self.auto_reply(msg)

    async def send_alert(self, message):
        # Send an alert for important @ messages (email / DingTalk / WeChat, etc.)
        pass

    async def auto_reply(self, message):
        # Automatic reply logic
        pass
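Starting the monitor is a single call; this usage sketch assumes the Credential import from the earlier examples and a placeholder SESSDATA.

# Usage sketch (placeholder credential); runs until interrupted
# monitor = AtMessageMonitor(Credential(sessdata="your SESSDATA"), check_interval=120)
# asyncio.run(monitor.start_monitoring())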
Technical Challenges and Solutions
Challenge 1: Message Deduplication
Because the Bilibili API may return duplicate messages, an efficient deduplication mechanism is needed:
from collections import deque

class MessageDeduplicator:
    def __init__(self, max_size=1000):
        self.seen_messages = set()
        self.insertion_order = deque()
        self.max_size = max_size

    def is_duplicate(self, message):
        # Build a unique key from the message ID, sender UID, and timestamp
        msg_id = message['id']
        uid = message['user']['uid']
        timestamp = message['at_time']
        unique_key = f"{msg_id}_{uid}_{timestamp}"
        if unique_key in self.seen_messages:
            return True
        self.seen_messages.add(unique_key)
        self.insertion_order.append(unique_key)
        # Cap the cache size to avoid unbounded memory growth (FIFO eviction of the oldest key)
        if len(self.seen_messages) > self.max_size:
            oldest = self.insertion_order.popleft()
            self.seen_messages.discard(oldest)
        return False
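One way to plug the deduplicator into the monitoring loop above is to filter each freshly fetched batch before processing; this is an illustrative sketch rather than part of the original classes.

deduplicator = MessageDeduplicator(max_size=5000)

async def get_unique_new_messages(monitor: "AtMessageMonitor"):
    """Filter a freshly fetched batch through the deduplicator before it is processed."""
    new_messages = await monitor.get_new_messages()
    return [msg for msg in new_messages if not deduplicator.is_duplicate(msg)]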
Challenge 2: Rate Limiting and Retries
import asyncio
import time
from tenacity import retry, stop_after_attempt, wait_exponential

class RateLimitedAPIClient:
    def __init__(self, max_retries=3, base_delay=1):
        self.max_retries = max_retries
        self.base_delay = base_delay
        self.last_call_time = 0
        self.min_interval = 1.0  # Minimum interval between calls: 1 second

    @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
    async def call_with_retry(self, api_func, *args, **kwargs):
        # Enforce the minimum interval between consecutive calls
        current_time = time.time()
        elapsed = current_time - self.last_call_time
        if elapsed < self.min_interval:
            await asyncio.sleep(self.min_interval - elapsed)
        try:
            result = await api_func(*args, **kwargs)
            self.last_call_time = time.time()
            return result
        except Exception as e:
            if "rate limit" in str(e).lower():
                await asyncio.sleep(5)  # Back off for 5 seconds when rate limited
            raise
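For example, the rate-limited client can front every session.get_at call; this usage sketch assumes the imports and placeholder credential from the earlier examples.

client = RateLimitedAPIClient()

async def rate_limited_fetch(credential):
    """Usage sketch: AT-feed fetches go through the retrying, rate-limited client."""
    return await client.call_with_retry(session.get_at, credential)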
Performance Optimization Tips
1. Batch Message Processing
async def batch_process_messages(messages, batch_size=10):
    """Process @ messages in batches, bounding the number of concurrent tasks."""
    results = []
    for i in range(0, len(messages), batch_size):
        batch = messages[i:i + batch_size]
        # process_single_message is a user-supplied per-message handler
        batch_tasks = [process_single_message(msg) for msg in batch]
        batch_results = await asyncio.gather(*batch_tasks, return_exceptions=True)
        results.extend(batch_results)
    return results
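The batch helper above assumes a process_single_message coroutine exists; the following hypothetical implementation is included only to make the example self-contained.

async def process_single_message(msg):
    """Hypothetical per-message handler used by batch_process_messages above."""
    # Extract the fields most pipelines care about; replace with real business logic.
    return {
        'id': msg['id'],
        'from_uid': msg['user']['uid'],
        'at_time': msg['at_time'],
        'uri': msg['uri'],
    }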
2. Caching Strategy
import time

class MessageCache:
    def __init__(self, maxsize=1000, ttl=3600):
        self.cache = {}
        self.maxsize = maxsize
        self.ttl = ttl

    def get_cache_key(self, last_uid, at_time):
        return f"{last_uid}_{at_time}"

    def get(self, last_uid, at_time):
        key = self.get_cache_key(last_uid, at_time)
        cached = self.cache.get(key)
        if cached and time.time() - cached['timestamp'] < self.ttl:
            return cached['data']
        return None

    def set(self, last_uid, at_time, data):
        key = self.get_cache_key(last_uid, at_time)
        if len(self.cache) >= self.maxsize:
            # Evict the oldest entry when the cache is full
            oldest_key = min(self.cache.keys(), key=lambda k: self.cache[k]['timestamp'])
            del self.cache[oldest_key]
        self.cache[key] = {
            'data': data,
            'timestamp': time.time()
        }
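A usage sketch of the cache wrapped around session.get_at, keyed by the pagination cursor; cached_get_at is a name chosen here, and the imports are assumed from the earlier examples.

cache = MessageCache(maxsize=500, ttl=600)

async def cached_get_at(credential, last_id=None, last_time=None):
    """Usage sketch: serve repeated page requests from the cache within its TTL."""
    cached = cache.get(last_id, last_time)
    if cached is not None:
        return cached
    data = await session.get_at(credential, last_uid=last_id, at_time=last_time)
    cache.set(last_id, last_time, data)
    return data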
Security and Compliance Considerations
1. Protecting User Privacy
import copy

class PrivacyProtection:
    @staticmethod
    def anonymize_user_data(message):
        """Anonymize sensitive user information."""
        # Deep copy so the original message (and its nested dicts) is left untouched
        anonymized = copy.deepcopy(message)
        # Mask identifying fields
        if 'user' in anonymized:
            anonymized['user']['uid'] = '***'
            anonymized['user']['face'] = ''
        # Remove fields that may contain sensitive information
        sensitive_fields = ['ip', 'device_info', 'location']
        for field in sensitive_fields:
            anonymized.pop(field, None)
        return anonymized

    @staticmethod
    def should_store_message(message):
        """Decide whether a message should be stored (compliance check)."""
        # Check whether the content contains sensitive information
        content = message.get('item', {}).get('source_content', '')
        sensitive_keywords = ['密码', '身份证', '手机号', '银行卡']  # password, ID card, phone number, bank card
        return not any(keyword in content for keyword in sensitive_keywords)
2. API Call Rate Compliance
import asyncio
import time

class ComplianceManager:
    def __init__(self):
        self.call_log = []
        self.max_calls_per_minute = 30  # Bilibili API rate budget

    def can_make_call(self):
        current_time = time.time()
        # Drop records older than one minute
        self.call_log = [t for t in self.call_log if current_time - t < 60]
        if len(self.call_log) >= self.max_calls_per_minute:
            return False
        self.call_log.append(current_time)
        return True

    async def compliant_call(self, api_func, *args, **kwargs):
        while not self.can_make_call():
            await asyncio.sleep(1)
        return await api_func(*args, **kwargs)
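A brief usage sketch showing the compliance gate wrapping session.get_at; the credential value is a placeholder and session/Credential are assumed to be imported as in the earlier examples.

async def fetch_with_compliance():
    """Usage sketch: every call passes the rate gate before hitting the API."""
    credential = Credential(sessdata="your SESSDATA")  # placeholder
    manager = ComplianceManager()
    return await manager.compliant_call(session.get_at, credential)

# asyncio.run(fetch_with_compliance())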
Author's note: parts of this article were produced with AI assistance (AIGC) and are provided for reference only.



