Huawei Cloud Flexus + DeepSeek Essay Contest | Integrating Huawei Cloud ModelArts Studio Large Models into the Enterprise AI Meeting-Minutes Scenario

I. Solution Architecture and Core Code

  1. Basic Environment Setup

Huawei Cloud ModelArts SDK initialization:

from modelarts.session import Session
from modelarts.estimator import Estimator

session = Session(access_key='your_access_key',
                 secret_key='your_secret_key',
                 project_id='your_project_id',
                 region='cn-north-4')

# Create the notebook development environment
estimator = Estimator(
    modelarts_session=session,
    train_instance_type='ml.p3.large',
    train_instance_count=1,
    framework_type='PyTorch-1.8',
    framework_version='py3',
    log_url='obs://your-bucket/logs/'
)
  2. Meeting Audio Processing Module
import numpy as np
from huaweicloud_sis.client.rasr_client import RasrClient
from huaweicloud_sis.bean.rasr_request import RasrRequest

# Initialize the Huawei Cloud speech recognition (SIS) client
def init_speech_client():
    ak = 'your_ak'
    sk = 'your_sk'
    region = 'cn-north-4'
    project_id = 'your_project_id'
    return RasrClient(ak, sk, region, project_id)

# Convert audio to text
def audio_to_text(audio_path):
    client = init_speech_client()
    request = RasrRequest()
    request.set_audio_format('wav')  # supported formats include wav, mp3, etc.
    request.set_property('speaker_diarization', 'true')  # enable speaker diarization
    request.add_word('公司术语')  # add custom vocabulary (hot words)

    with open(audio_path, 'rb') as f:
        audio_data = f.read()
    request.set_data(audio_data)

    result = client.short_audio_recognize(request)
    return result.get_result()

# Example usage
transcript = audio_to_text('meeting_audio.wav')
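The shape of the result returned by `result.get_result()` depends on the SIS SDK version. As a minimal post-processing sketch, assuming the result is a list of segments with hypothetical `speaker` and `text` fields, the diarized output can be flattened into a speaker-tagged transcript before it is fed to the model:

```python
# Sketch: flatten diarized ASR segments into a speaker-tagged transcript.
# The segment fields ('speaker', 'text') are assumptions about the SIS response
# shape; adjust them to match what short_audio_recognize() actually returns.
def format_transcript(segments):
    lines = []
    for seg in segments:
        speaker = seg.get('speaker', 'Unknown')
        text = seg.get('text', '').strip()
        if text:
            lines.append(f"[{speaker}] {text}")
    return "\n".join(lines)

# Example:
# transcript = format_transcript(audio_to_text('meeting_audio.wav'))
```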

II. Large Model Integration Code

  1. Loading the Flexus + DeepSeek Model
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Pre-trained model hosted on Huawei Cloud ModelArts (OBS path)
model_path = 'obs://your-bucket/models/flexus-deepseek-meeting/'

tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype=torch.float16,
    trust_remote_code=True
)

# Prompt template for meeting-minutes generation
MEETING_PROMPT = """
作为专业的会议纪要助手,请根据以下会议转录内容生成结构化会议纪要:

会议主题: {meeting_topic}
参会人员: {participants}
会议时间: {meeting_time}
转录内容:
{transcript}

请按照以下格式输出:
### 会议摘要
- 主要讨论点1
- 主要讨论点2

### 决策事项
1. 事项描述 (负责人: xxx, 截止时间: yyyy-mm-dd)

### 待办事项
- [ ] 任务1 (负责人: xxx)
- [ ] 任务2 (负责人: xxx)
"""
  2. Core Logic for Minutes Generation
import re

def generate_meeting_minutes(transcript, meeting_info):
    # Pre-process the input: fill in the prompt template
    inputs = MEETING_PROMPT.format(
        meeting_topic=meeting_info['topic'],
        participants=", ".join(meeting_info['participants']),
        meeting_time=meeting_info['time'],
        transcript=transcript
    )

    # Model inference
    input_ids = tokenizer.encode(inputs, return_tensors="pt").to('cuda')
    outputs = model.generate(
        input_ids,
        max_length=2048,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
        num_return_sequences=1
    )

    # Post-process the generated text
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return post_process_output(generated_text)

def post_process_output(text):
    # Extract the structured "### ..." sections from the generated output
    pattern = r'### (.*?)\n(.*?)(?=###|$)'
    sections = re.findall(pattern, text, re.DOTALL)

    result = {
        'summary': [],
        'decisions': [],
        'action_items': []
    }

    for section in sections:
        title, content = section
        if '摘要' in title:
            result['summary'] = [item.strip() for item in content.split('-')[1:]]
        elif '决策' in title:
            result['decisions'] = parse_decisions(content)
        elif '待办' in title:
            result['action_items'] = parse_action_items(content)

    return result

def parse_decisions(content):
    # Parse decision items of the form "1. 事项描述 (负责人: xxx, 截止时间: yyyy-mm-dd)"
    decisions = []
    for line in content.split('\n'):
        if not line.strip():
            continue
        match = re.match(r'\d+\. (.*?) \(负责人: (.*?), 截止时间: (.*?)\)', line)
        if match:
            desc, owner, deadline = match.groups()
            decisions.append({
                'description': desc,
                'owner': owner,
                'deadline': deadline
            })
    return decisions
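`post_process_output` also calls `parse_action_items`, which is not defined above. A minimal sketch matching the checkbox format produced by the prompt template:

```python
# Sketch: parse the "待办事项" checkbox lines produced by the prompt template.
# Matches lines like "- [ ] 任务1 (负责人: xxx)"; the owner part is optional.
import re

def parse_action_items(content):
    items = []
    for line in content.split('\n'):
        match = re.match(r'- \[[ xX]\] (.*?)(?: \(负责人: (.*?)\))?$', line.strip())
        if match:
            task, owner = match.groups()
            items.append({'task': task.strip(), 'owner': owner})
    return items
```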

III. Enterprise System Integration Code

  1. Integration with the Huawei Cloud Meeting Service
from huaweicloudsdkcore.auth.credentials import BasicCredentials
from huaweicloudsdkmeeting.v1 import *

# Initialize the Meeting service client
def init_meeting_client():
    credentials = BasicCredentials(
        'your_ak',
        'your_sk',
        'your_project_id'
    )
    return MeetingClient.new_builder() \
        .with_credentials(credentials) \
        .with_region(MeetingRegion.value_of('cn-north-4')) \
        .build()

# List a meeting's recording files
def get_meeting_recordings(meeting_id):
    client = init_meeting_client()
    request = ListRecordingsRequest()
    request.conference_id = meeting_id
    response = client.list_recordings(request)
    return response.recordings

# Download a meeting recording
def download_recording(recording_id, save_path):
    client = init_meeting_client()
    request = DownloadRecordingRequest()
    request.recording_id = recording_id
    response = client.download_recording(request, stream=True)
    
    with open(save_path, 'wb') as f:
        for chunk in response.iter_content(chunk_size=8192):
            f.write(chunk)
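The ASR request in section I is configured for `wav` input, while Meeting recordings are commonly delivered as MP4 files. A minimal conversion sketch, assuming the `ffmpeg` command-line tool is installed on the host; paths are illustrative:

```python
# Sketch: convert a downloaded recording to 16 kHz mono WAV for the ASR service.
# Assumes the ffmpeg binary is available on PATH.
import subprocess

def convert_to_wav(src_path, dst_path):
    subprocess.run(
        ['ffmpeg', '-y', '-i', src_path,
         '-ac', '1', '-ar', '16000',   # mono, 16 kHz: a common ASR input format
         dst_path],
        check=True
    )
    return dst_path

# Example: convert_to_wav('/tmp/meeting123.mp4', '/tmp/meeting123.wav')
```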
  2. Integration with the Enterprise Knowledge Base
import requests
from datetime import datetime

Save the meeting minutes into the enterprise knowledge base:

def save_to_knowledge_base(meeting_minutes, meeting_info):
    payload = {
        "document_type": "meeting_minutes",
        "title": f"{meeting_info['topic']}会议纪要",
        "content": meeting_minutes,
        "metadata": {
            "participants": meeting_info['participants'],
            "meeting_time": meeting_info['time'],
            "created_at": datetime.now().isoformat(),
            "related_projects": meeting_info.get('projects', [])
        },
        "tags": ["auto-generated", "meeting"]
    }

    headers = {
        "Content-Type": "application/json",
        "Authorization": "Bearer your_kb_token"
    }

    response = requests.post(
        "https://your-kb-api/api/v1/documents",
        json=payload,
        headers=headers
    )
    return response.json()
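The payload above stores the structured dict returned by `post_process_output` directly under `content`. If the knowledge base expects plain text instead, the dict can be rendered back into markdown first; a minimal sketch that mirrors the section layout of `MEETING_PROMPT` (the `render_minutes_markdown` helper is hypothetical):

```python
# Sketch: render the structured minutes dict back into markdown text,
# mirroring the section layout defined in MEETING_PROMPT.
def render_minutes_markdown(minutes):
    lines = ["### 会议摘要"]
    lines += [f"- {point}" for point in minutes.get('summary', [])]

    lines.append("\n### 决策事项")
    for i, d in enumerate(minutes.get('decisions', []), 1):
        lines.append(f"{i}. {d['description']} (负责人: {d['owner']}, 截止时间: {d['deadline']})")

    lines.append("\n### 待办事项")
    for item in minutes.get('action_items', []):
        owner = f" (负责人: {item['owner']})" if item.get('owner') else ""
        lines.append(f"- [ ] {item['task']}{owner}")

    return "\n".join(lines)
```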

IV. Complete Workflow Example

Main processing pipeline:

def process_meeting(meeting_id):
    # 1. Fetch the recordings from the Meeting service
    recordings = get_meeting_recordings(meeting_id)
    if not recordings:
        raise Exception("未找到会议录音")

    # 2. Download the latest recording
    latest_recording = sorted(recordings, key=lambda x: x.create_time)[-1]
    audio_path = f"/tmp/{meeting_id}.wav"
    download_recording(latest_recording.id, audio_path)

    # 3. Speech-to-text
    transcript = audio_to_text(audio_path)

    # 4. Fetch meeting metadata
    meeting_info = get_meeting_info(meeting_id)  # implementation omitted

    # 5. Generate the meeting minutes
    minutes = generate_meeting_minutes(transcript, meeting_info)

    # 6. Save to the knowledge base
    save_result = save_to_knowledge_base(minutes, meeting_info)

    # 7. Notify the participants
    notify_participants(meeting_info['participants'], save_result['url'])

    return {
        'status': 'success',
        'minutes_url': save_result['url']
    }

Example invocation:

if __name__ == "__main__":
    result = process_meeting("meeting123")
    print(f"会议纪要处理完成,访问地址: {result['minutes_url']}")
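`process_meeting` calls `notify_participants`, which is not defined above. How notifications are delivered depends on the enterprise IM or mail system; a minimal e-mail sketch using the standard library, where the SMTP host, sender and name-to-address mapping are placeholders:

```python
# Sketch: notify participants by e-mail via the standard library.
# SMTP host, credentials and the name-to-address mapping are placeholders.
import smtplib
from email.mime.text import MIMEText

def notify_participants(participants, minutes_url,
                        smtp_host='smtp.example.com',
                        sender='meeting-bot@example.com'):
    body = f"Meeting minutes are ready: {minutes_url}"
    with smtplib.SMTP(smtp_host) as server:
        for name in participants:
            address = f"{name}@example.com"  # replace with a real directory lookup
            msg = MIMEText(body, 'plain', 'utf-8')
            msg['Subject'] = "Meeting minutes"
            msg['From'] = sender
            msg['To'] = address
            server.send_message(msg)
```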
V. Model Training and Optimization Code

  1. Domain-Adaptation Fine-Tuning
from modelarts.train import TrainingJob

Create the fine-tuning training job:

def fine_tune_model(train_data_path):
    job = TrainingJob(
        name='flexus-deepseek-meeting-ft',
        algorithm='PyTorch-1.8',
        inputs=[
            {
                'data_url': train_data_path,
                'type': 'obs'
            }
        ],
        outputs=[
            {
                'train_url': 'obs://your-bucket/output/',
                'type': 'obs'
            }
        ],
        parameters={
            'learning_rate': 5e-5,
            'epochs': 3,
            'batch_size': 8,
            'max_seq_length': 2048
        },
        code_dir='obs://your-bucket/code/',
        boot_file='train.py',
        instance_type='ml.p3.8xlarge',
        instance_count=2
    )

    job.create()
    job.run()
    return job
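The fine-tuning job assumes a supervised dataset already exists at `train_data_path`. A minimal sketch of building such a dataset as JSONL prompt/response pairs from historical transcripts and human-written minutes; the record field names (`prompt`, `response`) are an assumption about what `train.py` expects:

```python
# Sketch: build a JSONL fine-tuning dataset of prompt/response pairs.
# Each record pairs a filled-in MEETING_PROMPT with the human-written minutes;
# the field names ("prompt", "response") are assumptions of the training script.
import json

def build_training_file(samples, output_path='meeting_ft_train.jsonl'):
    with open(output_path, 'w', encoding='utf-8') as f:
        for sample in samples:
            record = {
                'prompt': MEETING_PROMPT.format(
                    meeting_topic=sample['topic'],
                    participants=", ".join(sample['participants']),
                    meeting_time=sample['time'],
                    transcript=sample['transcript']
                ),
                'response': sample['reference_minutes']
            }
            f.write(json.dumps(record, ensure_ascii=False) + '\n')
    return output_path
```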
  2. Evaluation Script Example

train.py:

import torch
from transformers import Trainer, TrainingArguments

Custom evaluation metrics:

def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    # Accuracy of key-information extraction
    key_info_acc = calculate_key_info_accuracy(decoded_preds, decoded_labels)

    # Format-compliance score
    format_score = calculate_format_score(decoded_preds)

    return {
        'key_info_accuracy': key_info_acc,
        'format_score': format_score
    }

Training configuration:

training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    logging_dir='./logs',
    logging_steps=50,
    learning_rate=5e-5,
    fp16=True,
    report_to="none"
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics
)

trainer.train()
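`compute_metrics` relies on `calculate_key_info_accuracy` and `calculate_format_score`, which are not shown. A minimal sketch of the format-compliance check, counting how many generated minutes contain all three sections required by the prompt template:

```python
# Sketch: fraction of generated minutes containing all three required sections.
# The section titles mirror the output format defined in MEETING_PROMPT.
REQUIRED_SECTIONS = ("### 会议摘要", "### 决策事项", "### 待办事项")

def calculate_format_score(decoded_preds):
    if not decoded_preds:
        return 0.0
    compliant = sum(
        1 for text in decoded_preds
        if all(section in text for section in REQUIRED_SECTIONS)
    )
    return compliant / len(decoded_preds)
```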
VI. Deployment and API Service

  1. ModelArts Online Service Deployment
from modelarts.deploy import Predictor

Create the online inference service:

def deploy_model(model_path):
    predictor = Predictor(
        name='meeting-minutes-service',
        model_path=model_path,
        inference_spec='inference.py',
        instance_type='ml.p2.large',
        instance_count=1,
        framework='PyTorch-1.8',
        framework_version='py3',
        wait=True
    )
    predictor.create()
    return predictor

inference.py example:

from flask import Flask, request, jsonify

app = Flask(__name__)

@app.route('/predict', methods=['POST'])
def predict():
    data = request.json
    transcript = data['transcript']
    meeting_info = data['meeting_info']

    minutes = generate_meeting_minutes(transcript, meeting_info)
    return jsonify(minutes)

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=8080)
  2. API Invocation Example
import requests

def call_meeting_minutes_api(transcript, meeting_info):
    url = "https://your-endpoint/predict"
    headers = {
        "Content-Type": "application/json",
        "X-Auth-Token": "your_api_token"
    }
    payload = {
        "transcript": transcript,
        "meeting_info": meeting_info
    }

    response = requests.post(url, json=payload, headers=headers)
    return response.json()

Usage example:

result = call_meeting_minutes_api(transcript, {
    "topic": "Q3产品规划会议",
    "participants": ["张三", "李四", "王五"],
    "time": "2023-09-15 14:00"
})
VII. Security and Access Control

Huawei Cloud IAM permission verification decorator:

from functools import wraps
from flask import request, jsonify

def iam_required(permission):
    def decorator(f):
        @wraps(f)
        def decorated_function(*args, **kwargs):
            token = request.headers.get('X-Auth-Token')
            if not verify_iam_token(token, permission):
                return jsonify({"error": "Unauthorized"}), 403
            return f(*args, **kwargs)
        return decorated_function
    return decorator
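`verify_iam_token` is referenced but not implemented above. A minimal sketch validating the token against the Keystone-compatible IAM v3 endpoint; the endpoint URL and the mapping from IAM role names to the `permission` string are assumptions to adapt to your IAM setup:

```python
# Sketch: validate an IAM token via the v3 token-verification API.
# The regional endpoint and the role-to-permission mapping are assumptions;
# adjust to your IAM configuration (e.g. custom policies instead of role names).
import requests

def verify_iam_token(token, permission,
                     iam_endpoint='https://iam.cn-north-4.myhuaweicloud.com'):
    if not token:
        return False
    resp = requests.get(
        f"{iam_endpoint}/v3/auth/tokens",
        headers={'X-Auth-Token': token, 'X-Subject-Token': token}
    )
    if resp.status_code != 200:
        return False
    roles = {r.get('name') for r in resp.json().get('token', {}).get('roles', [])}
    # Hypothetical mapping: permission strings are granted via IAM role names
    return permission in roles or 'admin' in roles
```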

Data encryption handling:

from huaweicloudsdkcore.auth.encryption_signer import EncryptionSigner

def encrypt_sensitive_data(data):
    signer = EncryptionSigner('your_encryption_key')
    return signer.encrypt(data)

Usage example:

@iam_required('MeetingMinutes.Write')
def save_minutes():
    data = request.json
    encrypted_data = encrypt_sensitive_data(data['content'])
    # storage handling…
This solution provides a complete code path from audio processing through large model integration to enterprise system integration; developers can adjust the parameters and workflow to their actual needs. Thorough testing and performance optimization are recommended before production deployment, in particular fine-tuning the model on the enterprise's specific meeting scenarios and terminology.

### Allocating Server Resources on Huawei Cloud ModelArts

Huawei Cloud ModelArts is a full-lifecycle AI development platform covering data preprocessing, model training, deployment, and more. Its server resource configuration can be approached as follows:

#### 1. Choosing Compute Resources

ModelArts offers multiple types of compute resources for different scenarios. For example, the Flexus cloud service provides Flexus X cloud server instances and Flexus L application server instances as underlying infrastructure[^1]. Developers should pick the instance type that matches the workload, such as GPU-accelerated instances for deep learning training.

A task like YOLO model development depends more heavily on high-performance GPU resources to speed up training[^2]. Before starting, determine the required hardware specification (CPU cores, memory size, GPU type and count) and complete the configuration through the options the platform provides.

#### 2. Account Management and First-Time Setup

After receiving the individual account issued by the administrator, you are prompted to change the default password on first login[^4]. This step protects personal privacy and strengthens system security.

Then follow the guide to the main console and confirm that the selected region is correct, since different regions map to different data centers and service latencies. The "favorites" feature helps you quickly locate frequently used tools and modules later on.

#### 3. Configuration Steps

- Log in at the designated URL with your username and the new password;
- Open the full service list via the "hamburger" menu on the left;
- Select the target region, then locate the entry named "ModelArts" and click it to enter the service.

A further tip: pinning the service icon keeps frequently used items in a visible position so they can be opened at any time.

```python
# Example: connect to a remote server for automated scripting
import paramiko

ssh_client = paramiko.SSHClient()
ssh_client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
ssh_client.connect(hostname='your_server_ip', username='root', password='password')
stdin, stdout, stderr = ssh_client.exec_command('nvidia-smi')
print(stdout.read().decode())
ssh_client.close()
```

The above covers the basics of server resource configuration on the Huawei Cloud ModelArts platform. Planning these parameters properly improves efficiency while keeping costs under control.