html 图灵语音,人工智能 - 基于火狐浏览器的语音识别,语音自动回复

最新推荐文章于 2021-06-30 09:43:02 发布

weixin_39922361

最新推荐文章于 2021-06-30 09:43:02 发布

阅读量309

点赞数

文章标签： html 图灵语音

本文介绍了一款基于火狐浏览器的语音识别与自动回复系统，该系统利用Recorder.js录制音频并通过百度AI进行语音转文字处理，同时使用图灵机器人提供回复内容，并最终通过文字转语音实现交互。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

[TOC]

人工智能 - 基于火狐浏览器的语音识别,语音自动回复

一. 需求工具

下载安装火狐浏览器

因为火狐浏览器允许网页直接调用麦克风进行录音

安装Recorder.js

一个易于使用的录音机，以Matt Diamond的录音机为核心

mongoDB(数据库) - pymongo

可以选择其他数据库

jQuery

Flask框架

二. 不废话,上代码

1. index.HTML

我是玩具

录音

发送语音

// Base URL of the backend that receives recordings.
var serv = "http://192.168.11.206:9527";

// Shared Recorder instance; stays null until create_stream() has run.
var reco = null;

// Web Audio context used to wrap the microphone stream.
var audio_context = new AudioContext();

// Use whichever (possibly vendor-prefixed) legacy getUserMedia the browser exposes.
navigator.getUserMedia =
    navigator.getUserMedia ||
    navigator.webkitGetUserMedia ||
    navigator.mozGetUserMedia ||
    navigator.msGetUserMedia;

// Ask for audio-only access: on success the stream goes to create_stream(),
// on failure the error is just logged to the console.
navigator.getUserMedia(
    {audio: true},
    create_stream,
    function (err) {
        console.log(err);
    }
);

/**
 * Success callback for getUserMedia: turns the granted media stream into a
 * Web Audio source node and attaches a Recorder to it.
 *
 * @param user_media - the stream object handed over by getUserMedia.
 */
function create_stream(user_media) {
    // The recorder is stored in the shared `reco` variable so that
    // start_reco() and stop_reco() can drive it.
    reco = new Recorder(audio_context.createMediaStreamSource(user_media));
}

/**
 * Starts capturing audio with the shared recorder.
 *
 * Does nothing (apart from logging) when the recorder does not exist yet,
 * which is the case until microphone access has been granted.
 */
function start_reco() {
    if (!reco) {
        // `reco` is only assigned inside create_stream(); calling record()
        // on null would throw a TypeError.
        console.log("Recorder is not ready: microphone access has not been granted yet.");
        return;
    }
    reco.record();
}

/**
 * Stops recording, uploads the captured WAV to the backend and shows the reply.
 *
 * The WAV blob is posted as multipart form data to `serv + "/upload"`.
 * When the JSON response has `code == 0`, the returned audio file is loaded
 * into the #player element and the reply text is written into #content.
 * The recorder buffer is cleared afterwards so the next recording starts empty.
 */
function stop_reco() {
    if (!reco) {
        // `reco` is only assigned inside create_stream(); nothing to stop yet.
        console.log("Recorder is not ready: microphone access has not been granted yet.");
        return;
    }

    reco.stop();

    reco.exportWAV(function (wav_file) {
        console.log(wav_file);

        // Multipart form body; "reco" carries the recording as a file field.
        var formdata = new FormData();
        formdata.append("reco", wav_file);
        formdata.append("key", "value");

        $.ajax({
            url: serv + "/upload",
            type: 'post',
            // FormData must be sent untouched: no query-string serialization
            // and no explicit Content-Type (the browser adds the boundary).
            processData: false,
            contentType: false,
            data: formdata,
            dataType: 'json',
            success: function (data) {
                console.log(data);
                if (data.code == 0) {
                    // Reuse the shared base URL instead of repeating the literal address.
                    document.getElementById("player").src = serv + "/get_file/" + data.filename;
                    document.getElementById("content").innerText = data.content;
                }
            },
            error: function (xhr, status, err) {
                // Surface failed uploads instead of ignoring them silently.
                console.log(status, err);
            }
        });
    });

    reco.clear();
}

2. adiou.py(封装的百度AI, 图灵机器人函数)

from aip import AipSpeech

from aip import AipNlp

import os

from uuid import uuid4

""" 你的 APPID AK SK """

# Baidu AI credentials (APP ID / API key / secret key).
# NOTE(review): the fallback values below are committed in source. They can be
# overridden through environment variables so a deployment does not have to
# edit this file; consider rotating the committed keys.
APP_ID = os.environ.get("BAIDU_APP_ID", '15837844')
API_KEY = os.environ.get("BAIDU_API_KEY", '411VNGbuZVbDNZU78LqTzfsV')
SECRET_KEY = os.environ.get("BAIDU_SECRET_KEY", '84AnwR2NARGMqnC6WFnzqQL9WWdWh5bW')

# Speech client (used for recognition and synthesis) and NLP client
# (used for sentence similarity); both share the same credentials.
client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
nlp_client = AipNlp(APP_ID, API_KEY, SECRET_KEY)

def get_file_content(filePath):
    """Convert an audio file to raw PCM with ffmpeg and return the PCM bytes.

    The converted audio is written next to the input as ``<filePath>.pcm``
    (signed 16-bit little-endian, mono, 16000 Hz, as requested on the ffmpeg
    command line) and then read back in full.

    Args:
        filePath: Path of the audio file to convert.

    Returns:
        The bytes of the generated ``.pcm`` file.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
        FileNotFoundError: If the ffmpeg executable cannot be found.
    """
    import subprocess  # local import so this block does not depend on other edits

    source_path = str(filePath)
    pcm_path = f"{source_path}.pcm"

    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters intact; check=True stops us from reading a stale or
    # missing .pcm file after a failed conversion.
    subprocess.run(
        [
            "ffmpeg", "-y",
            "-i", source_path,
            "-acodec", "pcm_s16le",
            "-f", "s16le",
            "-ac", "1",
            "-ar", "16000",
            pcm_path,
        ],
        check=True,
    )

    with open(pcm_path, 'rb') as fp:
        return fp.read()

def audio2text(filePath):
    """Recognize the speech in an audio file and return the first transcript.

    The file is converted to PCM by ``get_file_content`` and sent to the
    Baidu speech client's ``asr`` call.

    Args:
        filePath: Path of the recorded audio file.

    Returns:
        The first entry of the ``result`` list in the recognition response.

    Raises:
        RuntimeError: If the response contains no ``result`` (recognition
            failed); the message carries the service's error fields.
    """
    res = client.asr(get_file_content(filePath), 'pcm', 16000, {
        'dev_pid': 1536,
    })

    candidates = res.get("result")
    if not candidates:
        # Without this check a failed recognition surfaced as an opaque
        # "'NoneType' object is not subscriptable" error.
        raise RuntimeError(
            "speech recognition returned no result: "
            f"err_no={res.get('err_no')}, err_msg={res.get('err_msg')}"
        )

    text = candidates[0]
    print(text)
    return text

import requests

def to_tuling(text, uid):
    """Send *text* to the Tuling robot API (v2) and return its text reply.

    Args:
        text: The user's utterance to send as ``perception.inputText.text``.
        uid: Identifier sent as ``userInfo.userId``.

    Returns:
        The ``values.text`` field of the first entry in the response's
        ``results`` list.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
    """
    # Build the payload directly from the arguments instead of filling in a
    # placeholder dict afterwards.
    data = {
        "perception": {
            "inputText": {
                "text": text,
            },
        },
        "userInfo": {
            "apiKey": "a4c4a668c9f94d0c928544f95a3c44fb",
            "userId": uid,
        },
    }

    # A timeout keeps a stalled remote service from blocking the caller forever.
    res = requests.post(
        "http://openapi.tuling123.com/openapi/api/v2",
        json=data,
        timeout=10,
    )
    res_json = res.json()

    reply = res_json.get("results")[0].get("values").get("text")
    print(reply)
    return reply

def my_nlp(text):
    """Return a canned answer for known questions, otherwise ask Tuling.

    *text* is compared against each known question through the NLP client's
    ``simnet`` call, in order. The first question whose ``score`` is at least
    0.75 decides the answer. If none matches, the text is forwarded to
    ``to_tuling`` with the fixed user id ``"open123"``.

    Args:
        text: The recognized user utterance.

    Returns:
        The reply text.
    """
    # (known question, canned answer), checked in this order.
    canned_replies = (
        ("你叫什么名字", "我叫银王八"),
        ("你今年几岁了", "我今年999岁了"),
    )

    for question, answer in canned_replies:
        if nlp_client.simnet(text, question).get("score") >= 0.75:
            return answer

    return to_tuling(text, "open123")

def text2audio(text):
    """Synthesize *text* to speech and store the audio in a new ``.mp3`` file.

    Args:
        text: The text to speak.

    Returns:
        The generated file name (``<uuid4>.mp3``).
    """
    synthesis_options = {
        'vol': 5,
        'per': 4,
        'spd': 4,
        'pit': 7,
    }
    result = client.synthesis(text, 'zh', 1, synthesis_options)

    filename = f"{uuid4()}.mp3"

    # On success the call returns the audio as binary data; on failure it
    # returns a dict describing the error.
    if isinstance(result, dict):
        # NOTE(review): the name is returned even though no file was written
        # in this case - confirm that callers can cope with a missing file.
        return filename

    with open(filename, 'wb') as f:
        f.write(result)
    return filename

3. app.py(路由视图, 逻辑)

from flask import Flask, render_template, request, jsonify, send_file

from uuid import uuid4

from adiou import audio2text, text2audio, my_nlp

from mongodb import MONGODB

from flask_cors import CORS

# Flask application object; the route decorators below register views on it.
app = Flask(__name__)

# Enable CORS for every route ("/*") and allow requests from any origin.
CORS(app, resources={r"/*": {"origins": "*"}})

# NOTE(review): CORS is initialized a second time here, now with
# supports_credentials=True and default resources. Confirm whether both calls
# are intended or whether they should be merged into a single configuration.
CORS(app, supports_credentials=True)

@app.route('/')
def hello_world():
    """Serve the recorder page by rendering the ``index.html`` template."""
    # An empty line is written to stdout on every request.
    print('')
    page = render_template("index.html")
    return page

@app.route("/upload", methods=["POST"])
def upload():
    """Handle an uploaded recording and answer with a spoken reply.

    Steps: save the uploaded ``reco`` file under a fresh ``<uuid4>.wav`` name,
    transcribe it with ``audio2text``, compute a reply with ``my_nlp``,
    synthesize the reply with ``text2audio``, and log both the question and
    the reply into the ``users`` collection.

    Returns:
        A JSON response ``{"filename", "content", "code": 0}`` on success, or
        ``{"code": 1, "msg": ...}`` with HTTP status 400 when the request
        carries no ``reco`` file.
    """
    fi = request.files.get("reco")
    if fi is None:
        # Without this guard a request lacking the file part failed with an
        # AttributeError on ``None.save``.
        return jsonify({"code": 1, "msg": "missing 'reco' file"}), 400

    fi_name = f"{uuid4()}.wav"
    fi.save(fi_name)

    text = audio2text(fi_name)
    print(text, "text")
    # Store what the user said.
    MONGODB.users.insert_one({"kong": text})

    new_text = my_nlp(text)
    print(new_text, "new_text")
    # Store what the robot answered.
    MONGODB.users.insert_one({"机器人": new_text})

    filename = text2audio(new_text)
    print(filename, "filename")

    ret = {
        "filename": filename,
        "content": new_text,
        "code": 0,
    }
    return jsonify(ret)

@app.route("/get_file/<filename>")
def get_file(filename):
    """Send a previously generated audio file to the client.

    The route must declare the ``<filename>`` URL variable; without it Flask
    calls the view with no arguments and the request fails.

    Args:
        filename: Name of the file to send, taken from the URL.

    Returns:
        The file contents, or an empty 404 response when the requested name
        is not an ``.mp3`` file.
    """
    # The name comes straight from the URL, so only serve the kind of file
    # this application generates instead of any file in the working directory.
    if not filename.endswith(".mp3"):
        return "", 404
    return send_file(filename)

4. mongodb.py

from pymongo import MongoClient

# Client connection to the MongoDB server at 127.0.0.1:27017.
conn = MongoClient(host="127.0.0.1", port=27017)

# Handle to the "db3" database, exported for the rest of the application.
MONGODB = conn.db3

5. run.py(Flask启动)

from app import app

if __name__ == '__main__':
    # Listen on all interfaces, port 9527, with Flask debug mode on.
    # NOTE(review): debug mode combined with a public bind address is only
    # appropriate for local experiments - confirm before deploying.
    app.run(host="0.0.0.0", port=9527, debug=True)