一、获取录音设备
/**
 * Enumerates the audio input devices known to the browser.
 *
 * Entries whose kind is not 'audioinput', or whose deviceId is 'default' or
 * 'communications', are filtered out.
 *
 * @returns {Object} A jQuery promise. It is resolved with an array of the
 *   matching device entries, or rejected with a message string when device
 *   enumeration is unsupported or fails.
 */
var getAudioDevice = function(){
    var deferred = $.Deferred();
    var failureMessage = '该浏览器无法获取录音设备!';

    if(!(navigator.mediaDevices && navigator.mediaDevices.enumerateDevices)){
        deferred.reject(failureMessage);
        return deferred.promise();
    }

    try{
        navigator.mediaDevices.enumerateDevices().then(function(devices){
            var audioInputs = [];
            for(var i = 0; i < devices.length; i++){
                var device = devices[i];
                if(device.kind === 'audioinput' && device.deviceId !== 'communications' && device.deviceId !== 'default'){
                    audioInputs.push(device);
                }
            }
            deferred.resolve(audioInputs);
        }, function(){
            // An asynchronous rejection never reaches the surrounding
            // try/catch, so it has to be forwarded to the Deferred here;
            // otherwise the returned promise would stay pending forever.
            deferred.reject(failureMessage);
        });
    }catch(e){
        // Covers a synchronous throw from enumerateDevices() itself.
        deferred.reject(failureMessage);
    }
    return deferred.promise();
};
二、根据录音设备获取音频数据
;(function(undefined){
    "use strict";
    var _global;

    /**
     * Stops every track of a media stream so the capture device is released.
     * Does nothing when the stream is missing or has no getTracks method.
     */
    function stopStream(stream){
        if(stream && typeof stream.getTracks === "function"){
            var tracks = stream.getTracks();
            for(var i = 0; i < tracks.length; i++){
                tracks[i].stop();
            }
        }
    }

    /**
     * Closes an audio context if it can be closed. Best effort: a rejected
     * close() promise is ignored because there is nothing left to clean up.
     */
    function closeAudioContext(audioCtx){
        if(audioCtx && typeof audioCtx.close === "function" && audioCtx.state !== "closed"){
            var closing = audioCtx.close();
            if(closing && typeof closing.then === "function"){
                closing.then(null, function(){});
            }
        }
    }

    /**
     * Streams microphone audio to a speech recognition WebSocket service.
     *
     * Audio frames are handed to a Web Worker (config.workerPath); every
     * config.interval milliseconds the worker is asked for the encoded audio
     * gathered so far, which is then sent over the WebSocket (config.asrUrl).
     *
     * @param {Object} [config] Overrides for the defaults below.
     * @param {?string} [config.deviceId] Capture device id; falsy means the browser default.
     * @param {string} [config.asrUrl] WebSocket URL of the recognition service.
     * @param {string} [config.workerPath] URL of the worker script.
     * @param {number} [config.downsampleRate] Sample rate passed to the worker; falsy keeps the context rate.
     * @param {number} [config.interval] Polling period in milliseconds.
     * @param {Function} [config.onMessage] Receives {id, final, msg, time} for each transcript.
     * @param {Function} [config.onLog] Receives {code, msg} status events.
     */
    function JscAsr(config){
        this._defaultConfig = {
            deviceId: null,
            asrUrl: "",
            workerPath: "voice_worker.js",
            downsampleRate: 8000,
            interval: 100,
            onMessage: function(){},
            onLog: function(){}
        };

        /**
         * Shallow-copies the own enumerable properties of every argument
         * after the first into the first argument and returns it. A first
         * argument that is not an object or function is replaced by {}.
         */
        this._extend = function(){
            var target = arguments[0];
            if(target === null || (typeof target != "object" && typeof target != "function")){
                target = {};
            }
            for(var i = 1; i < arguments.length; i++){
                var source = arguments[i];
                for(var key in source){
                    if(Object.prototype.hasOwnProperty.call(source, key)){
                        target[key] = source[key];
                    }
                }
            }
            return target;
        };

        /**
         * Formats a date with a pattern such as "yyyy-MM-dd hh:mm:ss".
         * Tokens: y (year), M (month), d (day), h (hours), m (minutes),
         * s (seconds), q (quarter), S (milliseconds). Only the first run of
         * each token is replaced; runs longer than one character are
         * zero-padded to two digits.
         *
         * @param {string} fmt Pattern.
         * @param {Date} date Date to format.
         * @returns {string} Formatted text.
         */
        this._dateFormat = function(fmt, date){
            var fields = {
                "M+" : date.getMonth() + 1,
                "d+" : date.getDate(),
                "h+" : date.getHours(),
                "m+" : date.getMinutes(),
                "s+" : date.getSeconds(),
                "q+" : Math.floor((date.getMonth() + 3) / 3),
                "S" : date.getMilliseconds()
            };
            fmt = fmt.replace(/y+/, function(token){
                return String(date.getFullYear()).slice(4 - token.length);
            });
            var replaceField = function(pattern, value){
                return fmt.replace(new RegExp(pattern), function(token){
                    var text = String(value);
                    return token.length == 1 ? text : ("00" + text).slice(text.length);
                });
            };
            for(var key in fields){
                if(Object.prototype.hasOwnProperty.call(fields, key)){
                    fmt = replaceField(key, fields[key]);
                }
            }
            return fmt;
        };

        // Merge into a fresh object so the defaults are never mutated.
        this.config = this._extend({}, this._defaultConfig, config);

        // recordType: 0 = idle, 1 = recording, 2 = paused (see start/stop).
        this.params = {
            worker: null,
            ws: null,
            audioCtx: null,
            stream: null,
            recordType: 0,
            intervalKey: null,
            isStart: false
        };

        /**
         * Releases everything start()/init() acquired: polling timer,
         * WebSocket, worker, microphone stream and audio context. Resets the
         * state to idle so start() can be called again. Safe to call when
         * nothing was acquired.
         */
        this._release = function(){
            var params = this.params;
            if(params.intervalKey !== null){
                clearInterval(params.intervalKey);
                params.intervalKey = null;
            }
            if(params.ws){
                params.ws.close();
                params.ws = null;
            }
            if(params.worker){
                params.worker.terminate();
                params.worker = null;
            }
            stopStream(params.stream);
            params.stream = null;
            closeAudioContext(params.audioCtx);
            params.audioCtx = null;
            params.recordType = 0;
            params.isStart = false;
        };

        /**
         * Opens the microphone and builds the audio graph.
         *
         * @param {Function} callback Called with {code, msg, data}:
         *   1000 graph ready (data.sampleRate), 1001 audio context not
         *   running, 1002 one audio frame (data is the audioprocess event),
         *   1003 microphone request failed (data is the error),
         *   1004 recording unsupported by this browser.
         */
        this.init = function(callback){
            var self = this;
            window.AudioContext = window.AudioContext || window.webkitAudioContext;
            if(!window.AudioContext){
                callback({
                    code: 1004,
                    msg: "此浏览器中不支持麦克风录音!"
                });
                return;
            }
            var audioCtx = new window.AudioContext();
            if(typeof audioCtx.resume === "function"){
                var resuming = audioCtx.resume();
                if(resuming && typeof resuming.then === "function"){
                    // A rejection is ignored here: the state check right
                    // below already reports a non-running context as 1001.
                    resuming.then(null, function(){});
                }
            }
            if(audioCtx.state != "running"){
                closeAudioContext(audioCtx);
                callback({
                    code: 1001,
                    msg: "请求麦克风使用授权!"
                });
                return;
            }
            self.params.audioCtx = audioCtx;

            // Feeds the first input channel into both outputs of a merger.
            function convertToMono(input){
                var splitter = audioCtx.createChannelSplitter(2);
                var merger = audioCtx.createChannelMerger(2);
                input.connect(splitter);
                splitter.connect(merger, 0, 0);
                splitter.connect(merger, 0, 1);
                return merger;
            }

            function abandonContext(){
                closeAudioContext(audioCtx);
                if(self.params.audioCtx === audioCtx){
                    self.params.audioCtx = null;
                }
            }

            function onStream(stream){
                if(self.params.audioCtx !== audioCtx){
                    // Released while the permission prompt was open: hand
                    // the microphone straight back.
                    stopStream(stream);
                    return;
                }
                self.params.stream = stream;
                self.config.streamId = stream.id;
                var gainNode = audioCtx.createGain();
                var source = audioCtx.createMediaStreamSource(stream);
                var merger = convertToMono(source);
                // Low-pass at half the target rate before the worker
                // decimates; skipped when no downsampling will happen.
                var targetRate = self.config.downsampleRate || audioCtx.sampleRate;
                if(targetRate < audioCtx.sampleRate){
                    var biquadFilter = audioCtx.createBiquadFilter();
                    merger.connect(biquadFilter);
                    biquadFilter.type = biquadFilter.LOWPASS || "lowpass";
                    biquadFilter.frequency.value = targetRate / 2;
                    biquadFilter.connect(gainNode);
                }else{
                    merger.connect(gainNode);
                }
                var bufferLen = 4096;
                var inputChannelLen = 2;
                var outputChannelLen = 2;
                var scriptProcessor = null;
                if(audioCtx.createScriptProcessor){
                    scriptProcessor = audioCtx.createScriptProcessor(bufferLen, inputChannelLen, outputChannelLen);
                }else{
                    // Older prefixed name of the same factory.
                    scriptProcessor = audioCtx.createJavaScriptNode(bufferLen, inputChannelLen, outputChannelLen);
                }
                scriptProcessor.onaudioprocess = function(e){
                    callback({
                        code: 1002,
                        msg: "正在获取录音数据!",
                        data: e
                    });
                };
                gainNode.connect(scriptProcessor);
                scriptProcessor.connect(audioCtx.destination);
                // The captured signal also reaches the destination through
                // a gain node set to 0.
                var zeroGain = audioCtx.createGain();
                zeroGain.gain.value = 0.0;
                gainNode.connect(zeroGain);
                zeroGain.connect(audioCtx.destination);
                callback({
                    code: 1000,
                    msg: "录音设备初始化成功!",
                    data: {
                        sampleRate: audioCtx.sampleRate
                    }
                });
            }

            function onError(err){
                abandonContext();
                callback({
                    code: 1003,
                    msg: "录音设备初始化失败!",
                    data: err
                });
            }

            var deviceId = self.config.deviceId;
            var legacyGetUserMedia = navigator.getUserMedia || navigator.webkitGetUserMedia || navigator.mozGetUserMedia;
            if(legacyGetUserMedia){
                legacyGetUserMedia.call(navigator, {
                    audio: deviceId ? { optional: [{sourceId: deviceId}] } : true
                }, onStream, onError);
            }else if(navigator.mediaDevices && navigator.mediaDevices.getUserMedia){
                // Promise-based API for browsers without the callback form.
                navigator.mediaDevices.getUserMedia({
                    audio: deviceId ? { deviceId: deviceId } : true
                }).then(onStream, onError);
            }else{
                abandonContext();
                callback({
                    code: 1004,
                    msg: "此浏览器中不支持麦克风录音!"
                });
            }
        };

        /**
         * Starts recognition, or resumes it when paused. Does nothing while
         * already recording or while a previous start() is still
         * initialising.
         */
        this.start = function(){
            var self = this;
            if(self.params.recordType == 1){
                return;
            }
            if(self.params.recordType == 2){
                self.params.recordType = 1;
                return;
            }
            if(self.params.worker){
                // A previous start() is still waiting for the microphone.
                return;
            }
            var worker = new Worker(self.config.workerPath);
            var ws;
            try{
                ws = new WebSocket(self.config.asrUrl);
            }catch(err){
                worker.terminate();
                throw err;
            }
            self.params.worker = worker;
            self.params.ws = ws;

            worker.onmessage = function(e){
                var ret = e.data;
                if(ws.readyState == 1 && ret.action == "get16KMonoBlob"){
                    ws.send(ret.data);
                }
            };
            ws.onopen = function(){
                if(self.params.ws !== ws){
                    return;
                }
                self.params.intervalKey = setInterval(function(){
                    if(self.params.recordType == 1){
                        worker.postMessage({
                            action: "get16KMonoBlob",
                            params: {
                                type: 'audio/wav'
                            }
                        });
                    }
                }, self.config.interval);
                self.config.onLog({
                    code: 2000,
                    msg: "语音识别服务连接成功!"
                });
            };
            ws.onmessage = function(e){
                var ret;
                try{
                    ret = JSON.parse(e.data);
                }catch(parseError){
                    // Report malformed payloads instead of throwing inside
                    // the socket event handler.
                    self.config.onLog({
                        code: 2003,
                        msg: "语音识别结果解析失败!"
                    });
                    return;
                }
                if(ret && ret.status == 0 && ret.result && ret.result.hypotheses && ret.result.hypotheses[0] && ret.result.hypotheses[0].transcript){
                    self.config.onMessage({
                        id: self.config.deviceId,
                        final: ret.result.final,
                        msg: ret.result.hypotheses[0].transcript,
                        time: self._dateFormat("yyyy-MM-dd hh:mm:ss", new Date())
                    });
                }
            };
            ws.onclose = function(){
                self.config.onLog({
                    code: 2001,
                    msg: "语音识别连接关闭!"
                });
            };
            ws.onerror = function(){
                self.config.onLog({
                    code: 2002,
                    msg: "语音识别连接失败!"
                });
            };

            self.init(function(ret){
                if(self.params.worker !== worker){
                    // This session was released; ignore late audio events.
                    return;
                }
                if(ret.code == 1002){
                    if(self.params.recordType == 1){
                        worker.postMessage({
                            action: "record",
                            params: {
                                inputBufferLeft: ret.data.inputBuffer.getChannelData(0),
                                inputBufferRight: ret.data.inputBuffer.getChannelData(1)
                            }
                        });
                        if(!self.params.isStart){
                            self.config.onLog({
                                code: 3000,
                                msg: "开始语音识别!"
                            });
                            self.params.isStart = true;
                        }
                    }
                }else if(ret.code == 1000){
                    worker.postMessage({
                        action: "config",
                        params: {
                            sampleRate: ret.data.sampleRate,
                            downsampleRate: self.config.downsampleRate || ret.data.sampleRate
                        }
                    });
                    self.params.recordType = 1;
                }else{
                    // Initialisation failed: give back everything acquired.
                    self._release();
                }
                self.config.onLog({
                    code: ret.code,
                    msg: ret.msg
                });
            });
        };

        /**
         * Pauses recognition; start() resumes it. Does nothing when
         * recognition was never started, so a later start() still performs
         * the full initialisation.
         */
        this.stop = function(){
            if(this.params.recordType == 0){
                return;
            }
            this.params.recordType = 2;
            this.params.isStart = false;
            this.config.onLog({
                code: 3001,
                msg: "暂停语音识别!"
            });
        };

        /**
         * Releases every resource held by this instance. Safe to call even
         * when start() was never called.
         */
        this.destroy = function(){
            this._release();
            this.config.onLog({
                code: 4001,
                msg: "销毁语音识别对象!"
            });
        };
    }

    // Resolve the global object without eval where possible; the indirect
    // eval of the constant "this" is kept only as a last resort because it
    // is rejected by a Content-Security-Policy without 'unsafe-eval'.
    _global = typeof globalThis !== "undefined" ? globalThis
        : typeof window !== "undefined" ? window
        : typeof self !== "undefined" ? self
        : (0, eval)("this");

    if(typeof module !== "undefined" && module.exports){
        module.exports = JscAsr;
    }else if(typeof define === "function" && define.amd){
        define(function(){
            return JscAsr;
        });
    }else{
        !("JscAsr" in _global) && (_global.JscAsr = JscAsr);
    }
}());
三、处理音频数据
// Worker global scope. `self` is preferred because a top-level `this` is
// undefined when the script is evaluated as a module.
var global = typeof self !== "undefined" ? self : (typeof globalThis !== "undefined" ? globalThis : this);

/**
 * Dispatches an incoming message to the vioceWorker method named by
 * data.action. Messages without an action, or naming anything other than an
 * own function property of vioceWorker, are ignored.
 */
global.onmessage = function(e){
    var receiveData = e.data || {};
    var action = receiveData.action;
    if(action
        && Object.prototype.hasOwnProperty.call(vioceWorker, action)
        && typeof vioceWorker[action] === "function"){
        vioceWorker[action](receiveData);
    }
};

/**
 * Buffers incoming audio frames and, on request, converts the buffered left
 * channel to 16-bit little-endian PCM at the configured output rate.
 */
var vioceWorker = {
    sampleRate: 0,
    downsampleRate: 0,
    recordBuffersLeft: [],
    recordBuffersRight: [],
    recordLength: 0,

    /**
     * Stores the input sample rate and the output rate. A falsy
     * downsampleRate falls back to the input rate.
     */
    config: function(data){
        var self = this;
        self.sampleRate = data.params.sampleRate;
        self.downsampleRate = data.params.downsampleRate || data.params.sampleRate;
    },

    /**
     * Appends one frame (data.params.inputBufferLeft / inputBufferRight) to
     * the buffers. recordLength counts left-channel samples.
     */
    record: function(data){
        var self = this;
        var buffersLeft = data.params.inputBufferLeft;
        var buffersRight = data.params.inputBufferRight;
        self.recordBuffersLeft.push(buffersLeft);
        self.recordBuffersRight.push(buffersRight);
        self.recordLength += buffersLeft.length;
    },

    /**
     * Encodes the buffered left channel, clears the buffers and posts
     * {action, data: Blob} back. The Blob holds headerless PCM samples at
     * downsampleRate; its MIME type is data.params.type.
     */
    get16KMonoBlob: function(data){
        var self = this;
        var bufferLeft = self.mergeBuffers(self.recordBuffersLeft, self.recordLength);
        var dataview = self.encodeRAW(self.downsample(bufferLeft, self.sampleRate, self.downsampleRate));
        var audioBlob = new Blob([dataview], {type: data.params.type});
        self.clear();
        global.postMessage({
            action: data.action,
            data: audioBlob
        });
    },

    /**
     * Concatenates recBuffers into one Float32Array of recLength samples.
     */
    mergeBuffers: function(recBuffers, recLength){
        var result = new Float32Array(recLength);
        var offset = 0;
        for (var i = 0; i < recBuffers.length; i++) {
            result.set(recBuffers[i], offset);
            offset += recBuffers[i].length;
        }
        return result;
    },

    /**
     * Converts float samples to a DataView of 16-bit little-endian PCM.
     */
    encodeRAW: function(samples){
        var self = this;
        var buffer = new ArrayBuffer(samples.length * 2);
        var view = new DataView(buffer);
        self.floatTo16BitPCM(view, 0, samples);
        return view;
    },

    /**
     * Reduces the sample rate by picking the nearest earlier input sample
     * for each output sample. Returns the input unchanged when sampleRate
     * is not greater than outputSampleRate.
     */
    downsample: function(e, sampleRate, outputSampleRate){
        if (sampleRate <= outputSampleRate) return e;
        var inputLength = e.length;
        var ratio = sampleRate / outputSampleRate;
        var outputLength = Math.ceil(inputLength * outputSampleRate / sampleRate);
        var output = new Float32Array(outputLength);
        for (var i = 0; i < outputLength; i++) {
            // Derive the index from i instead of a running sum so rounding
            // error cannot accumulate, and clamp it so it can never read
            // past the end of the input.
            output[i] = e[Math.min(inputLength - 1, Math.floor(i * ratio))];
        }
        return output;
    },

    /**
     * Writes input to output (a DataView) starting at byte offset as signed
     * 16-bit little-endian integers, clamping each sample to [-1, 1] first.
     */
    floatTo16BitPCM: function(output, offset, input){
        for (var i = 0; i < input.length; i++, offset += 2) {
            var s = Math.max(-1, Math.min(1, input[i]));
            output.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7FFF, true);
        }
    },

    /** Drops all buffered audio. */
    clear: function(){
        var self = this;
        self.recordBuffersLeft = [];
        self.recordBuffersRight = [];
        self.recordLength = 0;
    },

    /** Drops all buffered audio and closes the worker scope. */
    close: function(){
        this.clear();
        global.close();
    }
};
四、推送音频数据给语音识别服务获取识别结果
五、处理业务