语音识别
赘述:
- 这里提到的两种方法都依赖于浏览器来实现语音识别,然后进行录音。
[En]
Both methods mentioned here rely on the browser to record the audio, which is then sent off for recognition.
- 继而将语音流传给后端、调取第三方 API 进行语音识别
关注点:
- 使用阿里云 – 指定识别16k的wav后缀名的音频文件
- 因先学习阿里云- 此时已get到转换音频文件的js方法
- so.未关注百度云、但是转换之后的音频流是肯定会被识别的
基本步骤:
- html中引入audio标签为后期传入音频流使用
- 两个按钮:开始、停止(播放)
- 调用recorder.js对音频文件进行处理
- 传给node服务器,调取api接口进行语音识别
重要步骤
- 在recorder.js中基本封装对音频的转换方法,不需要我们过多关注
- 重点关注音频流转换时如何将音频流传输到后台。
[En]
Focus on how to transmit the audio stream to the backend once it has been converted.
- (依靠a标签的属性)将识别好的音频流转换成文件下载到本地,使用node服务读取本地文件
- 直接将音频流传递给后台,使用formdata传输的形式,将其传递给node服务器
使用前端html+nodejs+阿里云语音识别 API
阿里云使用axios发送请求的方式,进行前端与nodejs交互
html
<!-- FIX: the original used JS-style "//" comments, which are invalid in HTML
     and render as literal text; HTML comments used instead. -->
<!-- audio element: playback target for the recorded stream -->
<audio controls autoplay></audio>
<!-- two buttons: start recording / stop and play back -->
<input onclick="startRecording()" type="button" value="录音">
<input onclick="playRecording()" type="button" value="播放">
js
- 本地js
// Recorder instance, created lazily by HZRecorder.get() on first use.
var recorder;
var audio = document.querySelector('audio');

// Start capturing microphone audio.
function startRecording() {
    HZRecorder.get(function (rec) {
        recorder = rec;
        recorder.start();
    });
}

// Stop recording and play the capture back through the <audio> element.
function playRecording() {
    recorder.play(audio);
}

// Poll the backend for the latest recognition result.
// NOTE(review): '.audio' selects elements with class "audio"; the <audio>
// tag in the markup has no class attribute — verify against the real page.
$('.audio').click(function () {
    $.post('http://localhost:9999/getAudioInfo', {}, function (data) {
        console.log(data);
    });
});
- recorder.js
在回放方法中将识别的音频流下载下来,后缀名为.wav,并将其保存到本地
(function (window) {
    // Browser compatibility shims for legacy, vendor-prefixed APIs.
    window.URL = window.URL || window.webkitURL;
    // NOTE(review): navigator.getUserMedia is deprecated; modern browsers
    // expose navigator.mediaDevices.getUserMedia — consider migrating.
    navigator.getUserMedia = navigator.getUserMedia || navigator.webkitGetUserMedia || navigator.mozGetUserMedia || navigator.msGetUserMedia;

    /**
     * Records microphone audio from a MediaStream and encodes it as a
     * mono PCM WAV Blob.
     * @param {MediaStream} stream  microphone stream from getUserMedia
     * @param {Object} [config]     { sampleBits: 8|16, sampleRate: number }
     */
    var HZRecorder = function (stream, config) {
        config = config || {};
        // Sample size in bits: 8 or 16.
        config.sampleBits = config.sampleBits || 16;
        // Output sample rate; 16 kHz matches what Aliyun ASR expects.
        config.sampleRate = config.sampleRate || (16000);

        var context = new AudioContext();
        var audioInput = context.createMediaStreamSource(stream);
        // ScriptProcessorNode: 4096-frame buffers, mono in / mono out.
        var recorder = context.createScriptProcessor(4096, 1, 1);

        var audioData = {
            size: 0                                 // total captured samples
            , buffer: []                            // captured Float32Array chunks
            , inputSampleRate: context.sampleRate   // device capture rate
            , inputSampleBits: 16                   // capture sample size
            , outputSampleRate: config.sampleRate   // target rate for the WAV
            , outputSampleBits: config.sampleBits   // FIX: was misspelled "oututSampleBits" (internal only)
            // Append one chunk of raw samples.
            , input: function (data) {
                this.buffer.push(new Float32Array(data));
                this.size += data.length;
            }
            // Merge all chunks, then downsample by simple decimation.
            , compress: function () {
                var data = new Float32Array(this.size);
                var offset = 0;
                for (var i = 0; i < this.buffer.length; i++) {
                    data.set(this.buffer[i], offset);
                    offset += this.buffer[i].length;
                }
                // FIX: Math.floor instead of parseInt — parseInt coerces the
                // number through a string and misbehaves on exponent notation.
                var compression = Math.floor(this.inputSampleRate / this.outputSampleRate);
                var length = data.length / compression;
                var result = new Float32Array(length);
                var index = 0, j = 0;
                while (index < length) {
                    result[index] = data[j];
                    j += compression;
                    index++;
                }
                return result;
            }
            // Build a complete RIFF/WAVE file (44-byte header + PCM data).
            , encodeWAV: function () {
                var sampleRate = Math.min(this.inputSampleRate, this.outputSampleRate);
                var sampleBits = Math.min(this.inputSampleBits, this.outputSampleBits);
                var bytes = this.compress();
                var dataLength = bytes.length * (sampleBits / 8);
                var buffer = new ArrayBuffer(44 + dataLength);
                var data = new DataView(buffer);
                var channelCount = 1; // mono
                var offset = 0;
                var writeString = function (str) {
                    for (var i = 0; i < str.length; i++) {
                        data.setUint8(offset + i, str.charCodeAt(i));
                    }
                };
                // RIFF chunk descriptor
                writeString('RIFF'); offset += 4;
                // file size minus the first 8 bytes
                data.setUint32(offset, 36 + dataLength, true); offset += 4;
                writeString('WAVE'); offset += 4;
                // "fmt " sub-chunk
                writeString('fmt '); offset += 4;
                // fmt chunk size: 16 for PCM
                data.setUint32(offset, 16, true); offset += 4;
                // audio format: 1 = PCM
                data.setUint16(offset, 1, true); offset += 2;
                // channel count
                data.setUint16(offset, channelCount, true); offset += 2;
                // sample rate
                data.setUint32(offset, sampleRate, true); offset += 4;
                // byte rate = channels * sampleRate * bytesPerSample
                data.setUint32(offset, channelCount * sampleRate * (sampleBits / 8), true); offset += 4;
                // block align = channels * bytesPerSample
                data.setUint16(offset, channelCount * (sampleBits / 8), true); offset += 2;
                // bits per sample
                data.setUint16(offset, sampleBits, true); offset += 2;
                // "data" sub-chunk
                writeString('data'); offset += 4;
                // PCM payload size
                data.setUint32(offset, dataLength, true); offset += 4;
                // Write the samples, clamped to [-1, 1].
                if (sampleBits === 8) {
                    for (var i = 0; i < bytes.length; i++, offset++) {
                        var s = Math.max(-1, Math.min(1, bytes[i]));
                        var val = s < 0 ? s * 0x8000 : s * 0x7FFF;
                        // FIX: 8-bit WAV samples are unsigned (0..255), so write
                        // with setUint8; setInt8 also takes no endianness argument.
                        val = Math.floor(255 / (65535 / (val + 32768)));
                        data.setUint8(offset, val);
                    }
                } else {
                    for (var i = 0; i < bytes.length; i++, offset += 2) {
                        var s = Math.max(-1, Math.min(1, bytes[i]));
                        data.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7FFF, true);
                    }
                }
                return new Blob([data], { type: 'audio/wav' });
            }
        };

        // Start recording: wire the mic through the processor node.
        this.start = function () {
            audioInput.connect(recorder);
            recorder.connect(context.destination);
        };

        // Stop recording.
        this.stop = function () {
            recorder.disconnect();
        };

        // Stop and return the encoded WAV Blob.
        this.getBlob = function () {
            this.stop();
            return audioData.encodeWAV();
        };

        // Play the recording back and trigger a local .wav download.
        this.play = function (audio) {
            // FIX: the original referenced an undefined `res.data` here (a
            // copy/paste from an axios handler); encode once and reuse the blob.
            var blob = this.getBlob();
            if ('msSaveOrOpenBlob' in navigator) {
                // IE/Edge legacy save dialog.
                window.navigator.msSaveOrOpenBlob(blob, new Date().toISOString() + '.wav');
            }
            var url = window.URL.createObjectURL(blob);
            // feed the <audio> element
            audio.src = url;
            // use an <a download> to save the file locally
            let a = document.createElement('a');
            a.setAttribute("href", url);
            a.setAttribute("download", new Date().toISOString() + '.wav');
            a.click();
        };

        // Audio capture callback: collect each processed buffer.
        recorder.onaudioprocess = function (e) {
            audioData.input(e.inputBuffer.getChannelData(0));
        };
    };

    // Alert and throw a proper Error (the original threw an ad-hoc object).
    HZRecorder.throwError = function (message) {
        alert(message);
        throw new Error(message);
    };

    // Whether this browser supports recording at all.
    HZRecorder.canRecording = (navigator.getUserMedia != null);

    /**
     * Ask for microphone access and hand a ready HZRecorder to `callback`.
     * @param {function(HZRecorder)} callback  receives the recorder on success
     * @param {Object} [config]                forwarded to the constructor
     */
    HZRecorder.get = function (callback, config) {
        if (callback) {
            if (navigator.getUserMedia) {
                navigator.getUserMedia(
                    { audio: true } // audio only
                    , function (stream) {
                        var rec = new HZRecorder(stream, config);
                        callback(rec);
                    }
                    , function (error) {
                        console.log(error);
                        switch (error.code || error.name) {
                            case 'PERMISSION_DENIED':
                            case 'PermissionDeniedError':
                                alert('用户拒绝提供信息。')
                                break;
                            case 'NOT_SUPPORTED_ERROR':
                            case 'NotSupportedError':
                                alert('浏览器不支持硬件设备。')
                                break;
                            case 'MANDATORY_UNSATISFIED_ERROR':
                            case 'MandatoryUnsatisfiedError':
                                alert('无法发现指定的硬件设备。')
                                break;
                            default:
                                alert('无法打开麦克风。')
                                break;
                        }
                    });
            } else {
                alert('当前浏览器不支持录音功能。')
            }
        }
    };

    window.HZRecorder = HZRecorder;
})(window);
nodejs
//node 模块
const express = require('express')
const app = express()
//中间件
var bodyParser= require('body-parser');
app.use(bodyParser.urlencoded({extended:false}))
//请求
const request = require('request');
//语音文件
const fs = require('fs');
//阿里云语音模块
var RPCClient = require('@alicloud/pop-core').RPCClient;
// CORS middleware: allow any origin so the browser page can call this API.
app.all("*", function (req, res, next) {
    // "*" allows any origin to make cross-origin requests
    res.header("Access-Control-Allow-Origin", "*");
    // allowed request headers
    res.header("Access-Control-Allow-Headers", "content-type");
    // allowed request methods
    res.header("Access-Control-Allow-Methods", "DELETE,PUT,POST,GET,OPTIONS");
    if (req.method.toLowerCase() == 'options') {
        // FIX: res.send(200) is deprecated in Express 4 (it sends the string
        // "200" as the body); sendStatus sets the status code correctly.
        res.sendStatus(200); // end the CORS preflight quickly
    } else {
        next();
    }
})
/*
状态码描述
接口请求成功
1 返回识别结果
2 语音识别失败
其余除单数以外状态码
接口请求失败
3 读取音频文件失败
4 token 获取失败
5 请求接口返回错误语
*/
//前端请求
// Latest recognition result, polled by the frontend via POST /getAudioInfo.
// NOTE(review): a single module-level mutable result is shared by all
// clients — fine for a demo, not for concurrent use.
var data = null;
app.post('/getAudioInfo', function(req, res){
    res.send(data);
});

// Request parameters.
// SECURITY(review): a real appkey should never be hard-coded/committed —
// load it from configuration instead.
var appkey = 'g0IXmKkXyorfskOl';
var url = 'http://nls-gateway.cn-shanghai.aliyuncs.com/stream/v1/asr';
var audioFile = './audio/2021-10-11T06_30_22.230Z.wav'; // must be a 16 kHz WAV file
var requestUrl = url + '?appkey=' + appkey;

/*
  accessKeyId / accessKeySecret can be obtained at:
  https://ram.console.aliyun.com/manage/ak?spm=a2c4g.11186623.0.0.593d4883rMWyYs
*/
// Obtain an NLS token.
// FIX: the original left `accessKeyId:` / `accessKeySecret:` with no values,
// which is a syntax error. Placeholders used here; note that `process.env`
// is unavailable in this module because the function below shadows the
// global `process`.
var client = new RPCClient({
    accessKeyId: '<your-access-key-id>',         // TODO: fill in
    accessKeySecret: '<your-access-key-secret>', // TODO: fill in
    endpoint: 'http://nls-meta.cn-shanghai.aliyuncs.com',
    apiVersion: '2019-02-28'
});
client.request('CreateToken').then((result) => {
    if (result.ErrMsg == '') {
        var token = result.Token.Id; // token for the ASR request
        process(requestUrl, token, audioFile); // read + submit the audio file
    } else {
        data = {
            code: 4,
            result: result.ErrMsg
        };
    }
}).catch((err) => {
    // FIX: the original promise chain had no rejection handler, so a failed
    // token request crashed with an unhandled rejection.
    data = {
        code: 4,
        result: String(err)
    };
});
//读音频文件
// Read the audio file and POST it to the Aliyun ASR REST endpoint; the
// outcome is written into the module-level `data` variable.
// NOTE(review): this declaration shadows Node's global `process` object for
// the whole module — renaming it (e.g. recognize) would be safer; the name
// is kept so the existing caller keeps working.
function process(requestUrl, token, audioFile) {
    var audioContent = null;
    try {
        audioContent = fs.readFileSync(audioFile);
    } catch (error) {
        if (error.code == 'ENOENT') {
            data = {
                code: 3,
                result: '文件不存在!'
            };
        }
        return;
    }
    var options = {
        url: requestUrl,
        method: 'POST',
        headers: {
            'X-NLS-Token': token,
            'Content-type': 'application/octet-stream',
            'Content-Length': audioContent.length
        },
        body: audioContent
    };
    request(options, function callback(error, response, body) {
        // Early returns instead of the original deep nesting.
        if (error != null) {
            data = {
                code: 5,
                result: error
            };
            return;
        }
        if (response.statusCode != 200) {
            data = {
                code: response.statusCode,
                result: '识别失败!'
            };
            return;
        }
        // FIX: guard JSON.parse — a malformed body previously threw and
        // crashed the callback.
        try {
            body = JSON.parse(body);
        } catch (e) {
            data = {
                code: 2,
                result: '识别失败!'
            };
            return;
        }
        if (body.status == 20000000) {
            data = {
                code: 1,
                result: body.result
            };
            console.log(body.result);
        } else {
            data = {
                code: 2,
                result: '识别失败!'
            };
        }
    });
}
app.listen(9999, function(){'server start success...'});
使用前端+nodejs+百度云语音识别 API
百度云使用websocket请求方式,进行前端与nodejs交互
html
与阿里云dom结构相同
js
这次与后台交互使用websocket
所以只描述回放部分的代码[En]
So only describe the code in the playback part
// Play the recording back and stream it to the Baidu-ASR node server.
this.play = function (audio) {
    // FIX: the original passed an undefined `res.data` to msSaveOrOpenBlob
    // (copy/paste from an axios handler); encode the WAV once and reuse it
    // for saving, playback and upload.
    var blob = this.getBlob();
    // IE/Edge legacy: offer to save/open the blob directly.
    if ('msSaveOrOpenBlob' in navigator) {
        window.navigator.msSaveOrOpenBlob(blob, new Date().toISOString() + '.wav');
    }
    // feed the <audio> element
    audio.src = window.URL.createObjectURL(blob);
    // WebSocket connection to the recognition backend
    var ws = new WebSocket("ws://localhost:8181");
    ws.onopen = () => {
        console.log('Connection to server opened');
        // send the encoded audio for recognition
        ws.send(blob);
    }
    // recognition result from the server
    ws.onmessage = function (e) {
        var result_data = JSON.parse(e.data);
        if (result_data.err_no == 0) {
            // success: use result_data here
        } else {
            // recognition error
        }
    }
    ws.onerror = function () {
        //console.log("连接出错");
    }
    ws.onclose = function (e) {
        //console.log("服务器关闭");
    }
}
nodejs
//百度云 语音识别api
// Baidu Cloud speech-recognition SDK.
let AipSpeech = require("baidu-aip-sdk").speech;
// WebSocket server.
let Server = require('ws').Server;
// Listen on the port the frontend connects to.
const wss = new Server({
    port: 8181
})
let resTxt;
// One connection per recording session.
wss.on('connection', ws => {
    console.log('server connected');
    ws.on('message', data => {
        console.log('server recived audio blob');
        // Replace with the Api Key / Secret Key of your own Baidu Cloud
        // speech application (console: https://console.bce.baidu.com/).
        let client = new AipSpeech(0, 'Api Key', 'Secret Key');
        // FIX: `new Buffer(...)` is deprecated and unsafe — use Buffer.from.
        let voiceBase64 = Buffer.from(data);
        client.recognize(voiceBase64, 'wav', 16000).then(function(result) {
            resTxt = JSON.stringify(result);
            if (resTxt) {
                // send the recognition result back to the browser
                ws.send(resTxt);
            }
        }, function(err) {
            console.log(err);
        });
    })
    // socket-level error
    ws.on('error', error => {
        console.log('Error:' + error);
    })
    // socket closed
    ws.on('close', () => {
        console.log('Websocket is closed');
    })
})
// NOTE(review): the ws server emits no 'disconnection' event (disconnects
// surface as 'close' on the socket), so this handler never fires; kept
// only for parity with the original article.
wss.on('disconnection', ws => {
    ws.on('message', msg => {
        console.log('server recived msg:' + msg);
    })
})
Original: https://blog.csdn.net/qq_52769681/article/details/121228369
Author: SanErYa_
Title: 语音识别(html5+nodejs)
原创文章受到原创版权保护。转载请注明出处:https://www.johngo689.com/512147/
转载文章受原作者版权保护。转载请注明原作者出处!