语音识别器

语音识别器

环境

这里我用的是 Linux 下的 Kali 虚拟机。
Anaconda 环境,PyCharm 编辑器(IDE),Python 3.6.13。

主要包

# 直接就是清华源下载,清华源是真的好用,谁用谁知道
pip install --user -i https://pypi.tuna.tsinghua.edu.cn/simple numpy
pip install --user -i https://pypi.tuna.tsinghua.edu.cn/simple scipy
pip install --user -i https://pypi.tuna.tsinghua.edu.cn/simple hmmlearn
pip install --user -i https://pypi.tuna.tsinghua.edu.cn/simple python_speech_features

环境展示

这里用到的单词音频就是我们上篇通过文字发音保存的音频。

语音识别器

写作过程

1.导入相关包


import os
import argparse
import numpy as np
from scipy.io import wavfile
from hmmlearn import hmm
from python_speech_features import mfcc

2.定义类来创建隐马尔科夫模型


class HMMTrainer(object):
    """Wrap a single hmmlearn model so it can be trained and scored.

    Args:
        model_name: Kind of HMM to build. Only 'GaussianHMM' is accepted.
        n_components: Forwarded to ``hmm.GaussianHMM`` as ``n_components``.
        cov_type: Forwarded to ``hmm.GaussianHMM`` as ``covariance_type``.
        n_iter: Forwarded to ``hmm.GaussianHMM`` as ``n_iter``.

    Raises:
        TypeError: If ``model_name`` is anything other than 'GaussianHMM'.
    """

    def __init__(self, model_name='GaussianHMM', n_components=4, cov_type='diag', n_iter=1000):
        self.model_name = model_name
        self.n_components = n_components
        self.cov_type = cov_type
        self.n_iter = n_iter
        # Whatever ``fit`` returns is appended here on every ``train`` call.
        self.models = []

        if self.model_name != 'GaussianHMM':
            raise TypeError('Invalid model type')

        self.model = hmm.GaussianHMM(n_components=self.n_components,
                                     covariance_type=self.cov_type, n_iter=self.n_iter)

    def train(self, X):
        """Fit the wrapped model on ``X`` and record the object ``fit`` returns.

        NumPy floating-point warnings are suppressed only while fitting, so
        the caller's global NumPy error settings are left untouched.
        """
        with np.errstate(all='ignore'):
            self.models.append(self.model.fit(X))

    def get_score(self, input_data):
        """Return the wrapped model's ``score`` for ``input_data``.

        NumPy floating-point warnings are suppressed for the duration of the
        call, matching the quiet behaviour used while training.
        """
        with np.errstate(all='ignore'):
            return self.model.score(input_data)

3.定义一个函数来解析命令行参数


def build_arg_parse():
    """Build the command-line parser for the training script.

    Returns:
        argparse.ArgumentParser: Parser with one required option,
        ``--input-folder``, stored on the namespace as ``input_folder``.
    """
    parser = argparse.ArgumentParser(description='Trains the HMM classifier')
    parser.add_argument("--input-folder", dest="input_folder", required=True,
                        help="Input folder containing the audio files in subfolders")
    return parser

4.定义主函数


if __name__ == '__main__':
    # Script entry point: train one HMM per sub-folder of --input-folder, then
    # print a prediction for every clip in the hard-coded ``input_files`` list.
    args = build_arg_parse().parse_args()
    input_folder = args.input_folder

    # (trainer, label) pairs, one per sub-folder that contained .wav files.
    hmm_models = []

    for dirname in os.listdir(input_folder):
        subfolder = os.path.join(input_folder, dirname)
        if not os.path.isdir(subfolder):
            continue

        # The sub-folder name is the label. Using dirname directly avoids
        # slicing the joined path on '/', which fails when the OS path
        # separator is not '/'.
        label = dirname

        # MFCC matrices of every .wav file found in this sub-folder.
        feature_chunks = []
        for filename in os.listdir(subfolder):
            if not filename.endswith('.wav'):
                continue
            print(filename)

            filepath = os.path.join(subfolder, filename)
            sampling_freq, audio = wavfile.read(filepath)
            print(sampling_freq)

            feature_chunks.append(mfcc(audio, sampling_freq))

        # Nothing to fit on: skip rather than pass an empty array to fit().
        if not feature_chunks:
            continue

        # Stack the frames of all files row-wise into one training matrix.
        X = np.concatenate(feature_chunks, axis=0)

        hmm_trainer = HMMTrainer()
        hmm_trainer.train(X)
        hmm_models.append((hmm_trainer, label))

    input_files = ['audio_files/hello/hello.wav',
                   'audio_files/linux/linux.wav',
                   'audio_files/python/python.wav',
                   'audio_files/windows/windows.wav',
                   'audio_files/你好/你好.wav',
                   'audio_files/place/place.wav',
                   'audio_files/variables/variables.wav']

    for input_file in input_files:
        sampling_freq, audio = wavfile.read(input_file)
        mfcc_features = mfcc(audio, sampling_freq)

        # Start below any real score: hmmlearn's score() is a log-likelihood
        # and can be negative, so starting from 0 could leave the prediction
        # stuck at None.
        max_score = float('-inf')
        output_label = None

        for hmm_model, label in hmm_models:
            score = hmm_model.get_score(mfcc_features)
            if score > max_score:
                max_score = score
                output_label = label

        # The true label is the name of the folder that holds the clip.
        print('\nTrue:', os.path.basename(os.path.dirname(input_file)))
        print('Predicted:', output_label)

5.运行:在终端中执行以下命令。

python speech_recognizer.py --input-folder audio_files

语音识别器

注意事项

针对以上代码运行,出现了一个问题:

语音识别器
这个报错不影响运行,但会影响语音识别的准确率。
解决:网上查到的资料说是采样率的问题,需要把 22kHz 的采样率改为 16kHz。
原回答:https://github.com/mozilla/DeepSpeech/issues/1888
语音识别器
根据提示,我尝试输出采样率,发现确实是 22kHz。
语音识别器
然后就是修改采样率,要改的地方有两个:
语音识别器
语音识别器
改好采样率之后,语音识别的准确率也提高了很多。
语音识别器

参考书籍

代码参考

本文代码基本仿照《Python 机器学习经典实例》完成。
    英文名:《Python Machine Learning Cookbook》

基础知识学习

深度学习基础知识我参考的是:
    《Python深度学习-基于TensorFlow》

完整代码


import os
import argparse
import numpy as np
from scipy.io import wavfile
from hmmlearn import hmm
from python_speech_features import mfcc

class HMMTrainer(object):
    """Wrap a single hmmlearn model so it can be trained and scored.

    Args:
        model_name: Kind of HMM to build. Only 'GaussianHMM' is accepted.
        n_components: Forwarded to ``hmm.GaussianHMM`` as ``n_components``.
        cov_type: Forwarded to ``hmm.GaussianHMM`` as ``covariance_type``.
        n_iter: Forwarded to ``hmm.GaussianHMM`` as ``n_iter``.

    Raises:
        TypeError: If ``model_name`` is anything other than 'GaussianHMM'.
    """

    def __init__(self, model_name='GaussianHMM', n_components=4, cov_type='diag', n_iter=1000):
        self.model_name = model_name
        self.n_components = n_components
        self.cov_type = cov_type
        self.n_iter = n_iter
        # Whatever ``fit`` returns is appended here on every ``train`` call.
        self.models = []

        if self.model_name != 'GaussianHMM':
            raise TypeError('Invalid model type')

        self.model = hmm.GaussianHMM(n_components=self.n_components,
                                     covariance_type=self.cov_type, n_iter=self.n_iter)

    def train(self, X):
        """Fit the wrapped model on ``X`` and record the object ``fit`` returns.

        NumPy floating-point warnings are suppressed only while fitting, so
        the caller's global NumPy error settings are left untouched.
        """
        with np.errstate(all='ignore'):
            self.models.append(self.model.fit(X))

    def get_score(self, input_data):
        """Return the wrapped model's ``score`` for ``input_data``.

        NumPy floating-point warnings are suppressed for the duration of the
        call, matching the quiet behaviour used while training.
        """
        with np.errstate(all='ignore'):
            return self.model.score(input_data)

def build_arg_parse():
    """Build the command-line parser for the training script.

    Returns:
        argparse.ArgumentParser: Parser with one required option,
        ``--input-folder``, stored on the namespace as ``input_folder``.
    """
    parser = argparse.ArgumentParser(description='Trains the HMM classifier')
    parser.add_argument("--input-folder", dest="input_folder", required=True,
                        help="Input folder containing the audio files in subfolders")
    return parser

if __name__ == '__main__':
    # Script entry point: train one HMM per sub-folder of --input-folder, then
    # print a prediction for every clip in the hard-coded ``input_files`` list.
    args = build_arg_parse().parse_args()
    input_folder = args.input_folder

    # Sample rate handed to mfcc() for every clip. The rate read from each
    # file is only printed, not used for feature extraction.
    # NOTE(review): if the clips are not really 16 kHz this misreports the
    # rate to mfcc(); confirm this is intended rather than resampling the
    # audio or raising mfcc's FFT size.
    MFCC_SAMPLE_RATE = 16000

    # (trainer, label) pairs, one per sub-folder that contained .wav files.
    hmm_models = []

    for dirname in os.listdir(input_folder):
        subfolder = os.path.join(input_folder, dirname)
        if not os.path.isdir(subfolder):
            continue

        # The sub-folder name is the label. Using dirname directly avoids
        # slicing the joined path on '/', which fails when the OS path
        # separator is not '/'.
        label = dirname

        # MFCC matrices of every .wav file found in this sub-folder.
        feature_chunks = []
        for filename in os.listdir(subfolder):
            if not filename.endswith('.wav'):
                continue
            print(filename)

            filepath = os.path.join(subfolder, filename)
            sampling_freq, audio = wavfile.read(filepath)
            print(sampling_freq)

            feature_chunks.append(mfcc(audio, MFCC_SAMPLE_RATE))

        # Nothing to fit on: skip rather than pass an empty array to fit().
        if not feature_chunks:
            continue

        # Stack the frames of all files row-wise into one training matrix.
        X = np.concatenate(feature_chunks, axis=0)

        hmm_trainer = HMMTrainer()
        hmm_trainer.train(X)
        hmm_models.append((hmm_trainer, label))

    input_files = ['audio_files/hello/hello.wav',
                   'audio_files/linux/linux.wav',
                   'audio_files/python/python.wav',
                   'audio_files/windows/windows.wav',
                   'audio_files/你好/你好.wav',
                   'audio_files/place/place.wav',
                   'audio_files/variables/variables.wav']

    for input_file in input_files:
        sampling_freq, audio = wavfile.read(input_file)
        mfcc_features = mfcc(audio, MFCC_SAMPLE_RATE)

        # Start below any real score: hmmlearn's score() is a log-likelihood
        # and can be negative, so starting from 0 could leave the prediction
        # stuck at None.
        max_score = float('-inf')
        output_label = None

        for hmm_model, label in hmm_models:
            score = hmm_model.get_score(mfcc_features)
            if score > max_score:
                max_score = score
                output_label = label

        # The true label is the name of the folder that holds the clip.
        print('\nTrue:', os.path.basename(os.path.dirname(input_file)))
        print('Predicted:', output_label)

Original: https://blog.csdn.net/qq_45071353/article/details/123724616
Author: 初冬的早晨
Title: 语音识别器

原创文章受到原创版权保护。转载请注明出处:https://www.johngo689.com/45517/

转载文章受原作者版权保护。转载请注明原作者出处!

(0)

大家都在看

发表回复

登录后才能评论
免费咨询
免费咨询
扫码关注
扫码关注
联系站长

站长Johngo!

大数据和算法重度研究者!

持续产出大数据、算法、LeetCode干货,以及业界好资源!

2022012703491714

微信来撩,免费咨询:xiaozhu_tec

分享本页
返回顶部