import librosa throws an error??

The problem is reproduced as follows:

>>> import librosa
Traceback (most recent call last):
  File "<pyshell#2>", line 1, in <module>
    import librosa
  File "D:\python36\lib\site-packages\librosa\__init__.py", line 11, in <module>
    from . import cache
  File "D:\python36\lib\site-packages\librosa\cache.py", line 7, in <module>
    from joblib import Memory
  File "D:\python36\lib\site-packages\joblib\__init__.py", line 113, in <module>
    from .memory import Memory, MemorizedResult, register_store_backend
  File "D:\python36\lib\site-packages\joblib\memory.py", line 32, in <module>
    from ._store_backends import StoreBackendBase, FileSystemStoreBackend
  File "D:\python36\lib\site-packages\joblib\_store_backends.py", line 1
The error above appears when running this code:

import numpy as np
from hmmlearn import hmm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from tqdm import tqdm
import os
import librosa

# Feature extraction
def extract_features(file_path, max_length=None):
    y, sr = librosa.load(file_path, sr=None)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    if max_length is not None:
        mfccs = mfccs[:, :max_length]  # truncate to max_length
    return mfccs  # not transposed: shape is (n_mfcc, frames)

# Load the dataset
def load_data(folder_paths, max_length=None):
    data = []
    labels = []
    for label, folder in enumerate(folder_paths):
        folder_path = os.path.join('D:/第二章/dataset/speech_commands_v0.02', folder)  # replace with your dataset path
        files = os.listdir(folder_path)
        for file in tqdm(files, desc=f'Processing {folder}', unit='file'):
            if file.endswith('.wav'):  # files are assumed to be wav
                file_path = os.path.join(folder_path, file)
                features = extract_features(file_path, max_length)
                data.append(features)
                labels.append(label)
    # Determine the maximum feature length
    if max_length is None:
        max_length = max(features.shape[1] for features in data)
    # Pad every feature matrix to the same length
    padded_data = []
    for features in data:
        num_cols = max_length - features.shape[1]
        pad_width = ((0, 0), (0, num_cols))  # pad along the column (time) axis only
        padded_features = np.pad(features, pad_width, mode='constant')
        padded_data.append(padded_features)
    return np.array(padded_data), np.array(labels)

# Train a multinomial HMM
def train_multinomial_hmm(features, n_components=3):
    if n_components <= 0:
        raise ValueError("n_components must be a positive integer")
    # Create the MultinomialHMM model
    model = hmm.MultinomialHMM(n_components=n_components, n_iter=100)
    # Train the model
    model.fit(features)
    return model

# Evaluate the model
def evaluate_model(model, X_test, y_test):
    predictions = model.predict(X_test)
    accuracy = accuracy_score(y_test, predictions)
    print(f'Accuracy: {accuracy:.2f}')
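A side note that may matter once the import error is resolved: hmmlearn's fit expects a single 2D array of stacked observation frames plus a per-sequence lengths list, and MultinomialHMM models discrete count data, so continuous MFCC frames are usually handled with GaussianHMM instead. A minimal sketch of that calling convention (the function and variable names are illustrative, not from the post; each sequence is assumed to be mfccs.T, i.e. frames along axis 0):

import numpy as np
from hmmlearn import hmm

# Sketch: train one GaussianHMM per class on variable-length MFCC sequences.
# Each sequence has shape (n_frames, 13); hmmlearn wants them stacked into a
# single 2D array together with a parallel list of sequence lengths.
def train_class_hmm(sequences, n_components=3):
    X = np.vstack(sequences)                   # (total_frames, 13)
    lengths = [len(seq) for seq in sequences]  # frames per utterance
    model = hmm.GaussianHMM(n_components=n_components, n_iter=100)
    model.fit(X, lengths)
    return model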
Latest post (03-22):
import torch
import torch.nn as nn
import librosa
import numpy as np
from torch.utils.data import Dataset, DataLoader
from nemo.collections.tts.models import Tacotron2Model
from nemo.collections.tts.models import WaveGlowModel
import os

# Configuration
config = {
    "sr": 22050,            # sample rate
    "batch_size": 8,        # adjust to fit GPU memory
    "num_epochs": 500,
    "gpu_id": 0,
    "mel_dim": 80,          # mel-spectrogram dimension
    "text_embed_dim": 512,  # text embedding dimension
    "max_text_len": 100     # maximum text length
}

# Custom dataset
class VoiceDataset(Dataset):
    def __init__(self, data_dir):
        self.files = [os.path.join(data_dir, f) for f in os.listdir(data_dir)]

    def __len__(self):
        return len(self.files)

    def __getitem__(self, idx):
        # Load audio and convert it to a mel spectrogram
        audio, sr = librosa.load(self.files[idx], sr=config['sr'])
        mel = librosa.feature.melspectrogram(y=audio, sr=sr, n_mels=config['mel_dim'])
        mel = librosa.power_to_db(mel)
        # Random text embedding (real text should be used in practice)
        text_embed = torch.randn(config['text_embed_dim'])
        return {
            "mel": torch.FloatTensor(mel.T),  # (time, n_mels)
            "text": text_embed
        }

# Initialize the model
device = torch.device(f"cuda:{config['gpu_id']}")

class VoiceGenerator(nn.Module):
    def __init__(self):
        super().__init__()
        # Text encoder
        self.text_encoder = nn.Sequential(
            nn.Linear(config['text_embed_dim'], 256),
            nn.ReLU(),
            nn.Linear(256, 512)
        )
        # Acoustic model (simplified, based on Tacotron2)
        self.tacotron = Tacotron2Model.from_pretrained("tts_en_tacotron2").encoder
        # Vocoder (simplified, based on WaveGlow)
        self.vocoder = WaveGlowModel.from_pretrained("tts_waveglow_88m").generator

    def forward(self, text):
        # Encode the text
        text_feat = self.text_encoder(text)
        # Generate the mel spectrogram
        mel_outputs, _ = self.tacotron(text_feat)
        # Generate the waveform
        audio = self.vocoder(mel_outputs)
        return audio

# Set up the training system
model = VoiceGenerator().to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=3e-4)
criterion = nn.MSELoss()

# Data loading
dataset = VoiceDataset("training_data/sliced_audio")
loader = DataLoader(dataset, batch_size=config['batch_size'], shuffle=True)

# Training loop
for epoch in range(config['num_epochs']):
    for batch in loader:
        mels = batch['mel'].to(device)
        texts = batch['text'].to(device)

        # Forward pass
        gen_audio = model(texts)

        # Compute the loss
        loss = criterion(gen_audio, mels)

        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()

    # Save a checkpoint
    if epoch % 50 == 0:
        torch.save(model.state_dict(), f"voice_model_epoch{epoch}.pt")
        print(f"Epoch {epoch} | Loss: {loss.item():.4f}")
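One practical detail in the script above: librosa produces mel spectrograms whose time length varies per file, so the default DataLoader collation will fail for any batch_size larger than one. A minimal padding collate sketch, assuming zero-padding to the longest item in the batch is acceptable (pad_collate is an illustrative name, not from the post):

import torch
from torch.nn.utils.rnn import pad_sequence

def pad_collate(batch):
    # Pad the variable-length (time, n_mels) tensors to the longest in the batch.
    mels = pad_sequence([item["mel"] for item in batch], batch_first=True)
    texts = torch.stack([item["text"] for item in batch])
    return {"mel": mels, "text": texts}

# loader = DataLoader(dataset, batch_size=config['batch_size'],
#                     shuffle=True, collate_fn=pad_collate)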