import os
import numpy as np
import librosa
import tensorflow as tf
import keras
from keras.models import Model
from keras.layers import Input, Dense, Dropout, BatchNormalization, LeakyReLU, Activation
from keras.callbacks import LearningRateScheduler
from keras import backend as K
from sklearn.preprocessing import StandardScaler
# dataset generation
# load the noisy mixture and the clean reference at 16 kHz
mix, sr = librosa.load("./mix.wav", sr=16000)
clean, sr = librosa.load("./clean.wav", sr=16000)
# STFT parameters: a 512-point FFT gives 257 frequency bins per frame
win_length = 256
hop_length = 128
nfft = 512
mix_spectrum = librosa.stft(mix, win_length=win_length, hop_length=hop_length, n_fft=nfft)
clean_spectrum = librosa.stft(clean, win_length=win_length, hop_length=hop_length, n_fft=nfft)
# transpose to (frames, bins)
mix_mag = np.abs(mix_spectrum).T
clean_mag = np.abs(clean_spectrum).T
mix_phase = np.angle(mix_spectrum).T
clean_phase = np.angle(clean_spectrum).T
# stack 5 consecutive magnitude frames (2 of context on each side) per input feature;
# note the loop must run over all frame_num rows (the original range(frame_num - 4)
# subtracted the context twice and left the last rows of `feature` as zeros)
frame_num = mix_mag.shape[0] - 4
feature = np.zeros([frame_num, 257 * 5])
k = 0
for i in range(frame_num):
    frame = mix_mag[k:k + 5]
    feature[i] = np.reshape(frame, 257 * 5)
    k += 1
# Phase-Sensitive Mask (PSM): |S|/|Y| * cos(theta_S - theta_Y)
snr = np.divide(clean_mag, mix_mag) * np.cos(clean_phase - mix_phase)
label = snr[2:-2]   # drop 2 frames at each end to align with the 5-frame context
ref = np.cos(clean_phase - mix_phase)
ref = ref[2:-2]
# standardize the input features (zero mean, unit variance)
ss = StandardScaler()
feature = ss.fit_transform(feature)
print(feature.shape)
print(label.shape)
# chop the data into fixed-length sequences of 2000 frames
window_size = 2000
nb_sequences = feature.shape[0] // window_size
feature = feature[:nb_sequences * window_size, :1285]
feature = np.reshape(feature, (nb_sequences, window_size, 1285))
label = label[:nb_sequences * window_size, :257]
label = np.reshape(label, (nb_sequences, window_size, 257))
ref = ref[:nb_sequences * window_size, :257]
ref = np.reshape(ref, (nb_sequences, window_size, 257))
# ref * label = cos^2(dtheta) * |S|/|Y| is non-negative, so it is safe to take
# its square root inside the loss below
ref = ref * label
print(feature.shape)
print(label.shape)
def scheduler(epochs):
    # learning-rate schedule; the original Chinese comment says "every epoch,
    # reduce the learning rate to 1/10", but the code actually holds it at 1e-5
    if epochs < 20:
        K.set_value(model.optimizer.lr, 0.00001)
    if epochs > 20:
        K.set_value(model.optimizer.lr, 0.00001)
        lr = K.get_value(model.optimizer.lr)
        print("lr changed to {}".format(lr))
    return K.get_value(model.optimizer.lr)

reduce_lr = LearningRateScheduler(scheduler)
def custom_loss(y_true, y_pred):
    # spectral loss on square-rooted masks. The original wrapped this in a
    # mycost(k_input) closure that captured the full-dataset `ref` array, which
    # cannot line up with mini-batches; the target is taken from y_true instead,
    # and `ref` is passed as the fit() target below.
    return K.mean(K.square(K.sqrt(y_pred) - K.sqrt(y_true)), axis=-1)
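# mask estimator: three Dense(2048) blocks (BatchNorm + LeakyReLU + Dropout)
# followed by a BatchNorm + sigmoid output over the 257 frequency bins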
feature_input = Input(shape=(None, 1285), name = 'feature_input')
dense1 = Dense(2048)(feature_input)
norm1 = BatchNormalization()(dense1)
relu1 = LeakyReLU(alpha=0.1)(norm1)
drop1 = Dropout(0.1)(relu1)
dense2 = Dense(2048)(drop1)
norm2 = BatchNormalization()(dense2)
relu2 = LeakyReLU(alpha=0.1)(norm2)
drop2 = Dropout(0.1)(relu2)
dense3 = Dense(2048)(drop2)
norm3 = BatchNormalization()(dense3)
relu3 = LeakyReLU(alpha=0.1)(norm3)
drop3 = Dropout(0.1)(relu3)
dense4 = Dense(257)(drop3)
norm4 = BatchNormalization()(dense4)
out = Activation('sigmoid')(norm4)
model = Model([feature_input], out)
# compile with the custom phase-sensitive loss (the original passed
# mycost(model.input[1]), but the model has only a single input)
model.compile(optimizer='adam',
              loss=custom_loss,
              metrics=['mse'])
midWeightPath = './mid_weights.hdf5'
# checkpoint the weights after every epoch so training can resume from disk
endEpochHookCB = keras.callbacks.ModelCheckpoint(midWeightPath,
                                                 monitor='val_loss',
                                                 save_weights_only=True,
                                                 verbose=1,
                                                 save_best_only=False,
                                                 period=1)
if os.path.exists(midWeightPath):
    model.load_weights(midWeightPath)
    print("checkpoint loaded")
# train against `ref` (the non-negative phase-sensitive target built above),
# since that is the quantity the custom loss compares the predicted mask to
model.fit([feature],
          ref,
          batch_size=64, epochs=10,
          callbacks=[endEpochHookCB, reduce_lr])
model.save("./model.hdf5")
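For completeness, here is a minimal inference sketch (not part of the original post). It assumes a hypothetical noisy file ./noisy.wav and that the fitted scaler `ss`, the trained `model`, and the STFT parameters above are still in scope; the predicted mask scales the mixture magnitude, and the mixture phase is reused for resynthesis.

import soundfile as sf  # assumption: soundfile is available for writing wavs

noisy, sr = librosa.load("./noisy.wav", sr=16000)   # hypothetical input file
spec = librosa.stft(noisy, win_length=win_length, hop_length=hop_length, n_fft=nfft)
mag = np.abs(spec).T
phase = np.angle(spec).T
# build the same 5-frame context features as in training
n = mag.shape[0] - 4
feat = np.zeros([n, 257 * 5])
for i in range(n):
    feat[i] = np.reshape(mag[i:i + 5], 257 * 5)
feat = ss.transform(feat)                            # reuse the training scaler
mask = model.predict(feat[np.newaxis, :, :])[0]      # (n, 257) predicted mask
enhanced = (mask * mag[2:-2]) * np.exp(1j * phase[2:-2])  # apply mask, restore phase
wav = librosa.istft(enhanced.T, win_length=win_length, hop_length=hop_length)
sf.write("./enhanced.wav", wav, sr)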
This post presents a deep-learning approach to speech enhancement. A noisy mixture and its clean reference are loaded, transformed to spectrograms, and framed into features; a model is then trained with a custom loss function to improve speech quality. The model is a multi-layer fully connected network, and the learning rate is adjusted by a scheduler during training.