import os
import numpy as np
import librosa
import tensorflow as tf
import keras
from keras.models import Model
from keras.layers import Input, Dense, Dropout, BatchNormalization, LeakyReLU, Activation
from keras.callbacks import LearningRateScheduler
from keras import backend as K
from sklearn.preprocessing import StandardScaler
# dataset generation
# load the noisy mixture and the clean reference at 16 kHz
mix, sr = librosa.load("./mix.wav", sr=16000)
clean, sr = librosa.load("./clean.wav", sr=16000)
# STFT parameters: a 512-point FFT gives 257 frequency bins per frame
win_length = 256
hop_length = 128
nfft = 512
mix_spectrum = librosa.stft(mix, win_length=win_length, hop_length=hop_length, n_fft=nfft)
clean_spectrum = librosa.stft(clean, win_length=win_length, hop_length=hop_length, n_fft=nfft)
# transpose to (frames, bins)
mix_mag = np.abs(mix_spectrum).T
clean_mag = np.abs(clean_spectrum).T
mix_phase = np.angle(mix_spectrum).T
clean_phase = np.angle(clean_spectrum).T
# stack 5 consecutive magnitude frames (2 of context on each side) per input feature;
# note the loop must run over all frame_num rows (the original range(frame_num - 4)
# subtracted the context twice and left the last rows of `feature` as zeros)
frame_num = mix_mag.shape[0] - 4
feature = np.zeros([frame_num, 257 * 5])
k = 0
for i in range(frame_num):
    frame = mix_mag[k:k + 5]
    feature[i] = np.reshape(frame, 257 * 5)
    k += 1
# Phase-Sensitive Mask (PSM): |S|/|Y| * cos(theta_S - theta_Y)
snr = np.divide(clean_mag, mix_mag) * np.cos(clean_phase - mix_phase)
label = snr[2:-2]   # drop 2 frames at each end to align with the 5-frame context
ref = np.cos(clean_phase - mix_phase)
ref = ref[2:-2]
# standardize the input features (zero mean, unit variance)
ss = StandardScaler()
feature = ss.fit_transform(feature)
print(feature.shape)
print(label.shape)
# chop the data into fixed-length sequences of 2000 frames
window_size = 2000
nb_sequences = feature.shape[0] // window_size
feature = feature[:nb_sequences * window_size, :1285]
feature = np.reshape(feature, (nb_sequences, window_size, 1285))
label = label[:nb_sequences * window_size, :257]
label = np.reshape(label, (nb_sequences, window_size, 257))
ref = ref[:nb_sequences * window_size, :257]
ref = np.reshape(ref, (nb_sequences, window_size, 257))
# ref * label = cos^2(dtheta) * |S|/|Y| is non-negative, so it is safe to take
# its square root inside the loss below
ref = ref * label
print(feature.shape)
print(label.shape)
def scheduler(epochs):
    # learning-rate schedule; the original Chinese comment says "every epoch,
    # reduce the learning rate to 1/10", but the code actually holds it at 1e-5
    if epochs < 20:
        K.set_value(model.optimizer.lr, 0.00001)
    if epochs > 20:
        K.set_value(model.optimizer.lr, 0.00001)
        lr = K.get_value(model.optimizer.lr)
        print("lr changed to {}".format(lr))
    return K.get_value(model.optimizer.lr)

reduce_lr = LearningRateScheduler(scheduler)
def custom_loss(y_true, y_pred):
    # spectral loss on square-rooted masks. The original wrapped this in a
    # mycost(k_input) closure that captured the full-dataset `ref` array, which
    # cannot line up with mini-batches; the target is taken from y_true instead,
    # and `ref` is passed as the fit() target below.
    return K.mean(K.square(K.sqrt(y_pred) - K.sqrt(y_true)), axis=-1)
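# mask estimator: three Dense(2048) blocks (BatchNorm + LeakyReLU + Dropout)
# followed by a BatchNorm + sigmoid output over the 257 frequency bins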
feature_input = Input(shape=(None, 1285), name = 'feature_input')
dense1 = Dense(2048)(feature_input)
norm1 = BatchNormalization()(dense1)
relu1 = LeakyReLU(alpha=0.1)(norm1)
drop1 = Dropout(0.1)(relu1)
dense2 = Dense(2048)(drop1)
norm2 = BatchNormalization()(dense2)
relu2 = LeakyReLU(alpha=0.1)(norm2)
drop2 = Dropout(0.1)(relu2)
dense3 = Dense(2048)(drop2)
norm3 = BatchNormalization()(dense3)
relu3 = LeakyReLU(alpha=0.1)(norm3)
drop3 = Dropout(0.1)(relu3)
dense4 = Dense(257)(drop3)
norm4 = BatchNormalization()(dense4)
out = Activation('sigmoid')(norm4)
model = Model([feature_input], out)
# compile with the custom phase-sensitive loss (the original passed
# mycost(model.input[1]), but the model has only a single input)
model.compile(optimizer='adam',
              loss=custom_loss,
              metrics=['mse'])
midWeightPath = './mid_weights.hdf5'
# checkpoint the weights after every epoch so training can resume from disk
endEpochHookCB = keras.callbacks.ModelCheckpoint(midWeightPath,
                                                 monitor='val_loss',
                                                 save_weights_only=True,
                                                 verbose=1,
                                                 save_best_only=False,
                                                 period=1)
if os.path.exists(midWeightPath):
    model.load_weights(midWeightPath)
    print("checkpoint loaded")
# train against `ref` (the non-negative phase-sensitive target built above),
# since that is the quantity the custom loss compares the predicted mask to
model.fit([feature],
          ref,
          batch_size=64, epochs=10,
          callbacks=[endEpochHookCB, reduce_lr])
model.save("./model.hdf5")
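For completeness, here is a minimal inference sketch (not part of the original post). It assumes a hypothetical noisy file ./noisy.wav and that the fitted scaler `ss`, the trained `model`, and the STFT parameters above are still in scope; the predicted mask scales the mixture magnitude, and the mixture phase is reused for resynthesis.

import soundfile as sf  # assumption: soundfile is available for writing wavs

noisy, sr = librosa.load("./noisy.wav", sr=16000)   # hypothetical input file
spec = librosa.stft(noisy, win_length=win_length, hop_length=hop_length, n_fft=nfft)
mag = np.abs(spec).T
phase = np.angle(spec).T
# build the same 5-frame context features as in training
n = mag.shape[0] - 4
feat = np.zeros([n, 257 * 5])
for i in range(n):
    feat[i] = np.reshape(mag[i:i + 5], 257 * 5)
feat = ss.transform(feat)                            # reuse the training scaler
mask = model.predict(feat[np.newaxis, :, :])[0]      # (n, 257) predicted mask
enhanced = (mask * mag[2:-2]) * np.exp(1j * phase[2:-2])  # apply mask, restore phase
wav = librosa.istft(enhanced.T, win_length=win_length, hop_length=hop_length)
sf.write("./enhanced.wav", wav, sr)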
This post presents a deep-learning approach to speech enhancement. A noisy mixture and its clean reference are loaded, transformed to spectrograms, and framed into features; a model is then trained with a custom loss function to improve speech quality. The model is a multi-layer fully connected network, and the learning rate is adjusted by a scheduler during training.