一、简介
得益于Kaldi社区的日益活跃和Kaldi作者Povey的大力推荐,现在有越来越多的公司和团体开始使用chain model。chain model的优越性主要体现在以下四点:(1)使用了单状态的biphone,建模粒度更大,有些类似于CTC;(2)采用了低帧率策略,DNN每三帧输出一次,解码速度更快;(3)使用了区分性训练,准确率更高;(4)改进了MMI,提出了Lattice-free MMI(LF-MMI),训练速度更快。
二、源码解析
我们结合kaldi的源码,来逐步分析chain model的训练过程。
#include "chain/chain-numerator.h"
#include "cudamatrix/cu-vector.h"
namespace kaldi {
namespace chain {
// Constructor: takes the supervision (the labels) and the result of the
// network's forward computation, and uses them to initialize the members
// supervision_ and nnet_output_.
//
//  supervision  Supervision object; its fst, num_sequences,
//               frames_per_sequence and label_dim fields are read here.
//  nnet_output  Output of the forward pass; its row and column counts are
//               checked against the supervision below.
NumeratorComputation::NumeratorComputation(
const Supervision &supervision,
const CuMatrixBase<BaseFloat> &nnet_output):
supervision_(supervision),
nnet_output_(nnet_output) {
// Compute the time index that corresponds to each state of the numerator
// FST; the result is written to fst_state_times_.
ComputeFstStateTimes(supervision_.fst, &fst_state_times_);
// Sanity check on dimensions: nnet_output must have exactly
// num_sequences * frames_per_sequence rows and label_dim columns.
// Note that this check runs after ComputeFstStateTimes() has been called.
KALDI_ASSERT(supervision.num_sequences * supervision.frames_per_sequence ==
nnet_output.NumRows() &&
supervision.label_dim == nnet_output.NumCols());
}
void NumeratorComputation::ComputeLookupIndexes() {
int32 num_states = supervision_.fst.NumStates();
int32 num_arcs_guess = num_states * 2;
fst_output_indexes_.reserve(num_arcs_guess);
int32 frames_per_sequence = supervision_.frames_per_sequence,
num_sequences = supervision_.num_sequences,
cur_time = 0;
// the following is a CPU version of nnet_output_indexes_. It is a list of
// pairs (row-index, column-index) which index nnet_output_. The row-index
// corresponds to the time-frame 't', and the column-index the pdf-id, but we
// have to be a little careful with the row-index because there is a
// reordering that happens if su