by 云龙
HTK book中提到的参数有11种:
"LPC", "LPREFC", "LPCEPSTRA", "LPDELCEP", "IREFC", "MFCC", "FBANK", "MELSPEC","DISCRETE", "PLP","ANON"
但HTK3.4中是否都支持呢?
请看ConvertFrame()函数中的以下代码:
switch(btgt){
case LPC:
Wave2LPC(cf->s,cf->a,cf->k,&re,&te);
v = cf->a; bsize = cf->lpcOrder;
break;
case LPREFC:
Wave2LPC(cf->s,cf->a,cf->k,&re,&te);
v = cf->k; bsize = cf->lpcOrder;
break;
case LPCEPSTRA:
Wave2LPC(cf->s,cf->a,cf->k,&re,&te);
LPC2Cepstrum(cf->a,cf->c);
if (cf->cepLifter > 0)
WeightCepstrum(cf->c, 1, cf->numCepCoef, cf->cepLifter);
v = cf->c; bsize = cf->numCepCoef;
break;
case MELSPEC:
case FBANK:
Wave2FBank(cf->s, cf->fbank, rawE?NULL:&te, cf->fbInfo);
v = cf->fbank; bsize = cf->numChans;
break;
case MFCC:
Wave2FBank(cf->s, cf->fbank, rawE?NULL:&te, cf->fbInfo);
FBank2MFCC(cf->fbank, cf->c, cf->numCepCoef);
if (cf->cepLifter > 0)
WeightCepstrum(cf->c, 1, cf->numCepCoef, cf->cepLifter);
v = cf->c; bsize = cf->numCepCoef;
break;
case PLP:
Wave2FBank(cf->s, cf->fbank, rawE ? NULL : &te, cf->fbInfo);
FBank2ASpec(cf->fbank, cf->as, cf->eql, cf->compressFact, cf->fbInfo);
ASpec2LPCep(cf->as, cf->ac, cf->lp, cf->c, cf->cm);
if (cf->cepLifter > 0)
WeightCepstrum(cf->c, 1, cf->numCepCoef, cf->cepLifter);
v = cf->c;
bsize = cf->numCepCoef;
break;
default:
HError(6321,"ConvertFrame: target %s is not a parameterised form",
ParmKind2Str(cf->tgtPK,buf));
}
可以看出HTK3.4支持7种参数:LPC,LPREFC,LPCEPSTRA,MELSPEC,FBANK,MFCC,PLP。
参数转换顺序可以参照HTK book Fig.5.9:
在特征提取中,IOConfigRec数据结构存放着很多配置参数:
typedef struct {
/* ------- Overrideable parameters ------- */
ParmKind srcPK; /* Source ParmKind */
FileFormat srcFF; /* Source File format */
HTime srcSampRate; /* Source Sample Rate */
Boolean zMeanSrc; /* Zero Mean the Source */
ParmKind tgtPK; /* Target ParmKind */
FileFormat tgtFF; /* Target File format */
......
}IOConfigRec;
ValidCodeParms()函数检查analysis.conf的参数是否合理。
/* ValidCodeParms: check to ensure reasonable wave->parm code params */
static void ValidCodeParms(IOConfig cf)
/* SetUpForCoding: set style, sizes and working storage */
static void SetUpForCoding(MemHeap *x, IOConfig cf, int frSize)
ValidConversion()函数检查源格式到目标格式的转换是否可能完成。
/* EXPORT->ValidConversion: checks that src -> tgt conversion is possible */
Boolean ValidConversion (ParmKind src, ParmKind tgt)
TotalComps()函数返回特征参数的维度。
/* TotalComps: return the total number of components in a parameter vector
with nStatic components and ParmKind pk */
static int TotalComps(int nStatic, ParmKind pk)
在OpenAsChannel()函数中,计算特征参数所需的内存空间:
dBytes = cf->nCols * pbuf->main.maxRows * sizeof(float);
= 39 * 243 * 4 = 37908
在调用FillBufFromChannel()函数提取特征参数之前,先调用了StartBuffer()函数,那么StartBuffer()函数有什么作用呢?
/* EXPORT->StartBuffer: start audio and fill the buffer */
void StartBuffer(ParmBuf pbuf)