1. Hotspots analysis
1.1 Hot functions
1.2 Hot loops
格式说明:
执行百分比:热点循环占本函数执行比例(热点循环占程序执行比例)
Explanation of Format:
Execution time percentage: share of the enclosing function's execution time spent in the hot loop (share of the whole program's execution time spent in the hot loop)
Total: 78%
Function 1: mgau_eval
me.L1.1.1
Execution time percentage:20.6%(7.7%)
me.L2.1
Execution time percentage:21.9%(8.2%)
me.L2.1.1
Execution time percentage:55.1%(20.7%)
Function 2: vector_gautbl_eval_logs3
vgel.L1
Execution time percentage:12.5%(3.9%)
vgel.L1.1
Execution time percentage:87.5%(27.5%)
Function 3: subvq_mgau_shortlist
sms.L1
Execution time percentage:53.7%(4.8%)
sms.L2
Execution time percentage:9.5%(0.8%)
Function 4: logs3_add
la.L1
Execution time percentage:90.4%(4.1%) (无for循环)
Function 5: approx_cont_mgau_frame_eval
acmfe.L1
Execution time percentage:71.7%(2.2%)
acmfe.L2
Execution time percentage:28.3%(0.9%)
Function 6: lextree_hmm_eval
lhe.L1
Execution time percentage:99%(2.6%) (含有函数调用)
Function 7: mdef_sseq2sen_active
msa.L1
Execution time percentage:100%(2.6%) (含有函数调用)
Function 8: approx_mgau_eval
ame.L1
Execution time percentage:90%(1.9%) (无for循环,含有函数调用)
Function 9: dict2pid_comsenscr
dc.L1.1
Execution time percentage:90%(1.6%) (含有函数调用)
Function 10: hmm_vit_eval_3st
hve.L1
Execution time percentage: (无for循环)
Function 11: lextree_enter
le.L1
Execution time percentage:90%(0.7%)
Function 12: fe_fft
ff.L1
Execution time percentage:23%(0.08%)
ff.L1.1.1
Execution time percentage:64%(0.37%)
Function 13: lextree_hmm_propagate
lhp.L1
Execution time percentage:31.7% (含有函数调用)
lhp.L2
Execution time percentage:51% (含有函数调用)
Function 14: dict2pid_comsseq2sen_active
dca.L1
Execution time percentage:100% (含有函数调用)
Function 15: lextree_ssid_active
lsa.L1
Execution time percentage:99%(0.19%)
Function 16: logs3_init
li.L1
Execution time percentage:100% (含有函数调用)
Function 17: utt_decode_block
udb.L1
Execution time percentage:40% (含有函数调用)
1.3 Hot code fragments
/* Function 1: mgau_eval */
// me.L1.1.1
if (! active)
{
for (c = 0; c < mgau->n_comp-1; c += 2)
{
// me.L1.1
for (i = 0; i < veclen; i++)
{
diff1 = x[i] - m1[i];
dval1 -= diff1 * diff1 * v1[i];
diff2 = x[i] - m2[i];
dval2 -= diff2 * diff2 * v2[i];
}
}
}
// me.L2.1
if (! active)
else
{
for (j = 0; active[j] >= 0; j++)
{
c = active[j];
m1 = mgau->mean[c];
v1 = mgau->var[c];
dval1 = mgau->lrd[c];
if (dval1 < g->distfloor)
dval1 = g->distfloor;
score = logs3_add (score, (int32)(f * dval1) + mgau->mixw[c]);
}
}
// me.L2.1.1
if (! active)
else
{
for (j = 0; active[j] >= 0; j++)
{
for (i = 0; i < veclen; i++)
{
diff1 = x[i] - m1[i];
dval1 -= diff1 * diff1 * v1[i];
}
}
}
/* Function 2: vector_gautbl_eval_logs3 */
// vgel.L1
for (r = offset; r < end-1; r += 2)
{
m1 = gautbl->mean[r];
m2 = gautbl->mean[r+1];
v1 = gautbl->var[r];
v2 = gautbl->var[r+1];
dval1 = gautbl->lrd[r];
dval2 = gautbl->lrd[r+1];
// vgel.L1.1
for (i = 0; i < veclen; i++)
{
diff1 = x[i] - m1[i];
dval1 -= diff1 * diff1 * v1[i];
diff2 = x[i] - m2[i];
dval2 -= diff2 * diff2 * v2[i];
}
if (dval1 < gautbl->distfloor)
dval1 = gautbl->distfloor;
if (dval2 < gautbl->distfloor)
dval2 = gautbl->distfloor;
score[r] = (int32)(f * dval1);
score[r+1] = (int32)(f * dval2);
}
/* Function 3: subvq_mgau_shortlist */
// sms.L1
switch (vq->n_sv) {
case 3:
for (i = 0; i < n; i++)
{
if (VQ_EVAL == 1)
{
v = (int32) vqdist[*map];
map += 3;
} else
{
if (VQ_EVAL == 2)
{
v = vqdist[*(map++)];
v += 2 * vqdist[*map];
map += 2;
} else
{
v = vqdist[*(map++)];
v += vqdist[*(map++)];
v += vqdist[*(map++)];
}
}
gauscore[i] = v;
if (bv < v)
bv = v;
}
break;
// sms.L2
for (i = 0; i < n; i++)
{
if (gauscore[i] >= th)
sl[nc++] = i;
}
/* Function 4: logs3_add */
// la.L1
if (d < add_tbl_size)
{
if (USE_LOG3_ADD_TABLE)
r += add_tbl[d];
else
r += 0.5 + (float64) (log(1.0 + pow(F,d)) * invlogB);
}
/* Function 5: approx_cont_mgau_frame_eval */
// acmfe.L1
for (s = 0; s < g->n_mgau; s++)
{
is_compute = !sen_active || sen_active[s];
is_ciphone = (s==cd2cisen[s]);
if(!is_skip)
{
if(is_ciphone)
{
senscr[s]=cache_ci_senscr[s];
if (pbest < senscr[s])
pbest = senscr[s];
if (best < senscr[s])
best = senscr[s];
sen_active[s]=1;
ng+=mgau_n_comp(g,s);
ns++;
}else
{
if(is_compute)
{
if((pbest-senscr[cd2cisen[s]]<kb->ci_pbeam))
{
ng+=approx_mgau_eval (gs,svq,g,kb,s,senscr,feat,best_cid,svq_beam);
ns++;
}else
{
senscr[s]=senscr[cd2cisen[s]];
}
if (best < senscr[s])
best = senscr[s];
}
}
kb->rec_sen_active[s]=sen_active[s];
}else
{
...
}
}
// acmfe.L2
if(!is_skip)
{
for (s = 0; s < g->n_mgau; s++)
{
if(sen_active[s])
senscr[s]-=best;
}
}else
{
...
}
/* Function 6: lextree_hmm_eval */
// lhe.L1
if (fp) {
} else
{
if (n_st == 3) {
for (i = 0; i < lextree->n_active; i++)
{
ln = list[i];
assert (ln->frame == frm);
if (! ln->composite)
{
k = hmm_vit_eval_3st (&(ln->hmm), mdef->sseq[ln->ssid], ascr->sen);
}else
{
k = hmm_vit_eval_3st (&(ln->hmm), d2p->comsseq[ln->ssid], ascr->comsen);
}
if (best < k)
best = k;
if (IS_S3WID(ln->wid))
{
if (wbest < k)
wbest = k;
}
}
}
}else if
{
...
}
/* Function 7: mdef_sseq2sen_active */
// msa.L1
for (ss = 0; ss < mdef_n_sseq(mdef); ss++)
{
if (sseq[ss])
{
sp = mdef->sseq[ss];
for (i = 0; i < mdef_n_emit_state(mdef); i++)
sen[sp[i]] = 1;
}
}
/* Function 8: approx_mgau_eval */
// ame.L1
if(gs&&kb->gs4gs)
{
ng = gs_mgau_shortlist (gs, s, mgau_n_comp(g,s),feat,best_cid);
mgau_sl=gs->mgau_sl;
}else if (svq)
{
ng = subvq_mgau_shortlist (svq, s, mgau_n_comp(g,s), svq_beam);
mgau_sl=svq->mgau_sl;
}else{
ng = mgau_n_comp (g, s);
mgau_sl=NULL;
}
/* Function 9: dict2pid_comsenscr */
// dc.L1
for (i = 0; i < d2p->n_comstate; i++)
{
// dc.L1.1
for (j = 1;; j++)
{
k = comstate[j];
if (NOT_S3SENID(k))
break;
if (best < senscr[k])
best = senscr[k];
}
}
/* Function 10: hmm_vit_eval_3st */
// hve.L1
/* Function 11: lextree_enter */
// le.L1
for (gn = root; gn; gn = gnode_next(gn))
{
ln = (lextree_node_t *) gnode_ptr (gn);
hmm = &(ln->hmm);
scr = inscore + ln->prob;
if ((scr >= thresh) && (hmm->in.score < scr))
{
hmm->in.score = scr;
hmm->in.history = inhist;
if (ln->frame != nf)
{
ln->frame = nf;
lextree->next_active[n++] = ln;
}
}
}
/* Function 12: fe_fft */
// ff.L1
for (s = 0; s<N; s++)
{
from[s].r = in[s].r/div;
from[s].i = in[s].i/div;
}
// ff.L1
for (k = N/2; k > 0; k /= 2)
{
for (s = 0; s < k; s++)
{
// ff.L1.1.1
while (ww < wEnd)
{
wwf2.r = f2->r*ww->r - f2->i*ww->i;
wwf2.i = f2->r*ww->i + f2->i*ww->r;
/* t1 = f1+wwf2 */
t1->r = f1->r + wwf2.r;
t1->i = f1->i + wwf2.i;
/* t2 = f1-wwf2 */
t2->r = f1->r - wwf2.r;
t2->i = f1->i - wwf2.i;
/* increment */
f1 += 2*k; f2 += 2*k;
t1 += k; t2 += k;
ww += k;
}
}
}
/* Function 13: lextree_hmm_propagate */
// lhp.L1
/* Function 14: dict2pid_comsseq2sen_active */
// dca.L1
for (ss = 0; ss < d2p->n_comsseq; ss++) {
if (comssid[ss]) {
csp = d2p->comsseq[ss];
for (i = 0; i < mdef_n_emit_state(mdef); i++)
{
cs = csp[i];
sp = d2p->comstate[cs];
for (j = 0; IS_S3SENID(sp[j]); j++)
sen[sp[j]] = 1;
}
}
}
/* Function 15: lextree_ssid_active */
// lsa.L1
for (i = 0; i < lextree->n_active; i++)
{
ln = list[i];
if (ln->composite)
comssid[ln->ssid] = 1;
else
ssid[ln->ssid] = 1;
}