Benchmark Analysis 7: SPEC CPU2006 482.sphinx3

1. Hotspots analysis
1.1 Hot functions
[Figure missing: the hot-function profile image was not captured in this copy]
1.2 Hot loops
格式说明:
执行百分比:热点循环占本函数执行比例(热点循环占程序执行比例)
Explanation of format:
Execution time percentage: the hot loop's share of its enclosing function's execution time (the hot loop's share of the whole program's execution time)

Total: 78%

Function 1: mgau_eval
me.L1.1.1
Execution time percentage:20.6%(7.7%)
me.L2.1
Execution time percentage:21.9%(8.2%)
me.L2.1.1
Execution time percentage:55.1%(20.7%)

Function 2: vector_gautbl_eval_logs3
vgel.L1
Execution time percentage:12.5%(3.9%)
vgel.L1.1
Execution time percentage:87.5%(27.5%)

Function 3: subvq_mgau_shortlist
sms.L1
Execution time percentage:53.7%(4.8%)
sms.L2
Execution time percentage:9.5%(0.8%)

Function 4: logs3_add
la.L1
Execution time percentage:90.4%(4.1%) (无for循环)

Function 5: approx_cont_mgau_frame_eval
acmfe.L1
Execution time percentage:71.7%(2.2%)
acmfe.L2
Execution time percentage:28.3%(0.9%)

Function 6: lextree_hmm_eval
lhe.L1
Execution time percentage:99%(2.6%) (含有函数调用)

Function 7: mdef_sseq2sen_active
msa.L1
Execution time percentage:100%(2.6%) (含有函数调用)

Function 8: approx_mgau_eval
ame.L1
Execution time percentage:90%(1.9%) (无for循环,含有函数调用)

Function 9: dict2pid_comsenscr
dc.L1.1
Execution time percentage:90%(1.6%) (含有函数调用)

Function 10: hmm_vit_eval_3st
hve.L1
Execution time percentage: (无for循环)

Function 11: lextree_enter
le.L1
Execution time percentage:90%(0.7%)

Function 12: fe_fft
ff.L1
Execution time percentage:23%(0.08%)
ff.L1.1.1
Execution time percentage:64%(0.37%)

Function 13: lextree_hmm_propagate
lhp.L1
Execution time percentage:31.7%(含有函数调用)
lhp.L2
Execution time percentage:51%(含有函数调用)

Function 14: dict2pid_comsseq2sen_active
dca.L1
Execution time percentage:100%(含有函数调用)

Function 15: lextree_ssid_active
lsa.L1
Execution time percentage:99%(0.19%)

Function 16: logs3_init
li.L1
Execution time percentage:100%(含有函数调用)

Function 17: utt_decode_block
udb.L1
Execution time percentage:40%(含有函数调用)

1.3 Hot code phase

/*   Function 1: mgau_eval  */
/*
 * Excerpts of the hot loops only; this is not compilable as shown.
 * Declarations, the setup of m1/m2/v1/v2/dval1/dval2 and some branch
 * bodies are elided from the excerpt.
 */
// me.L1.1.1
// Path taken when `active` is null: components are visited two at a time.
// NOTE(review): the label above sits on the outer block while the inner
// loop is labelled me.L1.1 -- confirm which nesting level each label means.
if (! active) 
{
	// The bound n_comp-1 with a step of 2 means an unpaired last component
	// is not visited by this loop; its handling is not shown here.
	for (c = 0; c < mgau->n_comp-1; c += 2) 
	{
		// me.L1.1
		// For each of the two components, subtract the v-weighted squared
		// difference between x[i] and the component's m[i] from its
		// running accumulator.
		for (i = 0; i < veclen; i++) 
		{
			diff1 = x[i] - m1[i];
			dval1 -= diff1 * diff1 * v1[i];
			diff2 = x[i] - m2[i];
			dval2 -= diff2 * diff2 * v2[i];
		}
	}
}
// me.L2.1
// Only the else branch is shown; the `if (! active)` body is elided.
if (! active) 
else
{
	// Walk the active[] list until a negative entry is reached.
	for (j = 0; active[j] >= 0; j++) 
	{
		c = active[j];
	    m1 = mgau->mean[c];
	    v1 = mgau->var[c];
	    dval1 = mgau->lrd[c];
	    // presumably the me.L2.1.1 loop (next excerpt) runs at this point,
	    // before the floor is applied -- confirm against the full source
	    // Clamp dval1 from below to distfloor.
	    if (dval1 < g->distfloor)
			dval1 = g->distfloor;
	    // Fold the scaled, truncated value plus mixw[c] into score.
	    score = logs3_add (score, (int32)(f * dval1) + mgau->mixw[c]); 
	}
}
// me.L2.1.1
// Only the else branch is shown; the `if (! active)` body is elided.
if (! active) 
else
{
	for (j = 0; active[j] >= 0; j++) 
	{
		// One component per iteration: same accumulation as me.L1.1 but
		// without the second (m2/v2/dval2) stream.
		for (i = 0; i < veclen; i++) 
		{
			diff1 = x[i] - m1[i];
			dval1 -= diff1 * diff1 * v1[i];
	    }
	}
}


/*  Function 2: vector_gautbl_eval_logs3   */
/*
 * Excerpt of the hot loop only; declarations are elided.
 * Processes table rows r and r+1 together on each iteration.
 */
// vgel.L1
// The bound end-1 with a step of 2 means an unpaired last row is not
// handled by this loop; its handling is not shown here.
for (r = offset; r < end-1; r += 2) 
{
	// Per-row pointers and starting values for the two rows of this pass.
	m1 = gautbl->mean[r];
	m2 = gautbl->mean[r+1];
	v1 = gautbl->var[r];
	v2 = gautbl->var[r+1];
	dval1 = gautbl->lrd[r];
	dval2 = gautbl->lrd[r+1];
	//  vgel.L1.1
	// Subtract the v-weighted squared difference between x[i] and each
	// row's m[i] from that row's accumulator.
	for (i = 0; i < veclen; i++) 
	{
	    diff1 = x[i] - m1[i];
	    dval1 -= diff1 * diff1 * v1[i];
	    diff2 = x[i] - m2[i];
	    dval2 -= diff2 * diff2 * v2[i];
	}
	
	// Clamp both results from below to distfloor.
	if (dval1 < gautbl->distfloor)
	    dval1 = gautbl->distfloor;
	if (dval2 < gautbl->distfloor)
	    dval2 = gautbl->distfloor;
	// Store the scaled results, truncated to int32 by the cast.
	score[r] = (int32)(f * dval1);
	score[r+1] = (int32)(f * dval2);
}

/*  Function 3: subvq_mgau_shortlist   */
/*
 * Excerpt of the hot loops only; this is not compilable as shown.
 * Only the `case 3` arm of the switch appears; the other arms and the
 * closing brace of the switch are elided.
 */
// sms.L1
switch (vq->n_sv) {
case 3:
// For each of the n entries, build v from vqdist[] values selected through
// map, which advances by three positions per entry in every branch.
for (i = 0; i < n; i++) 
{
	if (VQ_EVAL == 1) 
	{
		// Use only the first of the three map entries.
    	v = (int32) vqdist[*map];
    	map += 3;
  	} else 
  	{
    	if (VQ_EVAL == 2) 
    	{
			// First entry plus twice the second; the third is skipped.
			v = vqdist[*(map++)];
      		v += 2 * vqdist[*map]; 
      		map += 2;
    	} else 
    	{
			// Sum of all three entries.
      		v = vqdist[*(map++)];
      		v += vqdist[*(map++)]; 
      		v += vqdist[*(map++)]; 
    	}
  }
  gauscore[i] = v;
    
  // Track the largest v seen so far.
  if (bv < v)
	bv = v;
}
break;
	
// sms.L2
// Collect into sl[] the indices whose gauscore reaches the threshold th;
// nc counts how many were kept. How th is derived is not shown here.
for (i = 0; i < n; i++) 
{
	if (gauscore[i] >= th)
		sl[nc++] = i;
}


/*   Function 4: logs3_add  */
/*
 * Excerpt only; how d and r are computed beforehand is not shown.
 * Adds a correction term to r when d is below add_tbl_size.
 */
// la.L1
if (d < add_tbl_size)
{
    // Either look the term up in add_tbl[] by index d, or compute it
    // directly as log(1 + F^d) * invlogB plus 0.5.
    // presumably the 0.5 rounds to nearest when r is an integer -- confirm
    // the type of r against the full source
    if (USE_LOG3_ADD_TABLE) 
		r += add_tbl[d];
    else
		r += 0.5 + (float64) (log(1.0 + pow(F,d)) * invlogB); 
}

/*  Function 5: approx_cont_mgau_frame_eval   */
/*
 * Excerpts of the two hot loops only; the `...` lines mark branches the
 * write-up elided (the is_skip paths). Not compilable as shown.
 */
// acmfe.L1
// First pass over all n_mgau entries: fill senscr[s] and track the maxima.
for (s = 0; s < g->n_mgau; s++) 
{
	// is_compute is true when no sen_active array is given, or when entry s
	// is flagged in it.
	// NOTE(review): sen_active is null-checked here but written and read
	// without a check further down (sen_active[s]=1, rec_sen_active copy,
	// and the acmfe.L2 loop) -- confirm callers never pass NULL on this path.
	is_compute = !sen_active || sen_active[s];
	// Entry s counts as "ciphone" when cd2cisen maps it to itself.
  	is_ciphone  =  (s==cd2cisen[s]);
  	if(!is_skip)
  	{ 
    	if(is_ciphone)
    	{
			// Take the score from the cache rather than evaluating it.
			senscr[s]=cache_ci_senscr[s];
			
			if (pbest < senscr[s]) 
				pbest = senscr[s];
			if (best < senscr[s]) 
				best = senscr[s];
				
			sen_active[s]=1;
			ng+=mgau_n_comp(g,s); 
			ns++;
		}else
		{
			if(is_compute) 
			{
				// Evaluate only when the score of the mapped entry
				// cd2cisen[s] is within ci_pbeam of pbest; otherwise
				// reuse that mapped entry's score.
	  			if((pbest-senscr[cd2cisen[s]]<kb->ci_pbeam))
	  			{
	    			ng+=approx_mgau_eval (gs,svq,g,kb,s,senscr,feat,best_cid,svq_beam);
	    			ns++;
	  			}else 
	  			{
	    			senscr[s]=senscr[cd2cisen[s]]; 
	  			}
	  			if (best < senscr[s]) 
	  				best = senscr[s];
			}
      	}
		// Record the active flag for entry s in kb.
      	kb->rec_sen_active[s]=sen_active[s];
	
	}else
	{ 
		...
    }
}

// acmfe.L2
// Second pass: subtract the maximum found above from every active score.
if(!is_skip)
{
	for (s = 0; s < g->n_mgau; s++)
	{
      	if(sen_active[s])
			senscr[s]-=best;
    }
}else
{	
	...
}


/*  Function 6: lextree_hmm_eval   */
/*
 * Excerpt of the hot loop only; this is not compilable as shown. The
 * `if (fp)` body is empty in the excerpt and the trailing `else if` has
 * no condition and an elided (`...`) body.
 */
// lhe.L1
if (fp) {
} else 
{
	if (n_st == 3) {
		// Evaluate every active node in list[] and track the best scores.
		for (i = 0; i < lextree->n_active; i++) 
		{
			ln = list[i];
			// Every active node is expected to carry the current frame.
			assert (ln->frame == frm);
		
			// Same evaluator for both node kinds; only the sequence table
			// and the score array passed in differ.
			if (! ln->composite)
		  	{
		  		k = hmm_vit_eval_3st (&(ln->hmm), mdef->sseq[ln->ssid], ascr->sen);
		  	}else
		  	{
		   		k = hmm_vit_eval_3st (&(ln->hmm), d2p->comsseq[ln->ssid], ascr->comsen);
			}
			// best: maximum k over all nodes.
			if (best < k)
		    	best = k;
			// wbest: maximum k over nodes whose wid passes IS_S3WID.
			if (IS_S3WID(ln->wid)) 
			{
		    	if (wbest < k)
					wbest = k;
			}
	    }
	}
}else if
{
	...
} 

/*  Function 7: mdef_sseq2sen_active   */
/*
 * Excerpt of the hot loop only; declarations are elided.
 * For every flagged entry of sseq[], sets the sen[] flag of each id
 * listed in the matching row of mdef->sseq.
 */
// msa.L1
for (ss = 0; ss < mdef_n_sseq(mdef); ss++) 
{
	if (sseq[ss]) 
	{
		// sp: row ss of mdef->sseq; its first mdef_n_emit_state(mdef)
		// values are used as indices into sen[].
	 	sp = mdef->sseq[ss];
	 	for (i = 0; i < mdef_n_emit_state(mdef); i++)
			sen[sp[i]] = 1;
	}
}

/*   Function 8: approx_mgau_eval */
/*
 * Excerpt only; the code that consumes ng and mgau_sl is not shown.
 * Chooses how the component count ng and the shortlist mgau_sl are
 * obtained for entry s.
 */
// ame.L1
// First choice: gs is set and kb->gs4gs is on -> gs_mgau_shortlist.
if(gs&&kb->gs4gs)
{
	ng = gs_mgau_shortlist (gs, s, mgau_n_comp(g,s),feat,best_cid);
  	mgau_sl=gs->mgau_sl;
// Second choice: svq is set -> subvq_mgau_shortlist.
}else if (svq)
{
  	ng = subvq_mgau_shortlist (svq, s, mgau_n_comp(g,s), svq_beam);
  	mgau_sl=svq->mgau_sl;
// Otherwise: no shortlist; ng is the full component count.
}else{
  	ng = mgau_n_comp (g, s);
 	mgau_sl=NULL;
}

/*  Function 9: dict2pid_comsenscr   */
/*
 * Excerpt of the hot loops only. The statements that set comstate and
 * the starting value of best for each i, and that use best afterwards,
 * are elided.
 */
// dc.L1
for (i = 0; i < d2p->n_comstate; i++) 
{
	// dc.L1.1
	// Scan comstate[] from index 1 until an entry fails the id check,
	// keeping the largest senscr[] value seen.
	// presumably index 0 is consumed by the elided initialisation of
	// best -- confirm against the full source
	for (j = 1;; j++) 
	{
	    k = comstate[j];
	    if (NOT_S3SENID(k))
		break;
	    if (best < senscr[k])
		best = senscr[k];
	}
}

/*  Function 10: hmm_vit_eval_3st   */
// hve.L1

/*  Function 11: lextree_enter   */
/*
 * Excerpt of the hot loop only; declarations and the handling of n after
 * the loop are elided.
 * Walks the list starting at root and updates the entry score of each
 * node that qualifies.
 */
// le.L1
for (gn = root; gn; gn = gnode_next(gn)) 
{
	ln = (lextree_node_t *) gnode_ptr (gn);	
	hmm = &(ln->hmm);
	// Candidate entry score: incoming score plus the node's prob term.
	scr = inscore + ln->prob;
	// Accept only if it reaches thresh and beats the stored entry score.
	if ((scr >= thresh) && (hmm->in.score < scr)) 
	{
	    hmm->in.score = scr;
	    hmm->in.history = inhist;
	    	    
	    // Append the node to next_active only once per frame value nf:
	    // the frame stamp marks nodes that were already added.
	    if (ln->frame != nf) 
	    {
			ln->frame = nf;
			lextree->next_active[n++] = ln;
	    }
	} 
}

/*   Function 12: fe_fft */
/*
 * Excerpts of the hot loops only; this is not complete code. The setup
 * of f1, f2, t1, t2, ww and wEnd before the while loop, and any swapping
 * of buffers between passes, is elided.
 */
// ff.L1
// Copy the input into from[], dividing both parts of each element by div.
for (s = 0; s<N; s++)
{
    from[s].r = in[s].r/div;
    from[s].i = in[s].i/div;
}

// ff.L1
// NOTE(review): this label repeats the one on the copy loop above, and the
// inner label ff.L1.1.1 is nested under this loop -- confirm the numbering.
// k starts at N/2 and is halved on every outer pass.
for (k = N/2; k > 0; k /= 2)
{
    for (s = 0; s < k; s++)
    {	
    	// ff.L1.1.1			
    	// Per step: multiply *f2 by *ww as complex numbers, then write the
    	// sum with *f1 to *t1 and the difference to *t2.
      	while (ww < wEnd)
      	{
	        wwf2.r = f2->r*ww->r - f2->i*ww->i;
	        wwf2.i = f2->r*ww->i + f2->i*ww->r;
	        /* t1 = f1+wwf2							*/
	        t1->r = f1->r + wwf2.r;
	        t1->i = f1->i + wwf2.i;
	        /* t2 = f1-wwf2							*/
	        t2->r = f1->r - wwf2.r;
	        t2->i = f1->i - wwf2.i;
	        /* increment							*/
	        // Inputs advance by 2*k; outputs and ww advance by k.
	        f1 += 2*k; f2 += 2*k;
	        t1 += k; t2 += k;
	        ww += k;
      	}
    }
}


/*  Function 13: lextree_hmm_propagate   */
// lhp.L1


/*  Function 14: dict2pid_comsseq2sen_active  */
/*
 * Excerpt of the hot loop only; declarations are elided.
 * For every flagged entry of comssid[], follows the matching comsseq row
 * to its comstate lists and sets the sen[] flag of every id in them.
 */
// dca.L1
for (ss = 0; ss < d2p->n_comsseq; ss++) {
	if (comssid[ss]) {
	    // csp: row ss of d2p->comsseq.
	    csp = d2p->comsseq[ss];

	    for (i = 0; i < mdef_n_emit_state(mdef); i++) 
	    {
			// Each value in the row indexes d2p->comstate.
			cs = csp[i];
			sp = d2p->comstate[cs];
			
			// Walk the list until an entry fails IS_S3SENID.
			for (j = 0; IS_S3SENID(sp[j]); j++)
			    sen[sp[j]] = 1;
	    }
	}
}

/*  Function 15: lextree_ssid_active  */
/*
 * Excerpt of the hot loop only; declarations and the setup of list are
 * elided.
 * Flags the ssid of every active node, in comssid[] for composite nodes
 * and in ssid[] for the others.
 */
// lsa.L1
 for (i = 0; i < lextree->n_active; i++) 
 {
	ln = list[i];
	if (ln->composite)
	    comssid[ln->ssid] = 1;
	else
	    ssid[ln->ssid] = 1;
}


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值