GetMatch

/// Produce the next batch of ranked matches.
///
/// Walks the hit stream of the query tree rooted at m_pRoot, feeds every hit
/// of a document into the ranking state (m_tState.Update), and when the
/// document's hits are exhausted stores the finalized weight into
/// m_dMatches. The doc/hit cursors are kept in m_pDoclist / m_pHitlist so
/// that the next call resumes where this one stopped.
///
/// @return number of entries written to m_dMatches during this call
///         (0 when there is no root node or nothing is left to match;
///         the loop stops once ExtNode_i::MAX_DOCS is reached).
template < typename STATE >
int ExtRanker_T<STATE>::GetMatches ()
{
	// no query tree, nothing to match
	if ( !m_pRoot )
		return 0;

	int iMatches = 0;
	// resume from the cursors saved by the previous call (may be NULL)
	const ExtHit_t * pHlist = m_pHitlist;
	const ExtDoc_t * pDocs = m_pDoclist;

	// warmup if necessary
	if ( !pHlist )
	{
		// no saved hits: make sure we have a docs block, then fetch its first hits chunk
		if ( !pDocs ) pDocs = GetFilteredDocs ();
		if ( !pDocs ) return iMatches;

		pHlist = m_pRoot->GetHitsChunk ( pDocs, m_uMaxID );
		if ( !pHlist ) return iMatches;
	}

	// main matching loop
	// uCurDocid==0 means "no document is currently being ranked" (see the flush check below)
	const ExtDoc_t * pDoc = pDocs;
	for ( SphDocID_t uCurDocid=0; iMatches<ExtNode_i::MAX_DOCS; )
	{
		// keep ranking
		while ( pHlist->m_uDocid==uCurDocid )
			// accumulate this hit into the ranking state and advance to the next hit
		 	m_tState.Update ( pHlist++ );

		// if hits block is over, get next block, but do *not* flush current doc
		if ( pHlist->m_uDocid==DOCID_MAX )
		{
			assert ( pDocs );
			pHlist = m_pRoot->GetHitsChunk ( pDocs, m_uMaxID );
			// more hits for this docs block: the current doc may continue in the new chunk
			if ( pHlist )
				continue;
		}

		// otherwise (new match or no next hits block), flush current doc
		if ( uCurDocid )
		{
			assert ( uCurDocid==pDoc->m_uDocid );
			// move the match stored at this doc's index (its offset within m_dMyDocs) into the output slot
			Swap ( m_dMatches[iMatches], m_dMyMatches[pDoc-m_dMyDocs] );
			// the weight comes from the ranking state's Finalize() for this match
			m_dMatches[iMatches].m_iWeight = m_tState.Finalize ( m_dMatches[iMatches] );
			iMatches++;
		}

		// boundary checks
		if ( !pHlist )
		{
			// there are no more hits for current docs block; do we have a next one?
			assert ( pDocs );
			pDoc = pDocs = GetFilteredDocs ();

			// we don't, so bail out
			if ( !pDocs )
				break;

			// we do, get some hits
			pHlist = m_pRoot->GetHitsChunk ( pDocs, m_uMaxID );
			assert ( pHlist ); // fresh docs block, must have hits
		}

		// skip until next good doc/hit pair
		assert ( pDoc->m_uDocid<=pHlist->m_uDocid );
		while ( pDoc->m_uDocid<pHlist->m_uDocid ) pDoc++;
		assert ( pDoc->m_uDocid==pHlist->m_uDocid );

		// start ranking the document the next hit belongs to
		uCurDocid = pHlist->m_uDocid;
	}

	// save the cursors so the next call can resume from here
	m_pDoclist = pDocs;
	m_pHitlist = pHlist;
	return iMatches;
}

通过下边这段代码可以发现:

		while ( pHlist->m_uDocid==uCurDocid )
			//计算
		 	m_tState.Update ( pHlist++ );
只要pHlist指针指向的ExtHit_t结构的m_uDocid等于当前正在处理的文档id(uCurDocid),就一直调用Update函数,把该命中累积到m_tState中;m_tState最终存储的是这一个文档id的综合评分。

/// hit in the stream
/// (one element of the hit list that GetHitsChunk() returns and the ranker consumes)
struct ExtHit_t
{
	/// id of the document this hit belongs to
	SphDocID_t	m_uDocid;
	/// packed hit position; per the accompanying notes it encodes the field,
	/// the field-end flag and the position within the field
	Hitpos_t	m_uHitpos;
	// NOTE(review): the meaning of the remaining members is not shown in this
	// excerpt; judging by the names they describe the query position, node
	// position, span length, match length and weight of the hit - confirm
	// against the Sphinx sources.
	WORD		m_uQuerypos;
	WORD		m_uNodepos;
	WORD		m_uSpanlen;
	WORD		m_uMatchlen;
	DWORD		m_uWeight;
};
ExtHit_t的结构如上,m_uDocid存储id信息。而m_uHitpos字段存储了很多信息,哪个字段,是否结束,位置,以及最长公共子串。

字段值最大是多少呢?

if ( m_uCurLCS>m_uLCS[uField] )
			m_uLCS[uField] = m_uCurLCS;

BYTE m_uLCS[SPH_MAX_FIELDS];

#define SPH_MAX_FIELDS			256
可以发现最多支持256个字段(SPH_MAX_FIELDS)。字段编号存储在m_uHitpos最前面(最高)的8位,取值范围为0~255(8位无法表示256)。
	/// Return the packed hit position with the FIELDEND_MASK bit(s) cleared.
	/// (Per the accompanying notes FIELDEND_MASK is 1 shifted left by 23 bits,
	/// so the mask applied here is its bitwise complement.)
	static inline DWORD GetLCS ( Hitpos_t uHitpos )
	{
		const DWORD uWithoutFieldEnd = uHitpos & ~FIELDEND_MASK;
		return uWithoutFieldEnd;
	}
正数的补码是其本身;负数的补码是其原码除符号位外各位取反,再加1。

此处一定要区分反码与取反的区别:

1的反码还是1

1按位取反,11111....1110,而计算机在存储的时候是补码存储的,-2的补码刚好是这个,所以1按位取反是-2.

同样,00000000 10000000 00000000 00000000
取反,11111111 01111111 11111111 11111111

减1,  11111111 01111111 11111111 11111110

除符号位取反

             10000000  10000000 00000000 00000001

所以它按位取反的结果是-8388609(即0xFF7FFFFF),注意这是“取反”的结果,而不是“反码”。


这是数据举例

indextool -c ../etc/csft_daquan_suggest_all.conf --dumphitlist csft_daquan_suggest_web dai

通过indextool工具查看索引的情况,可以看到索引的内容,此语句查找命中dai的doc和hit

doc=500163049, hit=0x00800001
doc=500163049, hit=0x01800001

mysql> select * from IBO_suggest_info_web where id=500163049 \G;
*************************** 1. row ***************************
               id: 500163049
             type: 5
            title: Daisy
           weight: 2012583
           pinyin: Daisy
   is_web_removed: 1
is_client_removed: 1

在上边介绍的图中可以发现0x00800001的前8位,即00表示命中的字段,在此例中有00和01,

在上边数据库数据中也可以发现,title和pinyin中都有dai的数据。

0x00800001中的80(即第23位,FIELDEND_MASK),则表示该命中是所在字段的最后一个词,也就是字段结束标志。

低位的0001是命中位置,表示命中的是title分词后的第一个词。

下边再举一例

doc=117884, hit=0x00800002

mysql> select * from IBO_suggest_info_web where id=117884 \G;
*************************** 1. row ***************************
               id: 117884
             type: 0
            title: 茫然一代
           weight: 1976000
           pinyin: mangranyidai
   is_web_removed: 1
is_client_removed: 1

可以用mmseg查看分词情况:

茫然/x 一代/x 

“一代”是分词后的第二个词,并且它后面再没有内容(字段到此结束),所以标志位为80,位置为02。



请详细分析该代码中所有function的详细功能:/* */ package com.smics.apps.erc.action.util; /* */ /* */ import com.smics.apps.core.Application; /* */ import com.smics.apps.core.setup.container.Container; /* */ import com.smics.apps.erc.ErcConstants; /* */ import com.smics.apps.erc.RuncardRule; /* */ import com.smics.apps.erc.domain.CompareInfo; /* */ import com.smics.apps.erc.domain.ErcMastForm; /* */ import com.smics.apps.erc.domain.StepForm; /* */ import com.smics.apps.erc.service.ErcService; /* */ import java.io.PrintStream; /* */ import java.util.ArrayList; /* */ import java.util.HashMap; /* */ import java.util.List; /* */ import java.util.Map; /* */ import org.apache.commons.logging.Log; /* */ import org.apache.commons.logging.LogFactory; /* */ /* */ public class CompareUtil /* */ { /* 33 */ protected static final Log log = LogFactory.getLog(CompareUtil.class); /* 34 */ public static ErcService ercService = null; /* */ /* */ public static String[] getStepNoRemoveRepatedStepNo(ErcMastForm requestForm) /* */ { /* 39 */ List allStepFoms = requestForm.getStepForms(); /* 40 */ List result = new ArrayList(); /* 41 */ for (int i = 0; i < allStepFoms.size(); i++) { /* 42 */ StepForm stepForm = (StepForm)allStepFoms.get(i); /* 43 */ if (("".equalsIgnoreCase(stepForm.getStepNo())) || (stepForm.getStepNo() == null)) /* */ continue; /* 45 */ if (!result.contains(stepForm.getStepNo())) { /* 46 */ result.add(stepForm.getStepNo().trim()); /* */ } /* */ } /* */ /* 50 */ return (String[])(String[])result.toArray(new String[result.size()]); /* */ } /* */ /* */ public static boolean isMatch(Map compareInfo, ErcMastForm ercForm, List mesAllStep) { /* 54 */ String[] ercSteps = getStepNoRemoveRepatedStepNo(ercForm); /* 55 */ if (mesAllStep.size() != ercSteps.length) { /* 56 */ return false; /* */ } /* 58 */ List stepForms = ercForm.getStepForms(); /* 59 */ for (int i = 0; i < stepForms.size(); i++) { /* 60 */ StepForm ercStep = (StepForm)stepForms.get(i); /* */ /* 62 */ if 
(compareInfo.containsKey(ercStep.getStepNo())) { /* 63 */ CompareInfo info = (CompareInfo)compareInfo.get(ercStep.getStepNo()); /* */ /* 65 */ if (!info.isSame()) { /* 66 */ return false; /* */ } /* 68 */ if ((!ercStep.getEqpModeName().trim().equals(info.getEQPGroup().trim())) || (!checkRecipe(ercStep, info))) /* */ { /* 71 */ return false; /* */ } /* */ /* 74 */ if ((!ercStep.getEqpModeName().trim().equals(info.getEQPGroup().trim())) || (!checkRecipe(ercStep, info))) /* */ { /* 77 */ return false; /* */ } /* */ /* 80 */ if ((ErcConstants.photoList.contains(ercStep.getArea())) && (!ercStep.getReticleId().equals(info.getReticleId()))) { /* 81 */ return false; /* */ } /* */ } /* */ } /* */ /* 86 */ return true; /* */ } /* */ public static boolean checkRecipe(StepForm stepForm, CompareInfo info) { /* 89 */ if (ErcConstants.photoList.contains(stepForm.getArea())) { /* 90 */ String recipe = stepForm.getTrackOutRecipe().trim() + "@" + stepForm.getScannerRecipe().trim(); /* 91 */ if (!recipe.equals(info.getMesRecipe().trim())) /* 92 */ return false; /* 93 */ } else if (!stepForm.getRecipe().equals(info.getMesRecipe())) { /* 94 */ return false; /* 95 */ }return true; /* */ } /* */ public static Map getRuncardInfoFromMES(ErcMastForm requestForm, String mode) { /* 98 */ String ruleName = "eRunCardByXML"; /* 99 */ Map parameters = new HashMap(); /* 100 */ Map result = new HashMap(); /* 101 */ String mbxKey = requestForm.getFabId(); /* 102 */ if ((mbxKey == null) || ("".equalsIgnoreCase(mbxKey))) { /* 103 */ result.put("errorMessage", "unKnow site" + mbxKey); /* */ } /* 105 */ if ("all".equalsIgnoreCase(mode)) /* 106 */ parameters = RuncardRule.getRuncardParametersByHoldStepAndCompletedStep(requestForm); /* */ else /* 108 */ parameters = RuncardRule.getRuncardParametersByCondition(requestForm); /* 109 */ result = ercService.eRuncardRole(ruleName, mbxKey, parameters); /* 110 */ String succ = (String)result.get("Result"); /* 111 */ if ((result == null) || 
(!"0".equalsIgnoreCase(succ))) { /* 112 */ log.info("Compare Util Failure"); /* 113 */ result.put("errorMessage", "MES Return Error:(" + result.get("Result") + ")" + (String)result.get("ErrorDesc")); /* */ } /* 115 */ return result; /* */ } /* */ public static Map getCompareInfo(ErcMastForm ercForm) { /* 118 */ Map errorInfos = new HashMap(); /* 119 */ Map runcardInfo = getRuncardInfoFromMES(ercForm, ""); /* 120 */ if (runcardInfo.get("errorMessage") != null) { /* 121 */ errorInfos.put("message", runcardInfo.get("errorMessage")); /* 122 */ return errorInfos; /* */ } /* 124 */ List stepNoFromMes = getMesAllStepNoByHoldStepNo(ercForm); /* 125 */ String[] ercSteps = getStepNoRemoveRepatedStepNo(ercForm); /* 126 */ List stepForms = ercForm.getStepForms(); /* 127 */ String resultForCompare = (String)runcardInfo.get("eRunCardInfo"); /* 128 */ String mesQty = (String)runcardInfo.get("Qty"); /* 129 */ String[] stepInfo = resultForCompare.trim().split("\\|\\|\\|"); /* 130 */ System.out.println(stepInfo.length); /* 131 */ for (int j = 0; j < stepInfo.length; j++) /* */ { /* 133 */ String[] splitStepInfo = stepInfo[j].split(","); /* 134 */ CompareInfo info = new CompareInfo(); /* 135 */ info.setStepNo(splitStepInfo[0]); /* 136 */ info.setMesQty(Integer.parseInt(mesQty)); /* 137 */ info.setMesRecipe(splitStepInfo[1]); /* 138 */ info.setEQPGroup(splitStepInfo[3]); /* 139 */ if (splitStepInfo.length > 4) /* 140 */ info.setReticleId(splitStepInfo[4]); /* */ else { /* 142 */ info.setReticleId(""); /* */ } /* 144 */ int sum = 0; /* 145 */ for (int m = 0; m < stepForms.size(); m++) { /* 146 */ StepForm stepForm = (StepForm)stepForms.get(m); /* 147 */ if (info.getStepNo().equalsIgnoreCase(stepForm.getStepNo())) { /* 148 */ sum += Integer.parseInt(stepForm.getWaferIdQty()); /* */ } /* */ } /* 151 */ info.setErcQty(sum); /* 152 */ ercForm.getCompareInfos().put(info.getStepNo(), info); /* */ } /* 154 */ System.out.println(ercForm.getCompareInfos()); /* 155 */ log.info("Runcard Id: " + 
ercForm.getCaseNo() + " start match value: " + ercForm.getMatch()); /* 156 */ if (!isMatch(ercForm.getCompareInfos(), ercForm, stepNoFromMes)) /* 157 */ ercForm.setMatch("N"); /* */ else /* 159 */ ercForm.setMatch("Y"); /* 160 */ ercForm.setErcAllStep(ercSteps); /* */ /* 162 */ ercForm.setMesStepLength(new Integer(stepNoFromMes.size())); /* 163 */ ercService.updateRequestForm(ercForm); /* 164 */ log.info("Runcard Id: " + ercForm.getCaseNo() + " after match value: " + ercForm.getMatch()); /* */ /* 166 */ return ercForm.getCompareInfos(); /* */ } /* */ public static List getMesAllStepNoByHoldStepNo(ErcMastForm ercForm) { /* 169 */ List mesAllStep = new ArrayList(); /* 170 */ Map runcardInfo = getRuncardInfoFromMES(ercForm, "all"); /* 171 */ String resultForCompare = (String)runcardInfo.get("eRunCardInfo"); /* 172 */ String[] stepInfo = resultForCompare.trim().split("\\|\\|\\|"); /* 173 */ for (int j = 0; j < stepInfo.length; j++) { /* 174 */ String[] splitStepInfo = stepInfo[j].split(","); /* */ /* 177 */ if ((ercForm.getErcStepNo() != null) && (ercForm.getErcStepNo().compareTo(splitStepInfo[0]) <= 0)) /* */ continue; /* 179 */ if (mesAllStep.size() == 0) /* 180 */ mesAllStep.add(splitStepInfo[0]); /* */ else /* 182 */ mesAllStep.add(";" + splitStepInfo[0]); /* */ } /* 184 */ return mesAllStep; /* */ } /* */ /* */ public static List getMesAllStepsByHoldStepNo(ErcMastForm ercForm) /* */ { /* 189 */ List mesAllStep = new ArrayList(); /* 190 */ Map runcardInfo = getRuncardInfoFromMES(ercForm, "all"); /* 191 */ String resultForCompare = (String)runcardInfo.get("eRunCardInfo"); /* 192 */ String[] stepInfo = resultForCompare.trim().split("\\|\\|\\|"); /* 193 */ for (int j = 0; j < stepInfo.length; j++) { /* 194 */ String[] splitStepInfo = stepInfo[j].split(","); /* 195 */ if (("".equals(stepInfo[j].trim())) || ( /* 196 */ (!"Auto Reposition Step".equalsIgnoreCase(ercForm.getErcCategory())) && (ercForm.getErcStepNo() != null) && 
(ercForm.getErcStepNo().compareTo(splitStepInfo[0]) <= 0))) continue; /* 197 */ Map stepInformation = new HashMap(); /* */ /* 199 */ stepInformation.put("stepNo", splitStepInfo[0]); /* 200 */ stepInformation.put("mesRecipe", splitStepInfo[1]); /* 201 */ stepInformation.put("EQPGroup", splitStepInfo[3]); /* 202 */ if (splitStepInfo.length > 4) /* 203 */ stepInformation.put("reticleId", splitStepInfo[4]); /* */ else { /* 205 */ stepInformation.put("reticleId", ""); /* */ } /* 207 */ mesAllStep.add(stepInformation); /* */ } /* 209 */ return mesAllStep; /* */ } /* */ /* */ static /* */ { /* 36 */ ercService = (ErcService)Application.getInstance().getContainer().getComponent("ercService"); /* */ } /* */ } /* Location: C:\Users\JE03789\Documents\erc.zip * Qualified Name: erc.WEB-INF.classes.com.smics.apps.erc.action.util.CompareUtil * JD-Core Version: 0.6.0 */
最新发布
09-12
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值