#include <cstring>
#include <algorithm>
#include "tries.h"
#include "dicthead.h"
#define DEF_TRIES_DIC_VERSION 1
// Fixed-size record used by Save()/Load() to store one term node in the
// dictionary file. It carries the three numeric fields of a term node; the
// node's m_pString is not stored and is recomputed by Load() from m_uiBTPos.
struct STRU_TRIES_INN_NODE
{
// Value of the node's m_uiLen (Add() sets it to strlen of the term).
unsigned int m_uiLen;
// Value of the node's m_uiAttr (the attribute passed to Add()).
unsigned int m_uiAttr;
// Offset of the term's text inside the dictionary's string buffer.
unsigned int m_uiBTPos;
};
// Builds an empty dictionary object: no buffer is owned and every capacity
// and cursor is zero. Storage is obtained later by Create() or Load().
CTriesDict::CTriesDict()
{
    // Owned buffers.
    m_pStrBuf = NULL;
    m_pDicEntry = NULL;
    m_pSEInfo = NULL;
    m_pTNodeLst = NULL;

    // Capacities.
    m_uiBufSize = 0;
    m_uiESize = 0;
    m_uiSESize = 0;
    m_uiTSize = 0;

    // Fill positions and the root suffix-table offset.
    m_uiBCurPos = 0;
    m_uiECurPos = 0;
    m_uiSECurPos = 0;
    m_uiTCurPos = 0;
    m_uiStartPos = 0;
}
// Releases the four owned buffers and zeroes all bookkeeping. The teardown
// sequence is exactly the one performed by Destroy(), so it is delegated.
CTriesDict::~CTriesDict()
{
    Destroy();
}
int CTriesDict::Create(unsigned int auiStrNum)
{
if(auiStrNum<=0)
{
return -1;
}
STRU_TRIES_DICT_ENTRY *lpDicEntry = (STRU_TRIES_DICT_ENTRY*)calloc(4*auiStrNum,sizeof(STRU_TRIES_DICT_ENTRY));
if(!lpDicEntry)
{
return -1;
}
char *lpStrBuf = (char*)calloc(50*auiStrNum,sizeof(char));
if(!lpStrBuf)
{
return -1;
}
unsigned int *lpSEInfo = (unsigned int*)calloc(20*auiStrNum,sizeof(unsigned int));
STRU_TRIES_DICT_NODE *lpDicNode = (STRU_TRIES_DICT_NODE*)calloc(auiStrNum,sizeof(STRU_TRIES_DICT_NODE));
m_pStrBuf = lpStrBuf;
m_uiBufSize = 50*auiStrNum;
m_uiBCurPos = 0;
m_pDicEntry = lpDicEntry;
m_uiESize = 4*auiStrNum;
m_uiECurPos = 0;
m_pSEInfo = lpSEInfo;
m_uiSESize = 20*auiStrNum;
m_uiSECurPos = 0;
m_pTNodeLst = lpDicNode;
m_uiTSize = auiStrNum;
m_uiTCurPos = 0;
m_uiStartPos = 0;
InitSuffEntry(m_uiStartPos,1,DEF_TRIES_FIRST);
m_uiSECurPos += 3;
return 0;
}
// Frees every buffer owned by the dictionary and returns the object to the
// empty state (null pointers, zero capacities, zero cursors). Safe to call
// repeatedly.
void CTriesDict::Destroy()
{
    // free() accepts a null pointer, so no per-buffer guard is required.
    free(m_pStrBuf);
    m_pStrBuf = NULL;
    m_uiBufSize = 0;
    m_uiBCurPos = 0;

    free(m_pDicEntry);
    m_pDicEntry = NULL;
    m_uiESize = 0;
    m_uiECurPos = 0;

    free(m_pSEInfo);
    m_pSEInfo = NULL;
    m_uiSESize = 0;
    m_uiSECurPos = 0;

    free(m_pTNodeLst);
    m_pTNodeLst = NULL;
    m_uiTSize = 0;
    m_uiTCurPos = 0;

    m_uiStartPos = 0;
}
void CTriesDict::Clean()
{
m_uiBCurPos = 0;
m_uiECurPos = 0;
m_uiSECurPos = 0;
m_uiTCurPos = 0;
m_uiStartPos = 0;
}
int CTriesDict::Load(const char *apFile)
{
FILE *lpIn = NULL;
int liNowPos = 0,liEndPos = 0;
lpIn = fopen(apFile,"rb");
if(!lpIn)
{
return -1;
}
STRU_DICT_HEAD_DEF loHead;
int liRet = fread(&loHead.m_byType,sizeof(unsigned char),1,lpIn);
if(liRet != 1)
{
fclose(lpIn);
return -1;
}
if(loHead.m_byType != DEF_DICT_TRIES_TYPE)
{
fclose(lpIn);
return -1;
}
liRet = fread(&loHead.m_byVersion,sizeof(unsigned char),1,lpIn);
if(liRet != 1)
{
fclose(lpIn);
return -1;
}
if(loHead.m_byVersion != DEF_TRIES_DIC_VERSION)
{
fclose(lpIn);
return -1;
}
liRet = fread(loHead.m_szComment,sizeof(loHead.m_szComment),1,lpIn);
if(liRet != 1)
{
fclose(lpIn);
return -1;
}
fread(&m_uiBCurPos,sizeof(unsigned int),1,lpIn);
m_pStrBuf = (char*)malloc(m_uiBCurPos+4);
m_uiBufSize = m_uiBCurPos+4;
fread(m_pStrBuf,sizeof(char),m_uiBCurPos,lpIn);
fread(&m_uiSECurPos,sizeof(unsigned int),1,lpIn);
m_pSEInfo = (unsigned int*)calloc(m_uiSECurPos+1,sizeof(unsigned int));
m_uiSESize = m_uiSECurPos+1;
fread(m_pSEInfo,sizeof(unsigned int),m_uiSECurPos,lpIn);
fread(&m_uiECurPos,sizeof(unsigned int),1,lpIn);
m_pDicEntry = (STRU_TRIES_DICT_ENTRY*)calloc(m_uiECurPos+1,sizeof(STRU_TRIES_DICT_ENTRY));
m_uiESize = m_uiECurPos+1;
fread(m_pDicEntry,sizeof(STRU_TRIES_DICT_ENTRY),m_uiECurPos,lpIn);
fread(&m_uiTCurPos,sizeof(unsigned int),1,lpIn);
m_pTNodeLst = (STRU_TRIES_DICT_NODE*)calloc(m_uiTCurPos+1,sizeof(STRU_TRIES_DICT_NODE));
m_uiTSize = m_uiTCurPos + 1;
for(unsigned int i = 0;i < m_uiTCurPos;i++)
{
STRU_TRIES_INN_NODE loTmpNode;
fread(&loTmpNode,sizeof(STRU_TRIES_INN_NODE),1,lpIn);
m_pTNodeLst[i].m_uiLen = loTmpNode.m_uiLen;
m_pTNodeLst[i].m_uiAttr = loTmpNode.m_uiAttr;
m_pTNodeLst[i].m_uiBTPos = loTmpNode.m_uiBTPos;
m_pTNodeLst[i].m_pString = m_pStrBuf + m_pTNodeLst[i].m_uiBTPos;
}
fread(&m_uiStartPos,sizeof(m_uiStartPos),1,lpIn);
liNowPos = ftell(lpIn);
fseek(lpIn,0,SEEK_END);
liEndPos = ftell(lpIn);
if(liEndPos > liNowPos + 1)
{
fclose(lpIn);
return -1;
}
fclose(lpIn);
return 0;
}
// Writes the dictionary to apFile in the format read by Load().
//
// The suffix-table array is compacted with AdjustSEInfo() first, so tables
// that are no longer referenced are not written.
//
// apFile     path of the output file (created or truncated).
// apComment  optional text stored in the fixed-size header comment field;
//            longer text is truncated to the field size, shorter text is
//            zero padded. May be NULL.
//
// Returns 0 on success, -1 if compaction reports an inconsistency, the file
// cannot be opened, or any write (including the final flush on close) fails.
int CTriesDict::Save(const char *apFile,const char *apComment)
{
    if(!apFile)
    {
        return -1;
    }
    // AdjustSEInfo() reports failure as -1.
    if(AdjustSEInfo() < 0)
    {
        return -1;
    }
    FILE *lpOutput = fopen(apFile,"wb");
    if(!lpOutput)
    {
        return -1;
    }

    //Record Head
    STRU_DICT_HEAD_DEF loHead;
    loHead.m_byType = DEF_DICT_TRIES_TYPE;
    loHead.m_byVersion = DEF_TRIES_DIC_VERSION;
    memset(loHead.m_szComment,0,sizeof(loHead.m_szComment));
    if(apComment)
    {
        const size_t liSize = std::min(strlen(apComment),sizeof(loHead.m_szComment));
        memcpy(loHead.m_szComment,apComment,liSize);
    }
    bool lbOk =
        fwrite(&loHead.m_byType,sizeof(unsigned char),1,lpOutput) == 1 &&
        fwrite(&loHead.m_byVersion,sizeof(unsigned char),1,lpOutput) == 1 &&
        fwrite(loHead.m_szComment,sizeof(loHead.m_szComment),1,lpOutput) == 1;

    //Record Data
    // Each section is a count followed by that many elements; empty sections
    // skip the payload write so a null buffer is never handed to fwrite().
    lbOk = lbOk &&
        fwrite(&m_uiBCurPos,sizeof(unsigned int),1,lpOutput) == 1 &&
        (m_uiBCurPos == 0 ||
         fwrite(m_pStrBuf,sizeof(char),m_uiBCurPos,lpOutput) == m_uiBCurPos);
    lbOk = lbOk &&
        fwrite(&m_uiSECurPos,sizeof(unsigned int),1,lpOutput) == 1 &&
        (m_uiSECurPos == 0 ||
         fwrite(m_pSEInfo,sizeof(unsigned int),m_uiSECurPos,lpOutput) == m_uiSECurPos);
    lbOk = lbOk &&
        fwrite(&m_uiECurPos,sizeof(unsigned int),1,lpOutput) == 1 &&
        (m_uiECurPos == 0 ||
         fwrite(m_pDicEntry,sizeof(STRU_TRIES_DICT_ENTRY),m_uiECurPos,lpOutput) == m_uiECurPos);
    lbOk = lbOk &&
        fwrite(&m_uiTCurPos,sizeof(unsigned int),1,lpOutput) == 1;

    // Term nodes are written without their in-memory string pointer.
    for(unsigned int i = 0;lbOk && i < m_uiTCurPos;i++)
    {
        STRU_TRIES_INN_NODE loTmpNode;
        loTmpNode.m_uiLen = m_pTNodeLst[i].m_uiLen;
        loTmpNode.m_uiAttr = m_pTNodeLst[i].m_uiAttr;
        loTmpNode.m_uiBTPos = m_pTNodeLst[i].m_uiBTPos;
        lbOk = fwrite(&loTmpNode,sizeof(STRU_TRIES_INN_NODE),1,lpOutput) == 1;
    }
    lbOk = lbOk &&
        fwrite(&m_uiStartPos,sizeof(m_uiStartPos),1,lpOutput) == 1;

    // fclose() flushes buffered data, so its result matters as well.
    if(fclose(lpOutput) != 0)
    {
        lbOk = false;
    }
    return lbOk ? 0 : -1;
}
int CTriesDict::Add(const char *apString,unsigned int auiAttr)
{
int liCurPos = 0;
int liLen = strlen(apString);
unsigned int luiLastDEPos = DEF_TRIES_FIRST;
unsigned int luiCurDEPos = DEF_TRIES_COMMNULL;
unsigned int luiStrCode = 0;
unsigned int luiTPos = DEF_TRIES_COMMNULL;
if(SeekString(const_cast<char*>(apString),strlen(apString)) != DEF_TRIES_NODENULL)
{
return 0;
}
while(liCurPos < liLen)
{
luiStrCode = GetStrCode(apString,liLen,&liCurPos);
if(InsertDicEntry(luiLastDEPos,luiStrCode,luiCurDEPos) < 0)
{
return -1;
}
luiLastDEPos = luiCurDEPos;
}
if(m_pDicEntry[luiCurDEPos].m_uiValue != luiStrCode)
{
return -1;
}
if(m_uiTCurPos == m_uiTSize)
{
ResizeTNodeLst(m_uiTSize * 2);
}
luiTPos = m_uiTCurPos++;
m_pDicEntry[luiCurDEPos].m_uiTPos = luiTPos;
m_pTNodeLst[luiTPos].m_uiLen = liLen;
m_pTNodeLst[luiTPos].m_uiAttr = auiAttr;
while(m_uiBCurPos + liLen + 1 > m_uiBufSize)
{
ResizeBuf(m_uiBufSize * 2);
}
memcpy(m_pStrBuf+m_uiBCurPos,apString,liLen);
m_pStrBuf[m_uiBCurPos+liLen] = 0;
m_pTNodeLst[luiTPos].m_pString = m_pStrBuf+m_uiBCurPos;
m_pTNodeLst[luiTPos].m_uiBTPos = m_uiBCurPos;
m_uiBCurPos += liLen + 1;
return 0;
}
int CTriesDict::Search(const char *apString,STRU_TRIES_SEARCH_RESULT *apResult,int aiSearchMode)
{
int liBTPos = 0,liPos = 0;
int liNextPos = liPos;
unsigned int luiValue = 0;
unsigned int luiENPos = 0;
unsigned int luiEFPos = DEF_TRIES_FIRST;
unsigned int luiTNodePos = DEF_TRIES_NODENULL;
unsigned int luiSTNodePos = DEF_TRIES_NODENULL;
int liStrLen = strlen(apString);
apResult->m_uiSegCnt = 0;
/*
int count=0;
while(1)
{
if(!m_pDicEntry[count].m_uiValue)
{
break;
}
char word[5]={0};
word[0]=m_pDicEntry[count].m_uiValue/256;
word[1]=m_pDicEntry[count].m_uiValue%256;
printf("%s ",word);
count++;
}
printf("\n\ntotal word count %d\n\n",count);
*/
if(aiSearchMode == DEF_TRIES_SEARCH_FMM)
{
while(liPos < liStrLen)
{
liBTPos = liPos;
luiValue = GetStrCode(apString,liStrLen,&liPos);
if(luiValue == 0)
{
break;
}
char word[5]={0};
word[0]=luiValue/256;
word[1]=luiValue%256;
// printf("%s ",word);
while(luiValue != 0 && (luiENPos = SeekEntry(luiEFPos,luiValue)) != DEF_TRIES_NULL)
{
char word[5]={0};
word[0]=m_pDicEntry[luiENPos].m_uiValue/256;
word[1]=m_pDicEntry[luiENPos].m_uiValue%256;
if((luiTNodePos = m_pDicEntry[luiENPos].m_uiTPos) != DEF_TRIES_NODENULL)
{
luiSTNodePos = luiTNodePos;
liNextPos = liPos;
luiEFPos = luiENPos;
}
luiValue = GetStrCode(apString,liStrLen,&liPos);
word[0]=luiValue/256;
word[1]=luiValue%256;
// printf("%s ",word);
luiEFPos = luiENPos;
}
if(luiSTNodePos != DEF_TRIES_NODENULL)
{
PackResult(apResult,m_pTNodeLst+luiSTNodePos,liBTPos);
liPos = liNextPos;
}
else
{
}
luiEFPos = DEF_TRIES_FIRST;
luiSTNodePos = DEF_TRIES_NODENULL;
// liPos = liNextPos;
}
}
else if(aiSearchMode == DEF_TRIES_SEARCH_ALL)
{
while(liPos < liStrLen)
{
liBTPos = liPos;
luiValue = GetStrCode(apString,liStrLen,&liPos);
if(luiValue == 0)
{
break;
}
liNextPos = liPos;
while(luiValue != 0 && (luiENPos = SeekEntry(luiEFPos,luiValue)) != DEF_TRIES_NULL)
{
if((luiTNodePos = m_pDicEntry[luiENPos].m_uiTPos) != DEF_TRIES_NODENULL)
{
PackResult(apResult,m_pTNodeLst+luiTNodePos,liBTPos);
}
luiValue = GetStrCode(apString,liStrLen,&liPos);
luiEFPos = luiENPos;
}
luiEFPos = DEF_TRIES_FIRST;
liPos = liNextPos;
}
}
return 0;
}
// Appends one hit (term node and start offset) to a search result.
// Returns 0 when the hit was stored and -1 when the result already holds
// m_uiMaxTermNum hits, in which case nothing is changed.
int CTriesDict::PackResult(STRU_TRIES_SEARCH_RESULT *apResult,
                           STRU_TRIES_DICT_NODE *apNode,unsigned int auiOffset)
{
    const bool lbHasRoom = !(apResult->m_uiSegCnt + 1 > apResult->m_uiMaxTermNum);
    if(lbHasRoom)
    {
        apResult->m_ppResult[apResult->m_uiSegCnt] = apNode;
        apResult->m_pOffset[apResult->m_uiSegCnt] = auiOffset;
        apResult->m_uiSegCnt++;
        return 0;
    }
    return -1;
}
// Finds or creates the child entry for character code auiStrCode below the
// entry auiLastDEPos (DEF_TRIES_FIRST means "below the root").
//
// Children of an entry are kept in a suffix table: a collision-free hash
// table indexed by code % table size. When the new code collides with an
// existing child, a larger table is searched for (size + 1, + 2, ...) in
// which all children land in distinct slots; the old table is abandoned and
// reclaimed later by AdjustSEInfo().
//
// auiCurDEPos receives the index of the child entry.
//
// Returns 0 if the child already existed, 1 if a new child entry was
// created, and -1 on error (out of memory or an inconsistent suffix table).
// On error no new entry remains allocated.
int CTriesDict::InsertDicEntry(unsigned int auiLastDEPos,unsigned int auiStrCode,unsigned int &auiCurDEPos)
{
    // Inserting directly below the root?
    const bool lbAtRoot = (auiLastDEPos == DEF_TRIES_FIRST);

    unsigned int luiSEOff = 0;
    if(lbAtRoot)
    {
        luiSEOff = m_uiStartPos;
    }
    else
    {
        luiSEOff = m_pDicEntry[auiLastDEPos].m_uiSPos;
    }

    // The parent has no suffix table yet: give it a one-slot table.
    if(luiSEOff == DEF_TRIES_SUFFNULL)
    {
        if(lbAtRoot)
        {
            return -1;  // the root has no entry that could own a new table
        }
        if(m_uiSECurPos + 3 > m_uiSESize)
        {
            AdjustSEInfo();     // try to reclaim abandoned tables first
        }
        while(m_uiSECurPos + 3 > m_uiSESize)
        {
            if(ResizeSEInfo(m_uiSESize ? m_uiSESize * 2 : 4) != 0)
            {
                return -1;
            }
        }
        luiSEOff = m_uiSECurPos;
        m_pDicEntry[auiLastDEPos].m_uiSPos = luiSEOff;
        InitSuffEntry(luiSEOff,1,auiLastDEPos);
        m_uiSECurPos += 3;
    }

    const unsigned int luiHSize = GetSuffHSize(luiSEOff);
    if(luiHSize == 0)
    {
        return -1;      // corrupt table; also avoids a division by zero
    }
    const unsigned int luiHPos = auiStrCode % luiHSize;
    const unsigned int luiDEPos = GetSuffEntry(luiSEOff,luiHPos);
    if((luiDEPos != DEF_TRIES_NULL) && (m_pDicEntry[luiDEPos].m_uiValue == auiStrCode))
    {
        auiCurDEPos = luiDEPos;
        return 0;
    }

    // A new entry is required.
    if(m_uiECurPos >= m_uiESize)
    {
        if(ResizeDicEntry(m_uiESize ? m_uiESize * 2 : 4) != 0 || m_uiECurPos >= m_uiESize)
        {
            return -1;
        }
    }
    const unsigned int luiCurDEPos = m_uiECurPos++;
    m_pDicEntry[luiCurDEPos].m_uiValue = auiStrCode;
    m_pDicEntry[luiCurDEPos].m_uiTPos = DEF_TRIES_NODENULL;
    m_pDicEntry[luiCurDEPos].m_uiSPos = DEF_TRIES_SUFFNULL;

    // Free slot: simply store the new entry.
    if(luiDEPos == DEF_TRIES_NULL)
    {
        SetSuffEntry(luiSEOff,luiHPos,luiCurDEPos);
        auiCurDEPos = luiCurDEPos;
        return 1;
    }

    // Hash conflict: look for the next table size without any collision.
    for(unsigned int luiNewHash = luiHSize + 1;;++luiNewHash)
    {
        // The candidate table is built in the free space at m_uiSECurPos.
        if(m_uiSECurPos + luiNewHash + 2 > m_uiSESize)
        {
            AdjustSEInfo();
        }
        while(m_uiSECurPos + luiNewHash + 2 > m_uiSESize)
        {
            if(ResizeSEInfo(m_uiSESize ? m_uiSESize * 2 : 4) != 0)
            {
                --m_uiECurPos;  // give the unused entry back
                return -1;
            }
        }
        // Compaction may have moved the current table; look it up again.
        if(lbAtRoot)
        {
            luiSEOff = m_uiStartPos;
        }
        else
        {
            luiSEOff = m_pDicEntry[auiLastDEPos].m_uiSPos;
        }

        InitSuffEntry(m_uiSECurPos,luiNewHash,auiLastDEPos);

        // Re-hash the existing children into the candidate table.
        bool lbConflict = false;
        for(unsigned int i = 0;i < luiHSize;i++)
        {
            const unsigned int luiEntrance = GetSuffEntry(luiSEOff,i);
            if(luiEntrance == DEF_TRIES_NULL)
            {
                continue;
            }
            const unsigned int luiTmpPos = m_pDicEntry[luiEntrance].m_uiValue % luiNewHash;
            if(GetSuffEntry(m_uiSECurPos,luiTmpPos) != DEF_TRIES_NULL)
            {
                lbConflict = true;
                break;
            }
            SetSuffEntry(m_uiSECurPos,luiTmpPos,luiEntrance);
        }
        // Then place the new child.
        if(!lbConflict)
        {
            const unsigned int luiTmpPos = auiStrCode % luiNewHash;
            if(GetSuffEntry(m_uiSECurPos,luiTmpPos) == DEF_TRIES_NULL)
            {
                SetSuffEntry(m_uiSECurPos,luiTmpPos,luiCurDEPos);
            }
            else
            {
                lbConflict = true;
            }
        }
        if(lbConflict)
        {
            continue;   // try the next size; the candidate is overwritten
        }

        // Publish the candidate as the parent's suffix table.
        if(lbAtRoot)
        {
            m_uiStartPos = m_uiSECurPos;
        }
        else
        {
            m_pDicEntry[auiLastDEPos].m_uiSPos = m_uiSECurPos;
        }
        m_uiSECurPos += luiNewHash + 2;
        auiCurDEPos = luiCurDEPos;
        return 1;
    }
}
// Decodes the character starting at byte offset *aiPos of apString into a
// numeric code and advances *aiPos behind it.
//
// Recognised forms (byte ranges as tested below):
//   - 2 bytes: lead 0x81-0xfe, trail 0x40-0xfe  -> lead*256 + trail
//   - 4 bytes: 0x81-0xfe, 0x30-0x39, 0x81-0xfe, 0x30-0x39
//              -> the four bytes packed big-endian into 32 bits
//   - anything else: the single byte value
//
// apString  text buffer.
// aiLen     number of valid bytes in apString.
// aiPos     in/out byte offset.
// apChrSet  not used by this implementation.
//
// Returns the code, or 0 without touching *aiPos when a pointer is NULL or
// *aiPos lies outside [0, aiLen).
unsigned int CTriesDict::GetStrCode(const char *apString,const int aiLen,int *aiPos,const char *apChrSet)
{
    (void)apChrSet;
    if(!apString || !aiPos || *aiPos < 0 || *aiPos >= aiLen)
    {
        return 0;
    }

    const unsigned char *lpTmp = reinterpret_cast<const unsigned char*>(apString) + *aiPos;
    const int liRemain = aiLen - *aiPos;    // bytes available from lpTmp on

    unsigned int luiValue = lpTmp[0];
    int liOffset = 1;
    if(liRemain >= 2 && lpTmp[0] >= 0x81 && lpTmp[0] <= 0xfe)
    {
        if(lpTmp[1] >= 0x40 && lpTmp[1] <= 0xfe)
        {
            luiValue = (static_cast<unsigned int>(lpTmp[0]) << 8) | lpTmp[1];
            liOffset = 2;
        }
        else if(lpTmp[1] >= 0x30 && lpTmp[1] <= 0x39)
        {
            if(liRemain >= 4
               && (lpTmp[2] >= 0x81 && lpTmp[2] <= 0xfe)
               && (lpTmp[3] >= 0x30 && lpTmp[3] <= 0x39))
            {
                // Unsigned arithmetic: the lead byte ends up in bit 31.
                luiValue = (static_cast<unsigned int>(lpTmp[0]) << 24)
                         | (static_cast<unsigned int>(lpTmp[1]) << 16)
                         | (static_cast<unsigned int>(lpTmp[2]) << 8)
                         |  static_cast<unsigned int>(lpTmp[3]);
                liOffset = 4;
            }
        }
    }
    *aiPos += liOffset;
    return luiValue;
}
// Changes the capacity of the term-node list to auiNewSize elements.
// Slots added by growing are zero-filled.
//
// Returns 0 on success (also when the capacity already equals auiNewSize),
// -1 if auiNewSize is 0 or smaller than the number of nodes in use, and -2
// if the byte size is not representable or the reallocation fails. On
// failure the existing list is left untouched.
int CTriesDict::ResizeTNodeLst(unsigned int auiNewSize)
{
    if(auiNewSize < m_uiTCurPos)
    {
        return -1;
    }
    if(auiNewSize == m_uiTSize)
    {
        return 0;
    }
    if(auiNewSize == 0)
    {
        // realloc(ptr,0) may release the block; never ask for that.
        return -1;
    }
    if(auiNewSize > (~static_cast<size_t>(0)) / sizeof(STRU_TRIES_DICT_NODE))
    {
        return -2;
    }
    STRU_TRIES_DICT_NODE *lpDic = (STRU_TRIES_DICT_NODE*)realloc(m_pTNodeLst,
        auiNewSize*sizeof(STRU_TRIES_DICT_NODE));
    if(!lpDic)
    {
        return -2;
    }
    if(auiNewSize > m_uiTSize)
    {
        memset(lpDic + m_uiTSize,0,(auiNewSize - m_uiTSize)*sizeof(STRU_TRIES_DICT_NODE));
    }
    m_pTNodeLst = lpDic;
    m_uiTSize = auiNewSize;
    return 0;
}
int CTriesDict::ResizeBuf(unsigned int auiNewSize)
{
if(auiNewSize < m_uiBCurPos)
{
return -1;
}
if(auiNewSize == m_uiBufSize)
{
return 0;
}
char *lpBuf = (char*)realloc(m_pStrBuf,auiNewSize);
if(!lpBuf)
{
return -2;
}
m_pStrBuf = lpBuf;
m_uiBufSize = auiNewSize;
return 0;
}
// Compacts the suffix-table array.
//
// The array is a sequence of tables, each laid out as
// [hash size][owner entry][hash size slots...]. A table is live when its
// owner (the root for DEF_TRIES_FIRST, otherwise a trie entry) still points
// at it; tables abandoned by a re-hash are dropped. Live tables are slid
// towards the front and their owners are updated to the new offsets.
//
// Returns 0 on success. Returns -1 if an owner index lies beyond the used
// trie entries or if the tables do not end exactly at m_uiSECurPos; tables
// moved before the problem was detected stay moved.
int CTriesDict::AdjustSEInfo()
{
    unsigned int luiSrc = 0;    // table currently inspected
    unsigned int luiDst = 0;    // where the next live table belongs

    while(luiSrc < m_uiSECurPos)
    {
        const unsigned int luiOwner = GetBTEntry(luiSrc);
        const unsigned int luiSpan = GetSuffHSize(luiSrc) + 2;
        const bool lbRootOwned = (luiOwner == DEF_TRIES_FIRST);

        if(!lbRootOwned && luiOwner >= m_uiECurPos)
        {
            return -1;
        }

        // Does the owner still reference this table?
        bool lbLive = false;
        if(lbRootOwned)
        {
            lbLive = (m_uiStartPos == luiSrc);
        }
        else
        {
            lbLive = (m_pDicEntry[luiOwner].m_uiSPos == luiSrc);
        }

        if(lbLive)
        {
            if(luiSrc != luiDst)
            {
                // Source and destination may overlap, hence memmove.
                memmove(m_pSEInfo + luiDst,m_pSEInfo + luiSrc,luiSpan*sizeof(unsigned int));
                if(lbRootOwned)
                {
                    m_uiStartPos = luiDst;
                }
                else
                {
                    m_pDicEntry[luiOwner].m_uiSPos = luiDst;
                }
            }
            luiDst += luiSpan;
        }
        luiSrc += luiSpan;
    }

    if(luiSrc != m_uiSECurPos)
    {
        return -1;
    }
    m_uiSECurPos = luiDst;
    return 0;
}
// Changes the capacity of the suffix-table array to auiNewSize words.
// Words added by growing are zero-filled.
//
// Returns 0 on success (also when the capacity already equals auiNewSize),
// -1 if auiNewSize is 0 or smaller than the number of words in use, and -2
// if the byte size is not representable or the reallocation fails. On
// failure the existing array is left untouched.
int CTriesDict::ResizeSEInfo(unsigned int auiNewSize)
{
    if(auiNewSize < m_uiSECurPos)
    {
        return -1;
    }
    if(auiNewSize == m_uiSESize)
    {
        return 0;
    }
    if(auiNewSize == 0)
    {
        // realloc(ptr,0) may release the block; never ask for that.
        return -1;
    }
    if(auiNewSize > (~static_cast<size_t>(0)) / sizeof(unsigned int))
    {
        return -2;
    }
    unsigned int *lpSE = (unsigned int*)realloc(m_pSEInfo,auiNewSize*sizeof(unsigned int));
    if(!lpSE)
    {
        return -2;
    }
    if(auiNewSize > m_uiSESize)
    {
        memset(lpSE + m_uiSESize,0,(auiNewSize - m_uiSESize)*sizeof(unsigned int));
    }
    m_pSEInfo = lpSE;
    m_uiSESize = auiNewSize;
    return 0;
}
// Changes the capacity of the trie-entry array to auiNewSize elements.
// Elements added by growing are zero-filled.
//
// Returns 0 on success (also when the capacity already equals auiNewSize),
// -1 if auiNewSize is 0 or smaller than the number of entries in use, and -2
// if the byte size is not representable or the reallocation fails. On
// failure the existing array is left untouched.
int CTriesDict::ResizeDicEntry(unsigned int auiNewSize)
{
    if(auiNewSize < m_uiECurPos)
    {
        return -1;
    }
    if(auiNewSize == m_uiESize)
    {
        return 0;
    }
    if(auiNewSize == 0)
    {
        // realloc(ptr,0) may release the block; never ask for that.
        return -1;
    }
    if(auiNewSize > (~static_cast<size_t>(0)) / sizeof(STRU_TRIES_DICT_ENTRY))
    {
        return -2;
    }
    STRU_TRIES_DICT_ENTRY *lpEntry = (STRU_TRIES_DICT_ENTRY*)realloc(m_pDicEntry,
        auiNewSize * sizeof(STRU_TRIES_DICT_ENTRY));
    if(!lpEntry)
    {
        return -2;
    }
    if(auiNewSize > m_uiESize)
    {
        memset(lpEntry + m_uiESize,0,(auiNewSize - m_uiESize)*sizeof(STRU_TRIES_DICT_ENTRY));
    }
    m_pDicEntry = lpEntry;
    m_uiESize = auiNewSize;
    return 0;
}
// Writes an empty suffix table at word offset auiEntry of the suffix-table
// array: word 0 = hash size, word 1 = owning entry (auiBTPos), followed by
// auiHSize slots set to DEF_TRIES_NULL. The caller must have reserved
// auiHSize + 2 words. Always returns 0.
int CTriesDict::InitSuffEntry(unsigned int auiEntry,unsigned int auiHSize,unsigned int auiBTPos)
{
    unsigned int *lpHead = m_pSEInfo + auiEntry;
    lpHead[0] = auiHSize;
    lpHead[1] = auiBTPos;

    // Mark every hash slot as unused.
    unsigned int *lpSlot = lpHead + 2;
    unsigned int *const lpStop = lpSlot + auiHSize;
    while(lpSlot != lpStop)
    {
        *lpSlot++ = DEF_TRIES_NULL;
    }
    return 0;
}
// Returns the content of hash slot auiHPos of the suffix table located at
// word offset auiEntry (slots start behind the two header words).
unsigned int CTriesDict::GetSuffEntry(unsigned int auiEntry,unsigned int auiHPos)
{
    const unsigned int *lpTable = &m_pSEInfo[auiEntry];
    const unsigned int luiSlot = 2+auiHPos;
    return lpTable[luiSlot];
}
// Returns the hash size of the suffix table at word offset auiEntry, i.e.
// the first header word of that table.
unsigned int CTriesDict::GetSuffHSize(unsigned int auiEntry)
{
    const unsigned int *lpTable = m_pSEInfo + auiEntry;
    return *lpTable;
}
// Returns the owner recorded in the suffix table at word offset auiEntry,
// i.e. the second header word of that table.
unsigned int CTriesDict::GetBTEntry(unsigned int auiEntry)
{
    const unsigned int luiOwnerWord = auiEntry+1;
    return m_pSEInfo[luiOwnerWord];
}
// Stores auiEPos in hash slot auiHPos of the suffix table at word offset
// auiEntry. Always returns 0.
int CTriesDict::SetSuffEntry(unsigned int auiEntry,unsigned int auiHPos,unsigned int auiEPos)
{
    const unsigned int luiWord = auiEntry+2+auiHPos;
    m_pSEInfo[luiWord] = auiEPos;
    return 0;
}
// Looks up the child with character code auiStrCode below entry auiEPos
// (DEF_TRIES_FIRST selects the root). Returns the child's entry index, or
// DEF_TRIES_NULL when the parent has no suffix table, the hash slot is
// empty, or the slot holds a different code.
unsigned int CTriesDict::SeekEntry(unsigned int auiEPos,unsigned int auiStrCode)
{
    // Locate the parent's suffix table.
    unsigned int luiTable = 0;
    if(auiEPos != DEF_TRIES_FIRST)
    {
        luiTable = m_pDicEntry[auiEPos].m_uiSPos;
    }
    else
    {
        luiTable = m_uiStartPos;
    }
    if(luiTable == DEF_TRIES_SUFFNULL)
    {
        return DEF_TRIES_NULL;
    }

    // Probe the single slot the code hashes to.
    const unsigned int luiSlot = auiStrCode % GetSuffHSize(luiTable);
    const unsigned int luiHit = GetSuffEntry(luiTable,luiSlot);
    if(luiHit == DEF_TRIES_NULL)
    {
        return DEF_TRIES_NULL;
    }
    if(m_pDicEntry[luiHit].m_uiValue != auiStrCode)
    {
        return DEF_TRIES_NULL;
    }
    return luiHit;
}
unsigned int CTriesDict::SeekString(char *apTerm,int aiLen)
{
unsigned int luiValue = 0;
int liCurPos = 0;
unsigned int luiHSize = 0;
unsigned int luiHPos = 0;
unsigned int luiEPos = 0;
unsigned int luiSuffPos = m_uiStartPos;
while(liCurPos < aiLen)
{
if(luiSuffPos == DEF_TRIES_SUFFNULL)
{
return DEF_TRIES_NODENULL;
}
luiValue =GetStrCode(apTerm,aiLen,&liCurPos);
luiHSize = GetSuffHSize(luiSuffPos);
luiHPos = luiValue % luiHSize;
luiEPos = GetSuffEntry(luiSuffPos,luiHPos);
if((luiEPos == DEF_TRIES_NULL)||(m_pDicEntry[luiEPos].m_uiValue != luiValue))
{
return DEF_TRIES_NODENULL;
}
luiSuffPos = m_pDicEntry[luiEPos].m_uiSPos;
}
return m_pDicEntry[luiEPos].m_uiTPos;
}
// Allocates a search result able to hold up to auiMaxTermNum hits.
//
// Returns the new result with m_uiSegCnt set to 0, or NULL if auiMaxTermNum
// is 0 or memory could not be allocated (nothing is leaked in that case).
// Release the result with FreeResult().
STRU_TRIES_SEARCH_RESULT *CTriesDict::AllocResult(unsigned int auiMaxTermNum)
{
    if(auiMaxTermNum == 0)
    {
        return NULL;
    }
    STRU_TRIES_SEARCH_RESULT *lpResult =
        (STRU_TRIES_SEARCH_RESULT *)calloc(1,sizeof(STRU_TRIES_SEARCH_RESULT));
    if(!lpResult)
    {
        return NULL;
    }
    lpResult->m_ppResult = (STRU_TRIES_DICT_NODE**)calloc(auiMaxTermNum,sizeof(STRU_TRIES_DICT_NODE*));
    lpResult->m_pOffset = (unsigned int*)calloc(auiMaxTermNum,sizeof(unsigned int));
    if(!lpResult->m_ppResult || !lpResult->m_pOffset)
    {
        // free(NULL) is a no-op, so a partial allocation is undone safely.
        free(lpResult->m_ppResult);
        free(lpResult->m_pOffset);
        free(lpResult);
        return NULL;
    }
    lpResult->m_uiMaxTermNum = auiMaxTermNum;
    lpResult->m_uiSegCnt = 0;
    return lpResult;
}
// Releases a result obtained from AllocResult(): both hit arrays first,
// then the result object itself. A null pointer is ignored.
void CTriesDict::FreeResult(STRU_TRIES_SEARCH_RESULT *apResult)
{
    if(!apResult)
    {
        return;
    }
    free(apResult->m_ppResult);
    free(apResult->m_pOffset);
    free(apResult);
}