// Collision-resolving hash table (trie) for Chinese text matching
// (original title: 能够处理冲突的散列表(中文文本匹配))

 

#include <algorithm>
#include <cstdio>
#include <cstdlib>
#include <cstring>

#include "tries.h"
#include "dicthead.h"


#define DEF_TRIES_DIC_VERSION 1


// Fixed-size record used by Save()/Load() to serialize one term node.
// It carries the three integer fields copied to/from m_pTNodeLst[i]; the
// in-memory string pointer is not stored and is rebuilt on load from
// m_uiBTPos. Field order and types define the file layout.
struct STRU_TRIES_INN_NODE
{
  unsigned int m_uiLen;   // copied from/to the node's m_uiLen
  unsigned int m_uiAttr;  // copied from/to the node's m_uiAttr
  unsigned int m_uiBTPos; // offset of the term text inside the string pool
};


// Constructs a dictionary that owns no buffers yet: every pointer is NULL
// and every capacity / cursor counter is zero.
CTriesDict::CTriesDict()
{
  // Buffers
  m_pStrBuf = NULL;
  m_pDicEntry = NULL;
  m_pSEInfo = NULL;
  m_pTNodeLst = NULL;

  // Capacities
  m_uiBufSize = m_uiESize = m_uiSESize = m_uiTSize = 0;

  // Fill cursors and root table offset
  m_uiBCurPos = m_uiECurPos = m_uiSECurPos = m_uiTCurPos = 0;
  m_uiStartPos = 0;
}

// Releases the four owned buffers and zeroes all bookkeeping fields.
CTriesDict::~CTriesDict()
{
  // free(NULL) is a no-op, so no per-pointer guard is required.
  free(m_pStrBuf);
  free(m_pDicEntry);
  free(m_pSEInfo);
  free(m_pTNodeLst);

  m_pStrBuf = NULL;
  m_pDicEntry = NULL;
  m_pSEInfo = NULL;
  m_pTNodeLst = NULL;

  m_uiBufSize = m_uiBCurPos = 0;
  m_uiESize = m_uiECurPos = 0;
  m_uiSESize = m_uiSECurPos = 0;
  m_uiTSize = m_uiTCurPos = 0;
  m_uiStartPos = 0;
}

int CTriesDict::Create(unsigned int auiStrNum)
{
  if(auiStrNum<=0)
    {
      return -1;
    }
 
  STRU_TRIES_DICT_ENTRY *lpDicEntry = (STRU_TRIES_DICT_ENTRY*)calloc(4*auiStrNum,sizeof(STRU_TRIES_DICT_ENTRY));
  if(!lpDicEntry)
    {
      return -1;
    }
 
  char *lpStrBuf = (char*)calloc(50*auiStrNum,sizeof(char));
  if(!lpStrBuf)
    {
      return -1;
    }
 
  unsigned int *lpSEInfo = (unsigned int*)calloc(20*auiStrNum,sizeof(unsigned int));
 
  STRU_TRIES_DICT_NODE *lpDicNode = (STRU_TRIES_DICT_NODE*)calloc(auiStrNum,sizeof(STRU_TRIES_DICT_NODE));
 
  m_pStrBuf = lpStrBuf;
  m_uiBufSize = 50*auiStrNum;
  m_uiBCurPos = 0;
 
  m_pDicEntry = lpDicEntry;
  m_uiESize = 4*auiStrNum;
  m_uiECurPos = 0;
 
  m_pSEInfo = lpSEInfo;
  m_uiSESize = 20*auiStrNum;
  m_uiSECurPos = 0;
 
  m_pTNodeLst = lpDicNode;
  m_uiTSize = auiStrNum;
  m_uiTCurPos = 0;
 
  m_uiStartPos = 0;
  InitSuffEntry(m_uiStartPos,1,DEF_TRIES_FIRST);
  m_uiSECurPos += 3;
 
  return 0;
}

void CTriesDict::Destroy()
{
  if(m_pStrBuf)
    {
      free(m_pStrBuf);
      m_pStrBuf = 0;
    }
 
  m_uiBufSize = 0;
  m_uiBCurPos = 0;
 
  if(m_pDicEntry)
    {
      free(m_pDicEntry);
      m_pDicEntry = 0;
    }
 
  m_uiESize = 0;
  m_uiECurPos = 0;
 
  if(m_pSEInfo)
    {
      free(m_pSEInfo);
      m_pSEInfo = 0;
    }
 
  m_uiSESize = 0;
  m_uiSECurPos = 0;
 
  if(m_pTNodeLst)
    {
      free(m_pTNodeLst);
      m_pTNodeLst = 0;
    }
 
  m_uiTSize = 0;
  m_uiTCurPos = 0;
 
  m_uiStartPos = 0;
}

void CTriesDict::Clean()
{
  m_uiBCurPos = 0;

  m_uiECurPos = 0;
 
  m_uiSECurPos = 0;

  m_uiTCurPos = 0;
 
  m_uiStartPos = 0;
}

int CTriesDict::Load(const char *apFile)
{
  FILE *lpIn = NULL;
  int liNowPos = 0,liEndPos = 0;
 
  lpIn = fopen(apFile,"rb");
  if(!lpIn)
    {
      return -1;
    }
 
  STRU_DICT_HEAD_DEF loHead;
 
  int liRet = fread(&loHead.m_byType,sizeof(unsigned char),1,lpIn);
  if(liRet != 1)
    {
      fclose(lpIn);
      return -1;
    }
 
  if(loHead.m_byType != DEF_DICT_TRIES_TYPE)
    {
      fclose(lpIn);
      return -1;
    }
 
  liRet = fread(&loHead.m_byVersion,sizeof(unsigned char),1,lpIn);
  if(liRet != 1)
    {
      fclose(lpIn);
      return -1;
    }
 
  if(loHead.m_byVersion != DEF_TRIES_DIC_VERSION)
    {
      fclose(lpIn);
      return -1;
    }
 
  liRet = fread(loHead.m_szComment,sizeof(loHead.m_szComment),1,lpIn);
  if(liRet != 1)
    {
      fclose(lpIn);
      return -1;
    }
   
  fread(&m_uiBCurPos,sizeof(unsigned int),1,lpIn);
  m_pStrBuf = (char*)malloc(m_uiBCurPos+4);
  m_uiBufSize = m_uiBCurPos+4;
  fread(m_pStrBuf,sizeof(char),m_uiBCurPos,lpIn);
 
  fread(&m_uiSECurPos,sizeof(unsigned int),1,lpIn);
  m_pSEInfo = (unsigned int*)calloc(m_uiSECurPos+1,sizeof(unsigned int));
  m_uiSESize = m_uiSECurPos+1;
  fread(m_pSEInfo,sizeof(unsigned int),m_uiSECurPos,lpIn);
 
  fread(&m_uiECurPos,sizeof(unsigned int),1,lpIn);
  m_pDicEntry = (STRU_TRIES_DICT_ENTRY*)calloc(m_uiECurPos+1,sizeof(STRU_TRIES_DICT_ENTRY));
  m_uiESize = m_uiECurPos+1;
  fread(m_pDicEntry,sizeof(STRU_TRIES_DICT_ENTRY),m_uiECurPos,lpIn);
  
  fread(&m_uiTCurPos,sizeof(unsigned int),1,lpIn);
  m_pTNodeLst = (STRU_TRIES_DICT_NODE*)calloc(m_uiTCurPos+1,sizeof(STRU_TRIES_DICT_NODE));
  m_uiTSize = m_uiTCurPos + 1;
  for(unsigned int i = 0;i < m_uiTCurPos;i++)
    {
      STRU_TRIES_INN_NODE loTmpNode;
      fread(&loTmpNode,sizeof(STRU_TRIES_INN_NODE),1,lpIn);
      m_pTNodeLst[i].m_uiLen = loTmpNode.m_uiLen;
      m_pTNodeLst[i].m_uiAttr = loTmpNode.m_uiAttr;
      m_pTNodeLst[i].m_uiBTPos = loTmpNode.m_uiBTPos;
      m_pTNodeLst[i].m_pString = m_pStrBuf + m_pTNodeLst[i].m_uiBTPos;
    }
    
  fread(&m_uiStartPos,sizeof(m_uiStartPos),1,lpIn);
    
  liNowPos = ftell(lpIn);
  fseek(lpIn,0,SEEK_END);
  liEndPos = ftell(lpIn);
  if(liEndPos > liNowPos + 1)
    {
      fclose(lpIn);
      return -1;
    }
    
  fclose(lpIn);

  return 0;
}

int CTriesDict::Save(const char *apFile,const char *apComment)

  FILE* lpOutput = NULL;

  if(AdjustSEInfo() < -1)
    {
      return -1;
    }
 
  lpOutput = fopen(apFile,"wb");
  if(!lpOutput)
    {
      return -1;
    }
 
  //Record Head
  STRU_DICT_HEAD_DEF loHead;
  loHead.m_byType = DEF_DICT_TRIES_TYPE;
  fwrite(&loHead.m_byType,sizeof(unsigned char),1,lpOutput);
 
  loHead.m_byVersion = DEF_TRIES_DIC_VERSION;
  fwrite(&loHead.m_byVersion,sizeof(unsigned char),1,lpOutput);
 
  memset(loHead.m_szComment,0,sizeof(loHead.m_szComment));
  if(apComment)
    {
      int liSize = std::min(strlen(apComment),sizeof(loHead.m_szComment));
      memcpy(loHead.m_szComment,apComment,liSize);
    }
 
  fwrite(loHead.m_szComment,sizeof(loHead.m_szComment),1,lpOutput);
 
  //Record Data
  fwrite(&m_uiBCurPos,sizeof(unsigned int),1,lpOutput);
  fwrite(m_pStrBuf,sizeof(char),m_uiBCurPos,lpOutput);
 
  fwrite(&m_uiSECurPos,sizeof(unsigned int),1,lpOutput);
  fwrite(m_pSEInfo,sizeof(unsigned int),m_uiSECurPos,lpOutput);
 
  fwrite(&m_uiECurPos,sizeof(unsigned int),1,lpOutput);
  fwrite(m_pDicEntry,sizeof(STRU_TRIES_DICT_ENTRY),m_uiECurPos,lpOutput);
 
  fwrite(&m_uiTCurPos,sizeof(unsigned int),1,lpOutput);
  for(unsigned int i = 0; i < m_uiTCurPos;i++)
    {
      STRU_TRIES_INN_NODE loTmpNode;
      loTmpNode.m_uiLen = m_pTNodeLst[i].m_uiLen;
      loTmpNode.m_uiAttr = m_pTNodeLst[i].m_uiAttr;
      loTmpNode.m_uiBTPos = m_pTNodeLst[i].m_uiBTPos;
      fwrite(&loTmpNode,sizeof(STRU_TRIES_INN_NODE),1,lpOutput);
    }

  fwrite(&m_uiStartPos,sizeof(m_uiStartPos),1,lpOutput);
 
  fclose(lpOutput);
 
  return 0;
}

int CTriesDict::Add(const char *apString,unsigned int auiAttr)
{
  int liCurPos = 0;
  int liLen = strlen(apString);
  unsigned int luiLastDEPos = DEF_TRIES_FIRST;
  unsigned int luiCurDEPos = DEF_TRIES_COMMNULL;
  unsigned int luiStrCode = 0;
  unsigned int luiTPos = DEF_TRIES_COMMNULL;
 
  if(SeekString(const_cast<char*>(apString),strlen(apString)) != DEF_TRIES_NODENULL)
    {
      return 0;
    }
 
  while(liCurPos < liLen)
    {
      luiStrCode = GetStrCode(apString,liLen,&liCurPos);
      if(InsertDicEntry(luiLastDEPos,luiStrCode,luiCurDEPos) < 0)
 {
   return -1;
 }
  
      luiLastDEPos = luiCurDEPos;
    }
 
  if(m_pDicEntry[luiCurDEPos].m_uiValue != luiStrCode)
    {
      return -1;
    }
 
  if(m_uiTCurPos == m_uiTSize)
    {
      ResizeTNodeLst(m_uiTSize * 2);
    }
 
  luiTPos = m_uiTCurPos++;
  m_pDicEntry[luiCurDEPos].m_uiTPos = luiTPos;
 
  m_pTNodeLst[luiTPos].m_uiLen = liLen;
  m_pTNodeLst[luiTPos].m_uiAttr = auiAttr;
 
  while(m_uiBCurPos + liLen + 1 > m_uiBufSize)
    {
      ResizeBuf(m_uiBufSize * 2);
    }
 
  memcpy(m_pStrBuf+m_uiBCurPos,apString,liLen);
  m_pStrBuf[m_uiBCurPos+liLen] = 0;
  m_pTNodeLst[luiTPos].m_pString = m_pStrBuf+m_uiBCurPos;
  m_pTNodeLst[luiTPos].m_uiBTPos = m_uiBCurPos;
  m_uiBCurPos += liLen + 1;
 
  return 0;
}

int CTriesDict::Search(const char *apString,STRU_TRIES_SEARCH_RESULT *apResult,int aiSearchMode)
{
  int liBTPos = 0,liPos = 0;
  int liNextPos = liPos;
  unsigned int luiValue = 0;
  unsigned int luiENPos = 0;
  unsigned int luiEFPos = DEF_TRIES_FIRST;
  unsigned int luiTNodePos = DEF_TRIES_NODENULL;
  unsigned int luiSTNodePos = DEF_TRIES_NODENULL;
 
  int liStrLen = strlen(apString);
 
  apResult->m_uiSegCnt = 0;

  /*
    int  count=0;
    while(1)
    {
    if(!m_pDicEntry[count].m_uiValue)
    {
    break;
    }
    char   word[5]={0};
    word[0]=m_pDicEntry[count].m_uiValue/256;
    word[1]=m_pDicEntry[count].m_uiValue%256;

    printf("%s  ",word);
    count++;
    }
    printf("\n\ntotal  word  count  %d\n\n",count);

  */

 
  if(aiSearchMode == DEF_TRIES_SEARCH_FMM)
    {
      while(liPos < liStrLen)
 {
   liBTPos = liPos;
   luiValue = GetStrCode(apString,liStrLen,&liPos);

   if(luiValue == 0)
     {
       break;
     }


   char   word[5]={0};
   word[0]=luiValue/256;
   word[1]=luiValue%256;
   //   printf("%s  ",word);
 


   
   while(luiValue != 0 && (luiENPos = SeekEntry(luiEFPos,luiValue)) != DEF_TRIES_NULL)
     {
       char   word[5]={0};
       word[0]=m_pDicEntry[luiENPos].m_uiValue/256;
       word[1]=m_pDicEntry[luiENPos].m_uiValue%256;


       if((luiTNodePos = m_pDicEntry[luiENPos].m_uiTPos) != DEF_TRIES_NODENULL)
  {
    luiSTNodePos = luiTNodePos;
    liNextPos = liPos;
    luiEFPos = luiENPos;
  }
    
       luiValue = GetStrCode(apString,liStrLen,&liPos);
       word[0]=luiValue/256;
       word[1]=luiValue%256;
       //   printf("%s  ",word);
       luiEFPos = luiENPos;
     }
   
   if(luiSTNodePos != DEF_TRIES_NODENULL)
     {
       PackResult(apResult,m_pTNodeLst+luiSTNodePos,liBTPos);
       liPos = liNextPos;
     }
   else
     {
     
     }
   
   luiEFPos = DEF_TRIES_FIRST;
   luiSTNodePos = DEF_TRIES_NODENULL;

   //      liPos = liNextPos;
 }
    }
  else if(aiSearchMode == DEF_TRIES_SEARCH_ALL)
    {
      while(liPos < liStrLen)
 {
   liBTPos = liPos;
   luiValue = GetStrCode(apString,liStrLen,&liPos);
   if(luiValue == 0)
     {
       break;
     }
  
   liNextPos = liPos;
  
   while(luiValue != 0 && (luiENPos = SeekEntry(luiEFPos,luiValue)) != DEF_TRIES_NULL)
     {
       if((luiTNodePos = m_pDicEntry[luiENPos].m_uiTPos) != DEF_TRIES_NODENULL)
  {
    PackResult(apResult,m_pTNodeLst+luiTNodePos,liBTPos);
  }

       luiValue = GetStrCode(apString,liStrLen,&liPos);
       luiEFPos = luiENPos;
     }
 
   luiEFPos = DEF_TRIES_FIRST;
   liPos = liNextPos;
 }
    }

  return 0;
}

int CTriesDict::PackResult(STRU_TRIES_SEARCH_RESULT *apResult,
      STRU_TRIES_DICT_NODE *apNode,unsigned int auiOffset)
{
  if(apResult->m_uiSegCnt + 1 > apResult->m_uiMaxTermNum)
    {
      return -1;
    }
 
  apResult->m_ppResult[apResult->m_uiSegCnt] = apNode;
  apResult->m_pOffset[apResult->m_uiSegCnt] = auiOffset;
  apResult->m_uiSegCnt++;

  return 0;
}

//0:success;1:existed;<0:error
int CTriesDict::InsertDicEntry(unsigned int auiLastDEPos,unsigned int auiStrCode,unsigned int &auiCurDEPos)
{
  unsigned int luiSEOff = 0;
 
  //是否是首次插入
  if(auiLastDEPos != DEF_TRIES_FIRST)
    {
      luiSEOff = m_pDicEntry[auiLastDEPos].m_uiSPos;
    }
  else
    {
      luiSEOff = m_uiStartPos;
    }
 
  //存在前缀
  if(luiSEOff == DEF_TRIES_SUFFNULL)
    {
      if(m_uiSECurPos + 3 > m_uiSESize)
 {
   AdjustSEInfo();
 }
  
      while(m_uiSECurPos + 3 > m_uiSESize)
 {
   ResizeSEInfo(m_uiSESize * 2);
 }
  
      m_pDicEntry[auiLastDEPos].m_uiSPos = m_uiSECurPos;
      InitSuffEntry(m_uiSECurPos,1,auiLastDEPos);
      m_uiSECurPos +=3;
      luiSEOff = m_pDicEntry[auiLastDEPos].m_uiSPos;
    }
 
  unsigned int luiHSize = GetSuffHSize(luiSEOff);
  unsigned int luiHPos = auiStrCode % luiHSize;
  unsigned int luiDEPos = GetSuffEntry(luiSEOff,luiHPos);
 
  if((luiDEPos != DEF_TRIES_NULL) && (m_pDicEntry[luiDEPos].m_uiValue == auiStrCode))
    {
      auiCurDEPos = luiDEPos;
      return 0;
    }
  else
    {
      if(m_uiECurPos == m_uiESize)
 {
   ResizeDicEntry(m_uiESize*2);
 }  
  
      unsigned int luiCurDEPos = m_uiECurPos;
      m_uiECurPos++;
      m_pDicEntry[luiCurDEPos].m_uiValue = auiStrCode;
      m_pDicEntry[luiCurDEPos].m_uiTPos = DEF_TRIES_NODENULL;
      m_pDicEntry[luiCurDEPos].m_uiSPos = DEF_TRIES_SUFFNULL;
  
      if(luiDEPos == DEF_TRIES_NULL)
 {
   SetSuffEntry(luiSEOff,luiHPos,luiCurDEPos);
   auiCurDEPos = luiCurDEPos;
   return 1;
 }
      else //hash conflict solve
 {
   for(int luiNewHash = luiHSize+1;;luiNewHash++)
     {
       bool lbConflict = false;
    
       if(m_uiSECurPos + luiNewHash + 2 > m_uiSESize)
  {
    AdjustSEInfo();
  }
    
       while(m_uiSECurPos + luiNewHash + 2 > m_uiSESize)
  {
    ResizeSEInfo(m_uiSESize * 2);
  }
    
       if(auiLastDEPos != DEF_TRIES_FIRST)
  {
    luiSEOff = m_pDicEntry[auiLastDEPos].m_uiSPos;
  }
       else
  {
    luiSEOff = m_uiStartPos;
  }
    
       InitSuffEntry(m_uiSECurPos,luiNewHash,auiLastDEPos);
       for(int i = 0;i < luiHSize;i++)
  {
    unsigned int luiEntrance = GetSuffEntry(luiSEOff,i);
    if(luiEntrance != DEF_TRIES_NULL)
      {
        unsigned int luiTmpCode,luiTmpPos;
        luiTmpCode = m_pDicEntry[luiEntrance].m_uiValue;
        luiTmpPos = luiTmpCode % luiNewHash;
        if(GetSuffEntry(m_uiSECurPos,luiTmpPos) == DEF_TRIES_NULL)
   {
     SetSuffEntry(m_uiSECurPos,luiTmpPos,luiEntrance);
   }
        else
   {
     lbConflict = true;
     break;
   }
      }
  }
    
       if(!lbConflict)
  {
    unsigned int luiTmpValue = m_pDicEntry[luiCurDEPos].m_uiValue;
    unsigned int luiTmpPos = luiTmpValue % luiNewHash;
    if(GetSuffEntry(m_uiSECurPos,luiTmpPos) == DEF_TRIES_NULL)
      {
        SetSuffEntry(m_uiSECurPos,luiTmpPos,luiCurDEPos);
      }
    else
      {
        lbConflict = true;
      }
  }
    
       if(!lbConflict)
  {
    printf("newhash  size  is  %d  auilastdePos=%u  m_uiStartPos=%d\n", luiNewHash,auiLastDEPos,m_uiStartPos);
    if(auiLastDEPos != DEF_TRIES_FIRST)
      {
        m_pDicEntry[auiLastDEPos].m_uiSPos = m_uiSECurPos;
      }
    else
      {
        m_uiStartPos = m_uiSECurPos;
      }
     
    m_uiSECurPos += luiNewHash+2;
    auiCurDEPos = luiCurDEPos;
    return 1;
  }
     }
 }
    }
}

// Decodes the character starting at byte *aiPos of apString (aiLen bytes
// long), advances *aiPos past it and returns its numeric code.
//
// Recognized forms (the byte ranges look like GBK/GB18030 - confirm against
// the data this is used with):
//   2 bytes: lead 0x81-0xFE, trail 0x40-0xFE            -> b0<<8 | b1
//   4 bytes: 0x81-0xFE, 0x30-0x39, 0x81-0xFE, 0x30-0x39 -> b0<<24 | ... | b3
//   otherwise a single byte                             -> b0
//
// Returns 0 without moving *aiPos when the position is outside [0, aiLen) or
// a pointer argument is NULL. apChrSet is not used.
unsigned int CTriesDict::GetStrCode(const char *apString,const int aiLen,int *aiPos,const char *apChrSet)
{
  (void)apChrSet;

  if(!apString || !aiPos || *aiPos < 0 || *aiPos >= aiLen)
    {
      return 0;
    }

  const unsigned char *lpTmp = reinterpret_cast<const unsigned char*>(apString) + *aiPos;
  const int liRemain = aiLen - *aiPos;

  // Work in unsigned arithmetic: a lead byte >= 0x80 shifted into the top
  // byte does not fit a signed int.
  unsigned int luiValue = lpTmp[0];
  int liOffset = 1;

  if(liRemain >= 2 && lpTmp[0] >= 0x81 && lpTmp[0] <= 0xfe)
    {
      if(lpTmp[1] >= 0x40 && lpTmp[1] <= 0xfe)
        {
          luiValue = ((unsigned int)lpTmp[0] << 8) | lpTmp[1];
          liOffset = 2;
        }
      else if(lpTmp[1] >= 0x30 && lpTmp[1] <= 0x39 &&
              liRemain >= 4 &&
              lpTmp[2] >= 0x81 && lpTmp[2] <= 0xfe &&
              lpTmp[3] >= 0x30 && lpTmp[3] <= 0x39)
        {
          luiValue = ((unsigned int)lpTmp[0] << 24) |
                     ((unsigned int)lpTmp[1] << 16) |
                     ((unsigned int)lpTmp[2] << 8) |
                     (unsigned int)lpTmp[3];
          liOffset = 4;
        }
    }

  *aiPos += liOffset;

  return luiValue;
}

int CTriesDict::ResizeTNodeLst(unsigned int auiNewSize)
{
  if(auiNewSize < m_uiTCurPos)
    {
      return -1;
    }
 
  if(auiNewSize == m_uiTSize)
    {
      return 0;
    }
 
  STRU_TRIES_DICT_NODE *lpDic = (STRU_TRIES_DICT_NODE*)realloc(m_pTNodeLst,
              auiNewSize*sizeof(STRU_TRIES_DICT_NODE));
  
  if(!lpDic)
    {
      return -2;
    }
 
  m_pTNodeLst = lpDic;
  m_uiTSize = auiNewSize;
 
  return 0;
}

int CTriesDict::ResizeBuf(unsigned int auiNewSize)
{
  if(auiNewSize < m_uiBCurPos)
    {
      return -1;
    }
 
  if(auiNewSize == m_uiBufSize)
    {
      return 0;
    }
 
  char *lpBuf = (char*)realloc(m_pStrBuf,auiNewSize);
 
  if(!lpBuf)
    {
      return -2;
    }
 
  m_pStrBuf = lpBuf;
  m_uiBufSize = auiNewSize;
 
  return 0;
}

int CTriesDict::AdjustSEInfo()
{
  unsigned int luiNextEntry = 0;
  unsigned int luiNewPos = 0;
  unsigned int luiCurDEPos = 0;
  unsigned int luiHSize = 0;
   
  while(luiNextEntry < m_uiSECurPos)
    {
      luiCurDEPos = GetBTEntry(luiNextEntry);
      luiHSize = GetSuffHSize(luiNextEntry);
     
      if((luiCurDEPos != DEF_TRIES_FIRST) && (luiCurDEPos >= m_uiECurPos))
 {
   return -1;
 }
     
      if(((luiCurDEPos == DEF_TRIES_FIRST) && (m_uiStartPos != luiNextEntry)) ||
  ((luiCurDEPos != DEF_TRIES_FIRST) && (m_pDicEntry[luiCurDEPos].m_uiSPos != luiNextEntry)))
 {
   luiNextEntry += luiHSize + 2;
 }
      else
 {
   if(luiNextEntry != luiNewPos)
     {
       memmove(m_pSEInfo + luiNewPos,m_pSEInfo+luiNextEntry,(luiHSize+2)*sizeof(unsigned int));
       if(luiCurDEPos != DEF_TRIES_FIRST)
  {
    m_pDicEntry[luiCurDEPos].m_uiSPos = luiNewPos;
  }
       else
  {
    m_uiStartPos = luiNewPos;
  }
     }
      
   luiNextEntry += luiHSize + 2;
   luiNewPos += luiHSize + 2;
 }
    }
   
  if(luiNextEntry != m_uiSECurPos)
    {
      return -1;
    }
   
  m_uiSECurPos = luiNewPos;
   
  return 0;
}

int CTriesDict::ResizeSEInfo(unsigned int auiNewSize)
{
  if(auiNewSize < m_uiSECurPos)
    {
      return -1;
    }
 
  if(auiNewSize == m_uiSESize)
    {
      return 0;
    }
 
  unsigned int *lpSE = (unsigned int*)realloc(m_pSEInfo,auiNewSize*sizeof(unsigned int));
 
  if(!lpSE)
    {
      return -2;
    }
 
  m_pSEInfo = lpSE;
  m_uiSESize = auiNewSize;
 
  return 0;
}

int CTriesDict::ResizeDicEntry(unsigned int auiNewSize)
{
  if(auiNewSize < m_uiECurPos)
    {
      return -1;
    }
 
  if(auiNewSize == m_uiESize)
    {
      return 0;
    }
 
  STRU_TRIES_DICT_ENTRY *lpEntry = (STRU_TRIES_DICT_ENTRY*)realloc(m_pDicEntry,
           auiNewSize * sizeof(STRU_TRIES_DICT_ENTRY));
  
  if(!lpEntry)
    {
      return -2;
    }
 
  m_pDicEntry = lpEntry;
  m_uiESize = auiNewSize;
 
  return 0;
}

int CTriesDict::InitSuffEntry(unsigned int auiEntry,unsigned int auiHSize,unsigned int auiBTPos)
{
  unsigned int *lpEntry = m_pSEInfo + auiEntry;
 
  lpEntry[0] = auiHSize;
  lpEntry[1] = auiBTPos;
  for(int i = 0;i < auiHSize;i++)
    {
      lpEntry[i+2] = DEF_TRIES_NULL;
    }
 
  return 0;
}

unsigned int  CTriesDict::GetSuffEntry(unsigned int auiEntry,unsigned int auiHPos)
{
  unsigned int *lpEntry = m_pSEInfo + auiEntry;
 
  return lpEntry[2+auiHPos];
}

unsigned int  CTriesDict::GetSuffHSize(unsigned int auiEntry)
{
  return m_pSEInfo[auiEntry];
}

unsigned int  CTriesDict::GetBTEntry(unsigned int auiEntry)
{
  return m_pSEInfo[auiEntry+1];
}

int CTriesDict::SetSuffEntry(unsigned int auiEntry,unsigned int auiHPos,unsigned int auiEPos)
{
  m_pSEInfo[auiEntry+2+auiHPos] = auiEPos;
 
  return 0;
}

unsigned int CTriesDict::SeekEntry(unsigned int auiEPos,unsigned int auiStrCode)
{
  unsigned int luiSuffPos = 0;
  unsigned int luiEPos = 0;
  unsigned int luiHSize = 0;
  unsigned int luiHPos = 0;
   
  if(auiEPos == DEF_TRIES_FIRST)
    {
      luiSuffPos = m_uiStartPos;
    }
  else
    {
      luiSuffPos = m_pDicEntry[auiEPos].m_uiSPos;
    }
   
  if(luiSuffPos == DEF_TRIES_SUFFNULL)
    {
      return DEF_TRIES_NULL;
    }
   
  luiHSize = GetSuffHSize(luiSuffPos);
  luiHPos = auiStrCode % luiHSize;
   
  if(((luiEPos = GetSuffEntry(luiSuffPos,luiHPos)) == DEF_TRIES_NULL )||
     (m_pDicEntry[luiEPos].m_uiValue != auiStrCode))
    {
      return DEF_TRIES_NULL;
    }
  else
    {
      return luiEPos;
    }
}

unsigned int CTriesDict::SeekString(char *apTerm,int aiLen)
{
  unsigned int luiValue = 0;
  int liCurPos = 0;
  unsigned int luiHSize = 0;
  unsigned int luiHPos = 0;
  unsigned int luiEPos = 0;

  unsigned int luiSuffPos = m_uiStartPos;

  while(liCurPos < aiLen)
    {                          
      if(luiSuffPos == DEF_TRIES_SUFFNULL)
 {
   return DEF_TRIES_NODENULL;
 }
  
      luiValue =GetStrCode(apTerm,aiLen,&liCurPos);

      luiHSize = GetSuffHSize(luiSuffPos);
      luiHPos = luiValue % luiHSize;
      luiEPos = GetSuffEntry(luiSuffPos,luiHPos);
      if((luiEPos == DEF_TRIES_NULL)||(m_pDicEntry[luiEPos].m_uiValue != luiValue))
 {
   return DEF_TRIES_NODENULL;
 }
  
      luiSuffPos = m_pDicEntry[luiEPos].m_uiSPos;
    }

  return m_pDicEntry[luiEPos].m_uiTPos;
}

// Allocates a search-result holder able to store auiMaxTermNum hits.
// Returns the holder (release it with FreeResult()), or NULL when
// auiMaxTermNum is 0 or any allocation fails; nothing is leaked on failure.
STRU_TRIES_SEARCH_RESULT *CTriesDict::AllocResult(unsigned int auiMaxTermNum)
{
  if(auiMaxTermNum == 0)
    {
      return 0;
    }

  STRU_TRIES_SEARCH_RESULT *lpResult =
    (STRU_TRIES_SEARCH_RESULT *)calloc(1,sizeof(STRU_TRIES_SEARCH_RESULT));
  if(!lpResult)
    {
      return 0;
    }

  lpResult->m_uiMaxTermNum = auiMaxTermNum;
  lpResult->m_ppResult = (STRU_TRIES_DICT_NODE**)calloc(auiMaxTermNum,sizeof(STRU_TRIES_DICT_NODE*));
  lpResult->m_pOffset = (unsigned int*)calloc(auiMaxTermNum,sizeof(unsigned int));
  lpResult->m_uiSegCnt = 0;

  if(!lpResult->m_ppResult || !lpResult->m_pOffset)
    {
      // Undo the partial allocation; free(NULL) is harmless.
      free(lpResult->m_ppResult);
      free(lpResult->m_pOffset);
      free(lpResult);
      return 0;
    }

  return lpResult;
}

// Releases a holder obtained from AllocResult(); a NULL argument is ignored.
void CTriesDict::FreeResult(STRU_TRIES_SEARCH_RESULT *apResult)
{
  if(!apResult)
    {
      return;
    }

  // Inner arrays first, then the holder itself.
  free(apResult->m_ppResult);
  free(apResult->m_pOffset);
  free(apResult);
}

 

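/*
 * NOTE: the text below is web-page residue captured together with the source
 * listing (comment box / payment dialog); it is not part of the program.

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值
*/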