#include " ../DiskBuf.h " #pragma once #ifndef IsHz #define IsHz(x) (((x)>=0x81 && (x)<=0xA0)||((x)>=0xAA && (x)<=0xFE)) #define HzCode(x) ((((x)&0x7F)<<8)+((x)>>8)) #endif #ifdef _DEBUG #define _IndexBuffer_Size 0x80 #else #define _IndexBuffer_Size 0x1000 #endif class IndexMaker ... {private: struct IndexMakerBuf //词库缓冲数组 ...{ char nWords[3]; //[WordMaxLen]; unsigned long _Last_FileNum; unsigned long * _Data; unsigned long * DataPtr; unsigned long * DataEpr; } _WordsIndex[0x8000]; unsigned long *_Diskbuf; unsigned long _filenum; //文件号 char _mOutput_Path[512]; //索引输出路径 char* _mOutPath_EndPtr; //指向索引路径末端public: long __Error; //function ~IndexMaker(void) ...{ if (_Diskbuf) ...{ free(_Diskbuf); } } IndexMaker(char *OutPutDir)//cache set:default=840, unsigned long CacheSet ...{ _filenum=1; __Error=NULL; strcpy(_mOutput_Path,OutPutDir); _mOutPath_EndPtr=_mOutput_Path+strlen(_mOutput_Path); unsigned long *tbuf=_Diskbuf=(unsigned long *)calloc(0x8000*_IndexBuffer_Size,4); if (!_Diskbuf) ...{ __Error=_Error_Malloc;return ;} memset(_WordsIndex,0,sizeof(_WordsIndex)); for(unsigned long i=0;i<0x8000;i++) ...{ IndexMakerBuf *tWi=_WordsIndex+i; *(unsigned short *)(tWi->nWords)=(unsigned short)(((i>>8)|0x80)+((i&0xFF)<<8)); tWi->nWords[2]=NULL; tWi->_Last_FileNum=NULL; tWi->DataPtr=tWi->_Data=tbuf; tbuf+=_IndexBuffer_Size; tWi->DataEpr=tbuf; } } long _MakeIndex(DiskBuffer *m_Buffer) ...{ char *m_FileText=NULL; while (m_Buffer->_PopData(&m_FileText,&_filenum)) _fenci(m_FileText); for(unsigned long i=0;i<0x8000;i++) ...{ if (_WordsIndex[i].DataPtr!=_WordsIndex[i]._Data) _Write2File(_WordsIndex+i); } return __Error; }private: void _Write2File(IndexMakerBuf *nDataCache) ...{ strcpy(_mOutPath_EndPtr,nDataCache->nWords);//char name FILE *outFile; if (NULL==(outFile=fopen(_mOutput_Path,"ab+"))) ...{ printf("error write to file %s %lx ",_mOutput_Path,*_mOutPath_EndPtr); } else ...{ fwrite(nDataCache->_Data,4,(nDataCache->DataPtr)-(nDataCache->_Data),outFile); fclose(outFile); } nDataCache->DataPtr=nDataCache->_Data; } void _fenci(char *mTextCharPtr) ...{ mTextCharPtr--;//起始位置减一,使第一个位置为一而不是零 char * bptChar=mTextCharPtr; while (*(bptChar)) ...{ if ((*bptChar)<0) ...{ if (IsHz(*(unsigned char *)(bptChar))) ...{ //_Write2Cache(_WordsIndex+HzCode(*(unsigned short *)(bptChar)),(unsigned long)(bptChar-mTextCharPtr)); IndexMakerBuf *tDataCache=_WordsIndex+HzCode(*(unsigned short *)(bptChar)); unsigned long pushData=(unsigned long)(bptChar-mTextCharPtr); //void _Write2Cache(IndexMakerBuf *tDataCache,unsigned long pushData) //{ if (tDataCache->DataPtr==tDataCache->DataEpr) _Write2File(tDataCache); if (tDataCache->_Last_FileNum!=_filenum) ...{ *(tDataCache->DataPtr)=_filenum; tDataCache->_Last_FileNum=_filenum; tDataCache->DataPtr++; if (tDataCache->DataPtr==tDataCache->DataEpr) _Write2File(tDataCache); } *(tDataCache->DataPtr)=pushData; tDataCache->DataPtr++; //} //end function _Write2Cache } bptChar+=2; } else bptChar++; } //*//_WriteZero(); //for(unsigned long i=1;i<0x7F00;i++) //Powered by barenx IndexMakerBuf *nDataCacheEpr=_WordsIndex+0x7F00; for(IndexMakerBuf *nDataCache=_WordsIndex+1;nDataCache<nDataCacheEpr;nDataCache++) ...{ if (nDataCache->_Last_FileNum==_filenum) ...{ if (nDataCache->DataPtr==nDataCache->DataEpr) _Write2File(nDataCache); *(nDataCache->DataPtr)=NULL; (nDataCache->DataPtr)++; } } //end _WriteZero();*/ }} ; Powered by barenx