Super Power Dictionary
C.Third Edition
This is the third edition of my dictionary and it is very, very powerful now! Read in 20K words from a 2.3M text
file only uses less than 2seconds. However, ranking them will take 41seconds! As it need to track each node from
"WordTree" from "big to small" by "Frequency" and "rank" them by this sequence and then copy each node to a node
of "RankTree" then "insert" the new rankTree node into ranktree.
1。 Basic idea: I found out some mistakes in "ranking" functions. And it is really a hardtime to debug for a
whole day in lab(almost 8hours from 10:00AM to 18:30PM).
2。 Program design: The biggest problem is still how to walk round the "key" problem of RankTree by using "frequency"
as key to compare and at same keep all ancester class with their original functions.
3。 Major function:
A. void traverse(...);
I add one more parameters to let "RankTree Forest" to start traversing from its own "root".
B. void insertRank(Tree* node, void* userPtr)
This is a special "callback" function to construct "RankTree" forest by "traversing" each node of
"WordTree".
C. void Tree::maxWalk(...);
There are two format of this new MAXORDER way of traversing. Actually it is so-called "from big to small"
exactly opposite to "MIDORDER".
4。 Further improvement:
A. I have to go soon, so, no more time.
B. Still need improve.
C. Ranking counting and WordTree counting is differed by one, and I have not found the bug.
D. I hope I can rewrite the basic string-related part to enable the dictionary to read Chinese.
#include #include #include #include #include
using namespace std;
const char* DefaultBackup = "c:nickback.txt"; const MaxWordLength = 255;
enum TraverseOrder { PREORDER, MIDORDER, POSTORDER, MAXORDER };
class Tree { private: Tree* pLeft; Tree* pRight; Tree* pParent; char* pName; int index; static int treeCounter; static Tree* pRoot; int freq; int rank; void maxWalk(Tree* node, void (*visit)(Tree* node)); void preWalk(Tree* node, void (*visit)(Tree* node)); void midWalk(Tree* node, void (*visit)(Tree* node)); void postWalk(Tree* node, void (*visit)(Tree* node)); void maxWalk(Tree* node, void (*visit)(Tree* node, void* userPtr), void* userPtr); void preWalk(Tree* node, void (*visit)(Tree* node, void* userPtr), void* userPtr); void midWalk(Tree* node, void (*visit)(Tree* node, void* userPtr), void* userPtr); void postWalk(Tree* node, void (*visit)(Tree* node, void* userPtr), void* userPtr); protected: void* pKey; public: void decCount() {treeCounter --;} const int getRank() { return rank;} void setRank(const int newRank) { rank = newRank;} const int getFreq() {return freq;} void setFreq(const int newFreq) { freq = newFreq;} void setFreq() {freq++;} Tree* left(); Tree* right(); Tree* parent(); virtual void* key(); virtual void setKey(void* newKey); void setRoot(Tree* node); const char* name(); const int getIndex(); void setLeft(Tree* leftSon); void setRight(Tree* rightSon); void setParent(Tree* father); void setIndex(const int number); virtual int compare(const void* otherKey); void setName(const char *str); void giveBirth(); virtual bool operator>(Tree&); virtual bool operator==(Tree&); virtual bool operator
void visitRank(Tree* node, void* userPtr);
void defaultVisit(Tree* node);
void insertRank(Tree* node, void* userPtr);
class BinaryTree: public Tree { private:
public: virtual void remove(BinaryTree* node); virtual BinaryTree* next(); virtual BinaryTree* prev(); virtual bool insert(BinaryTree* node, BinaryTree* start=(BinaryTree*)root()); virtual ~BinaryTree();
};
class WordTree: public BinaryTree { private: static int wordLength; static int wordCounter; WordTree* addNode(WordTree* node, FILE** stream);
protected: void setWordCounter(); void readWord(FILE** file); void addWord(char* buffer);
public: WordTree* searchWord(WordTree* node, const char* buffer); WordTree* readNode(WordTree* node, FILE** stream); WordTree* readFromFile(const char* fileName); void saveNode(WordTree* node, FILE** stream); virtual int compare(const void *otherKey); void setWordLength(const int newLength); static const int getWordLength(); virtual void setKey(void* newKey); virtual bool operator>(Tree&); virtual bool operator==(Tree&); virtual bool operator
class RankTree: public WordTree { private:
protected:
public: virtual bool operator>(Tree& node); virtual bool operator
};
int count=0;
int main() { WordTree *ptr = new WordTree; char choice[6], fileName[30]; long start =0;
ptr = ptr->readFromFile(DefaultBackup); cout<>choice; while (strcmp(choice, "quit")!=0&&strcmp(choice, "input")!=0) { cout<>choice; } if (strcmp(choice, "quit")==0) { cout<wordCount()<>fileName; while (strcmp(fileName, "quit")!=0) { start = time(0); ptr->openFile(fileName); cout<
cin>>fileName; } cout<count()<
cin>>choice; if (strcmp(choice, "yes")==0) { ptr->traverse(defaultVisit, PREORDER); }
cout<>choice; if (strcmp(choice, "yes")==0) { start = time(0); int rankCount =0; RankTree *rankPtr = new RankTree; rankPtr->setFreq(ptr->getFreq()); rankPtr->setName((char*)ptr->key());
ptr->traverse(insertRank, (void*)rankPtr);
rankPtr->traverse(visitRank, (void*)(&rankCount), MAXORDER, rankPtr); cout<>choice; if (strcmp(choice, "yes")==0) { rankPtr->traverse(defaultVisit, MAXORDER, rankPtr); } cout<count()<wordCount(); ptr->saveToFile(DefaultBackup); } }
return 0; }
int WordTree::wordCounter = 0; int WordTree::wordLength = 20;
void insertRank(Tree* node, void* userPtr) { RankTree* rankPtr;// rankPtr = (RankTree*)(((RankTree*)userPtr)->searchWord((RankTree*)userPtr,(char*)node->key())); if (rankPtr == NULL) { rankPtr = new RankTree; rankPtr->setName((char*)node->key());
rankPtr->setFreq(node->getFreq()); ((RankTree*)(userPtr))->insert(rankPtr, (RankTree*)userPtr); } }
RankTree::RankTree() { decCount(); }
void visitRank(Tree* node, void* userPtr) { node->setRank(*(int*)(userPtr)); *(int*)(userPtr) +=1; }
bool RankTree::operator
bool RankTree::operator >(Tree& node) { int result; result = getFreq() - node.getFreq(); if (result !=0) { return (result>0); } else { return (strcmp((char*)key(), (char*)node.key())>0); } }
void RankTree::setRankTree(WordTree* wordRoot) { RankTree* ptr;
if (wordRoot!=NULL) { if (this->root()!=NULL) { ptr = new RankTree; ptr->setFreq(wordRoot->getFreq()); ptr->setKey((void*)wordRoot->key()); ptr->insert(ptr); } else { this->setRoot(this); this->setFreq(wordRoot->getFreq()); this->setKey((void*)wordRoot->key()); }
setRankTree((WordTree*)wordRoot->left()); setRankTree((WordTree*)wordRoot->right()); }
}
BinaryTree::~BinaryTree() {
}
WordTree::~WordTree() {
}
void WordTree::setWordCounter() { wordCounter ++; }
const int WordTree::getWordLength() { return wordLength; }
WordTree* WordTree::addNode(WordTree* node, FILE** stream) { char buffer[MaxWordLength]; int index, pIndex, number; WordTree *domino; char *ptr=buffer, ch;
fread((void*)(&index), sizeof(int), 1, *stream); fread((void*)(&pIndex), sizeof(int), 1, *stream);
if (index==-1&&pIndex==-1) { return NULL; }
do { ch = fgetc(*stream); *ptr = ch; ptr++; } while(ch!=''); fread((void*)(&number), sizeof(int), 1, *stream); setWordCounter(); //add a word
if (pIndex== -1) //this is root { node->setIndex(index); node->setFreq(number); node->setName(buffer); node->setRoot(node); return node; }
while (pIndex!=node->getIndex()) { if (node->parent()==NULL) { return NULL; } node= (WordTree*)node->parent(); }
if (node!=NULL&&pIndex==node->getIndex()) { domino = new WordTree; domino->setIndex(index); domino->setName(buffer); domino->setFreq(number); domino->setParent(node); if (*dominosetLeft(domino); } else { node->setRight(domino); }
return domino; } else { return NULL; } }
WordTree* WordTree::readNode(WordTree* node, FILE** stream) { WordTree* domino = node;
while (domino!=NULL) { domino = addNode(domino, stream); } return (WordTree*)node->root(); //this is ugly as I have to remove those sentinal }
WordTree* WordTree::readFromFile(const char* fileName) { FILE* stream;
if ((stream=fopen(fileName, "r+b"))==NULL) { cout<
void WordTree::saveNode(WordTree* node, FILE** stream) { int first, second, number;
first = node->getIndex(); number = getFreq();
fwrite((void*)(&first), sizeof(int), 1, *stream); if (node->parent()==NULL) { second = -1; } else { second = node->parent()->getIndex(); } fwrite((void*)(&second), sizeof(int), 1, *stream); fwrite((void*)(node->name()),sizeof(char), strlen(node->name()), *stream); fputc('', *stream);//I have to put this '/n' otherwise there is always problem! fwrite((void*)(&number), sizeof(int), 1, *stream); }
void traverseTree(WordTree* node, FILE** stream) { if (node!=NULL) { node->saveNode(node, stream); traverseTree((WordTree*)node->left(), stream); traverseTree((WordTree*)node->right(), stream); } }
void writeEnd(FILE** stream) { int temp1=-1, temp2=-1; fwrite((void*)(&temp1), sizeof(int), 1, *stream); fwrite((void*)(&temp2), sizeof(int), 1, *stream); }
void WordTree::saveToFile(const char* fileName) { void traverseTree(WordTree* node, FILE ** stream); void writeEnd(FILE** stream);
WordTree *domino;
FILE * stream;
if ((stream=fopen("c:nickback.txt", "w+b"))==NULL) { cout<
const int Tree::count() { return treeCounter; }
void WordTree::setWordLength(const int newLength) { if (newLength>MaxWordLength) { cout<
int WordTree::compare(const void*otherKey) { return strcmp((char*)key(), (char*)otherKey); }
int WordTree::wordCount() { return wordCounter; }
WordTree::WordTree() {
}
void WordTree::openFile(const char* fileName) { FILE * stream; if ((stream=fopen(fileName, "r+b"))==NULL) { cout<
void WordTree::readWord(FILE **stream) { char buffer[MaxWordLength]; char *ptr = buffer; char ch; int counter =0;
while (!feof(*stream)) { ptr = buffer; counter = 0; ch = toupper(fgetc(*stream)); while (isalnum(ch)&&!feof(*stream)&&counter
if (ptr!=buffer) { *ptr=''; addWord(buffer); } } }
WordTree* WordTree::searchWord(WordTree* node, const char* buffer) { int result; if (node!=NULL) { result = strcmp((char*)(node->key()), buffer); if (result ==0) { return node; } else { if (result < 0) { return searchWord((WordTree*)node->right(), buffer); } else { return searchWord((WordTree*)node->left(), buffer); } } } else { return NULL; } }
WordTree* WordTree::findWord(const char *buffer) { return searchWord((WordTree*)root(), buffer); }
void WordTree::addWord(char * buffer) { //need add a findword method and find first and then new WordTree *ptr; ptr = findWord(buffer); if (ptr==NULL) { ptr = new WordTree;
ptr->setName(buffer); if (ptr->insert(ptr)) { setWordCounter(); } } else { ptr->setFreq(); } }
void WordTree::setName(const char *str) { Tree::setName(str); setKey((void*)name()); }
int Tree::treeCounter= 0;
WordTree::WordTree(const char* fileName) { WordTree::WordTree(); openFile(fileName); }
void WordTree::setKey(void * newKey) { pKey = newKey; }
bool WordTree::operator
bool WordTree::operator ==(Tree& second) { if (strcmp((char*)key(), (char*)second.key())==0) { return true; } else { return false; } }
bool WordTree::operator >(Tree& second) { if (strcmp((char*)key(), (char*)second.key())>0) { return true; } else { return false; } }
void defaultVisit(Tree* node) { cout<getIndex()<name()<getFreq()<getRank()<parent()!=NULL) { cout<parent()->name()<parent()->left()) { cout<
void Tree::preWalk(Tree* node, void (*visit)(Tree* node)) { if (node!=NULL) { visit(node); preWalk(node->left(), visit); preWalk(node->right(), visit); } }
void Tree::postWalk(Tree* node, void (*visit)(Tree* node)) { if (node!=NULL) { postWalk(node->left(), visit); postWalk(node->right(), visit); visit(node); } }
void Tree::maxWalk(Tree* node, void (*visit)(Tree* node)) { if (node!=NULL) { maxWalk(node->right(), visit); visit(node); maxWalk(node->left(), visit);
} }
void Tree::midWalk(Tree* node, void (*visit)(Tree* node)) { if (node!=NULL) { midWalk(node->left(), visit); visit(node); midWalk(node->right(), visit); } }
void Tree::preWalk(Tree* node, void (*visit)(Tree* node, void* userPtr), void* userPtr) { if (node!=NULL) { visit(node, userPtr); preWalk(node->left(), visit, userPtr); preWalk(node->right(), visit, userPtr); } }
void Tree::midWalk(Tree* node, void (*visit)(Tree* node, void* userPtr), void* userPtr) { if (node!=NULL) { midWalk(node->left(), visit, userPtr); visit(node, userPtr); midWalk(node->right(), visit, userPtr); } }
void Tree::postWalk(Tree* node, void (*visit)(Tree* node, void* userPtr), void* userPtr) { if (node!=NULL) { postWalk(node->left(), visit, userPtr); postWalk(node->right(), visit, userPtr); visit(node, userPtr); } }
void Tree::maxWalk(Tree* node, void (*visit)(Tree* node, void* userPtr), void* userPtr) { if (node!=NULL) { maxWalk(node->right(), visit, userPtr); visit(node, userPtr); maxWalk(node->left(), visit, userPtr); } }
void Tree::traverse(void (*visit)(Tree* node, void* userPtr), void* userPtr, TraverseOrder defaultOrder, Tree* start) { Tree *domino; domino = start;
switch(defaultOrder) { case PREORDER: preWalk(domino, visit, userPtr); break; case MIDORDER: midWalk(domino, visit, userPtr); break; case POSTORDER: postWalk(domino, visit, userPtr); break; case MAXORDER: maxWalk(domino, visit, userPtr); break; } }
void Tree::traverse(void (*visit)(Tree* node), TraverseOrder defaultOrder, Tree* start) { Tree *domino; domino = start;
switch(defaultOrder) { case PREORDER: preWalk(domino, visit); break; case MIDORDER: midWalk(domino, visit); break; case POSTORDER: postWalk(domino, visit); break; case MAXORDER: maxWalk(domino, visit); break; } }
Tree* Tree::pRoot = NULL;
void BinaryTree::remove(BinaryTree* node) { BinaryTree* ptr=NULL;
if (node->left()==NULL || node->right()==NULL) { if (node->left()!=NULL) { node->parent()->setLeft(node->left()); node->left()->setParent(node->parent()); } if (node->right()!=NULL) { node->parent()->setRight(node->right()); node->right()->setParent(node->parent()); } } else { ptr = node->next(); remove(ptr); ptr->setParent(node->parent()); if (node->parent()==NULL) { node->setRoot(ptr); } else { if (node==node->parent()->left()) { node->parent()->setLeft(ptr); } else { node->parent()->setRight(ptr); } ptr->setLeft(node->left()); ptr->setRight(node->right()); } } }
bool Tree::operator==(Tree& second) { if (this->compare(second.key())==0) { return true; } else { return false; } }
bool Tree::operator compare(second.key())<0) { return true; } else { return false; } }
bool Tree::operator >(Tree& second) { if (this->compare(second.key())>0) { return true; } else { return false; } }
int Tree::compare(const void * otherKey) { return *((int*)(key()))- *((int*)(otherKey)); }
void* Tree::key() { return pKey; }
void Tree::setKey(void *newKey) { pKey = malloc(sizeof(int)); *(int*)(pKey) = *(int*)(newKey); }
void Tree::setRoot(Tree *node) { pRoot = node; }
bool BinaryTree::insert(BinaryTree* node, BinaryTree* start) { Tree* ptr, *dummy;
ptr = start; dummy = ptr; while (ptr!=NULL) { dummy = ptr; if (*ptr>*node) { ptr = ptr->left(); } else { if (*ptrright(); } else { if (*ptr==*node) { return false; } } } }
node->setParent(dummy);
if (dummy==NULL) { setRoot(node); } else { // node->setRoot(dummy->root()); if (*dummy>*node) { dummy->setLeft(node); } else { if (*dummysetRight(node); } } //if "== ", donot insert; } return true; }
void Tree::setLeft(Tree* leftSon) { pLeft = leftSon; }
void Tree::setRight(Tree* rightSon) { pRight = rightSon; }
void Tree::setParent(Tree* father) { pParent = father; }
void Tree::giveBirth() { Tree* leftTree = new Tree; Tree* rightTree = new Tree; char str[128];
leftTree->setIndex(this->getIndex() *2 ); rightTree->setIndex(this->getIndex() * 2 + 1);
this->setLeft(leftTree); this->setRight(rightTree);
leftTree->setParent(this); rightTree->setParent(this);
strcpy(str, "left son of "); strcat(str, this->name());
leftTree->setName(str);
strcpy(str, "right son of "); strcat(str, this->name());
rightTree->setName(str);
leftTree->setParent(this); rightTree->setParent(this);
if (root()==NULL) { setRoot(this); setParent(NULL); } /* else { leftTree->setRoot(root()); rightTree->setRoot(root()); } */ }
void Tree::setIndex(const int number) { index = number; }
const int Tree::getIndex() { return index; }
const char* Tree::name() { return pName; }
void Tree::setName(const char *str) { if (str!=NULL) { pName = (char *)malloc(strlen(str)+1); strcpy(pName, str); } }
Tree::Tree() { setIndex(treeCounter); treeCounter++; pLeft = NULL; pRight = NULL; pParent = NULL; pName = NULL; pKey = NULL; freq = 0; rank =0; }
Tree::~Tree() { if (pName!=NULL) { free(pName); } if (pKey!=NULL) { free(pKey); } }
Tree* Tree::left() { return pLeft; }
Tree* Tree::right() { return pRight; }
Tree* Tree::parent() { return pParent; }
BinaryTree* BinaryTree::next() { Tree* result = NULL;
if (right()!=NULL) { result = right(); while (result->left!=NULL) { result = result->left(); } } else { if (parent()!= NULL) { result = this; while(result->parent()!=NULL&&result==result->parent()->right()) { result = result->parent(); } } //result = null } return (BinaryTree*)result; }
BinaryTree* BinaryTree::prev() { Tree* result = NULL; if (left()!=NULL) { result = left(); while (result->right()!=NULL) { result = result->right(); } } else { if (parent()!=NULL) { result = this; while (result->parent()!=NULL&&result==result->parent()->left()) { result = result->parent(); } } } return (BinaryTree*)result; }
http://www3.sympatico.ca/qingzhehuang/SuperDictionary.htm
来自 “ ITPUB博客 ” ,链接:http://blog.itpub.net/10752043/viewspace-993038/,如需转载,请注明出处,否则将追究法律责任。
转载于:http://blog.itpub.net/10752043/viewspace-993038/
本文介绍了一个使用二叉搜索树实现的高效字典结构,该字典能够快速读取大量词汇并进行频率排序。文章详细描述了字典的设计理念、主要功能以及进一步改进的方向。
1512

被折叠的 条评论
为什么被折叠?



