BST.h
#ifndef BINARY_SEARCH_TREE_BST_H
#define BINARY_SEARCH_TREE_BST_H
#include <iostream>
#include <queue>
#include <cassert>
using namespace std;
/**
 * Binary search tree that maps unique keys to values.
 *
 * Key must support operator==, operator< and operator> and be printable with
 * operator<< (used by the traversal methods, which write keys to std::cout).
 * The tree owns its nodes: copying a tree performs a deep copy, and the
 * destructor frees every node.
 */
template<typename Key, typename Value>
class BST{

private:
    // One tree node holding a key/value pair and links to its two subtrees.
    struct Node{
        Key key;
        Value value;
        Node *left;
        Node *right;

        // Creates a leaf node.
        Node(const Key& key, const Value& value)
            : key(key), value(value), left(nullptr), right(nullptr) {}

        // Shallow copy: duplicates key/value and points at the SAME children
        // as `node`. Used by remove() to clone the successor node.
        explicit Node(Node *node)
            : key(node->key), value(node->value),
              left(node->left), right(node->right) {}
    };

    Node *root;   // root of the tree, nullptr when the tree is empty
    int count;    // number of nodes currently stored

public:
    // Creates an empty tree.
    BST() : root(nullptr), count(0) {}

    // Deep-copies `other`; the two trees share no nodes afterwards.
    BST(const BST& other) : root(copyTree(other.root)), count(other.count) {}

    // Replaces the contents of this tree with a deep copy of `other`.
    BST& operator=(const BST& other){
        if( this != &other ){
            // Build the copy before releasing the old nodes.
            Node* copied = copyTree(other.root);
            destroy(root);
            root = copied;
            count = other.count;
        }
        return *this;
    }

    // Frees every node; count must be back to zero afterwards.
    ~BST(){
        destroy(root);
        assert( count == 0 );
    }

    // Returns the number of elements stored in the tree.
    int size() const{
        return count;
    }

    // Returns true when the tree holds no elements.
    bool isEmpty() const{
        return count == 0;
    }

    // Inserts (key, value); if key already exists its value is overwritten.
    void insert(Key key, Value value){
        root = insert(root, key, value);
    }

    // Returns true when the tree contains an element with the given key.
    bool contain(Key key){
        return contain(root, key);
    }

    // Returns a pointer to the value stored under key, or a null pointer
    // when the key is absent. The pointer refers to the node's own value.
    Value* search(Key key){
        return search(root, key);
    }

    // Pre-order traversal: prints each key followed by a space.
    void preOrder(){
        preOrder(root);
    }

    // In-order traversal: prints each key followed by a space.
    void inOrder(){
        inOrder(root);
    }

    // Post-order traversal: prints each key followed by a space.
    void postOrder(){
        postOrder(root);
    }

    // Level-order (breadth-first) traversal: prints each key followed by a
    // space. Prints nothing for an empty tree.
    void levelOrder(){
        // Guard: pushing a null root would be dereferenced below.
        if( root == nullptr )
            return;

        std::queue<Node*> q;
        q.push(root);
        while( !q.empty() ){
            Node *node = q.front();
            q.pop();

            std::cout<<node->key<<" ";

            if( node->left )
                q.push(node->left);
            if( node->right )
                q.push(node->right);
        }
    }

    // Returns the smallest key. The tree must not be empty (asserted).
    Key minimum(){
        assert( count != 0 );
        Node* minNode = minimum(root);
        return minNode->key;
    }

    // Returns the largest key. The tree must not be empty (asserted).
    Key maximum(){
        assert( count != 0 );
        Node* maxNode = maximum(root);
        return maxNode->key;
    }

    // Removes the node holding the smallest key; no-op on an empty tree.
    void removeMin(){
        if( root )
            root = removeMin( root );
    }

    // Removes the node holding the largest key; no-op on an empty tree.
    void removeMax(){
        if( root )
            root = removeMax( root );
    }

    // Removes the node whose key equals `key`; no-op when it is absent.
    void remove(Key key){
        root = remove(root, key);
    }

private:
    // Returns a deep copy of the subtree rooted at node (nullptr for nullptr).
    static Node* copyTree(const Node* node){
        if( node == nullptr )
            return nullptr;

        Node* copy = new Node(node->key, node->value);
        copy->left = copyTree(node->left);
        copy->right = copyTree(node->right);
        return copy;
    }

    // Inserts (key, value) into the subtree rooted at node and returns the
    // root of the resulting subtree.
    Node* insert(Node* node, const Key& key, const Value& value){
        if( node == nullptr ){
            count += 1;
            return new Node(key, value);
        }

        if( key == node->key )
            node->value = value;
        else if( key < node->key )
            node->left = insert(node->left, key, value);
        else    // key > node->key
            node->right = insert(node->right, key, value);

        return node;
    }

    // Returns true when the subtree rooted at node contains key.
    bool contain(Node* node, const Key& key){
        if( node == nullptr )
            return false;

        if( key == node->key )
            return true;
        else if( key < node->key )
            return contain(node->left, key);
        else
            return contain(node->right, key);
    }

    // Looks key up in the subtree rooted at node; returns a pointer to its
    // value or a null pointer when not found.
    Value* search(Node* node, const Key& key){
        if( node == nullptr )
            return nullptr;

        if( key == node->key )
            return &(node->value);
        else if( key < node->key )
            return search(node->left, key);
        else
            return search(node->right, key);
    }

    // Pre-order traversal of the subtree rooted at node.
    void preOrder(Node* node){
        if( node != nullptr ){
            std::cout<<node->key<<" ";
            preOrder(node->left);
            preOrder(node->right);
        }
    }

    // In-order traversal of the subtree rooted at node.
    void inOrder(Node* node){
        if( node != nullptr ){
            inOrder(node->left);
            std::cout<<node->key<<" ";
            inOrder(node->right);
        }
    }

    // Post-order traversal of the subtree rooted at node.
    void postOrder(Node* node){
        if( node != nullptr ){
            postOrder(node->left);
            postOrder(node->right);
            std::cout<<node->key<<" ";
        }
    }

    // Frees every node of the subtree rooted at node (children first) and
    // decrements count once per freed node.
    void destroy(Node* node){
        if( node != nullptr ){
            destroy(node->left);
            destroy(node->right);

            delete node;
            count--;
        }
    }

    // Returns the node with the smallest key in the subtree rooted at node.
    // node must not be null.
    Node* minimum(Node* node){
        if( node->left == nullptr )
            return node;

        return minimum( node->left );
    }

    // Returns the node with the largest key in the subtree rooted at node.
    // node must not be null.
    Node* maximum(Node* node){
        if( node->right == nullptr )
            return node;

        return maximum( node->right );
    }

    // Deletes the smallest node of the subtree rooted at node (non-null) and
    // returns the root of the resulting subtree.
    Node* removeMin(Node* node){
        if( node->left == nullptr ){
            // The minimum has no left child; its right subtree takes its place.
            Node* rightNode = node->right;
            delete node;
            count --;
            return rightNode;
        }

        node->left = removeMin( node->left );
        return node;
    }

    // Deletes the largest node of the subtree rooted at node (non-null) and
    // returns the root of the resulting subtree.
    Node* removeMax(Node* node){
        if( node->right == nullptr ){
            // The maximum has no right child; its left subtree takes its place.
            Node* leftNode = node->left;
            delete node;
            count --;
            return leftNode;
        }

        node->right = removeMax( node->right );
        return node;
    }

    // Deletes the node whose key equals `key` from the subtree rooted at node
    // and returns the root of the resulting subtree.
    Node* remove(Node* node, const Key& key){
        if( node == nullptr )
            return nullptr;

        if( key < node->key ){
            node->left = remove( node->left , key );
            return node;
        }
        else if( key > node->key ){
            node->right = remove( node->right, key );
            return node;
        }
        else{   // key == node->key
            // No left child: the right subtree (possibly empty) replaces node.
            if( node->left == nullptr ){
                Node *rightNode = node->right;
                delete node;
                count--;
                return rightNode;
            }

            // No right child: the left subtree replaces node.
            if( node->right == nullptr ){
                Node *leftNode = node->left;
                delete node;
                count--;
                return leftNode;
            }

            // Two children: replace node with a copy of its in-order
            // successor (the minimum of the right subtree).
            assert( node->left != nullptr && node->right != nullptr );
            Node *successor = new Node(minimum(node->right));
            count ++;   // the copy adds a node; removeMin below removes the original

            successor->right = removeMin(node->right);
            successor->left = node->left;

            delete node;
            count --;

            return successor;
        }
    }
};
#endif
FileOps.h
#ifndef BINARY_SEARCH_TREE_FILEOPS_H
#define BINARY_SEARCH_TREE_FILEOPS_H
#include <cctype>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
using namespace std;
// Helpers for reading a text file and splitting it into lower-case words.
// All functions are inline so this header can be included from more than one
// translation unit without violating the one-definition rule.
namespace FileOps{

    // Returns the index of the first alphabetic character of s at or after
    // position start, or s.length() when there is none.
    inline int firstCharacterIndex(const std::string& s, int start){
        const int length = static_cast<int>(s.length());
        for( int i = start ; i < length ; i ++ )
            // <cctype> functions require a value representable as unsigned char.
            if( std::isalpha(static_cast<unsigned char>(s[i])) )
                return i;
        return length;
    }

    // Returns a copy of s with every character converted by tolower().
    inline std::string lowerS( const std::string& s){
        std::string ret;
        ret.reserve(s.length());
        for( std::string::size_type i = 0 ; i < s.length() ; i ++ )
            ret += static_cast<char>(std::tolower(static_cast<unsigned char>(s[i])));
        return ret;
    }

    // Reads the file `filename` and appends every word it contains to
    // `words`, lower-cased. A word is a maximal run of alphabetic characters.
    // Returns false (after printing a message to std::cout) when the file
    // cannot be opened, true otherwise.
    inline bool readFile(const std::string& filename, std::vector<std::string> &words){

        std::ifstream file(filename);
        if( !file.is_open() ){
            std::cout<<"Can not open "<<filename<<" !!!"<<std::endl;
            return false;
        }

        // A line break is never alphabetic, so no word spans two lines and
        // each line can be tokenised on its own.
        std::string line;
        while( std::getline(file, line) ){
            const int length = static_cast<int>(line.length());
            int start = firstCharacterIndex(line, 0);
            while( start < length ){
                // Extend [start, end) over the whole alphabetic run.
                int end = start + 1;
                while( end < length && std::isalpha(static_cast<unsigned char>(line[end])) )
                    end ++;

                words.push_back( lowerS( line.substr(start, end - start) ) );
                start = firstCharacterIndex(line, end);
            }
        }

        return true;
    }
}
#endif
SequenceST.h
#ifndef BINARY_SEARCH_TREE_SEQUENCESEARCHLIST_H
#define BINARY_SEARCH_TREE_SEQUENCESEARCHLIST_H
#include <iostream>
#include <cassert>
using namespace std;
/**
 * Sequential search table: a singly linked list that maps unique keys to
 * values. Every lookup, insertion and removal scans the list from the head.
 *
 * Key must support operator== and operator!=. The table owns its nodes:
 * copying performs a deep copy and the destructor frees every node.
 */
template<typename Key, typename Value>
class SequenceST{

private:
    // One list node holding a key/value pair and the link to the next node.
    struct Node{
        Key key;
        Value value;
        Node *next;

        Node(const Key& key, const Value& value)
            : key(key), value(value), next(nullptr) {}
    };

    Node* head;   // first node of the list, nullptr when the table is empty
    int count;    // number of nodes currently stored

    // Returns a deep copy of the list starting at source, preserving order.
    static Node* copyList(const Node* source){
        Node* newHead = nullptr;
        Node* tail = nullptr;
        for( ; source != nullptr ; source = source->next ){
            Node* copy = new Node(source->key, source->value);
            if( tail == nullptr )
                newHead = copy;
            else
                tail->next = copy;
            tail = copy;
        }
        return newHead;
    }

    // Frees every node and decrements count once per freed node.
    void destroyAll(){
        while( head != nullptr ){
            Node *node = head;
            head = head->next;
            delete node;
            count --;
        }
    }

public:
    // Creates an empty table.
    SequenceST() : head(nullptr), count(0) {}

    // Deep-copies `other`; the two tables share no nodes afterwards.
    SequenceST(const SequenceST& other)
        : head(copyList(other.head)), count(other.count) {}

    // Replaces the contents of this table with a deep copy of `other`.
    SequenceST& operator=(const SequenceST& other){
        if( this != &other ){
            // Build the copy before releasing the old nodes.
            Node* copied = copyList(other.head);
            destroyAll();
            head = copied;
            count = other.count;
        }
        return *this;
    }

    // Frees every node; the list must be empty and count zero afterwards.
    ~SequenceST(){
        destroyAll();
        assert( head == nullptr && count == 0 );
    }

    // Returns the number of elements stored in the table.
    int size(){
        return count;
    }

    // Returns true when the table holds no elements.
    bool isEmpty(){
        return count == 0;
    }

    // Inserts (key, value). If key already exists its value is overwritten;
    // otherwise a new node is added at the head of the list.
    void insert(Key key, Value value){
        Node *node = head;
        while( node != nullptr ){
            if( key == node->key ){
                node->value = value;
                return;
            }
            node = node->next;
        }

        Node *newNode = new Node(key, value);
        newNode->next = head;
        head = newNode;
        count ++;
    }

    // Returns true when the table contains an element with the given key.
    bool contain(Key key){
        Node *node = head;
        while( node != nullptr ){
            if( key == node->key ){
                return true;
            }
            node = node->next;
        }
        return false;
    }

    // Returns a pointer to the value stored under key, or a null pointer
    // when the key is absent. The pointer refers to the node's own value.
    Value* search(Key key){
        Node *node = head;
        while( node != nullptr ){
            if( key == node->key ){
                return &(node->value);
            }
            node = node->next;
        }
        return nullptr;
    }

    // Removes the element whose key equals `key`; no-op when it is absent
    // or the table is empty.
    void remove(Key key){
        // Guard: the head is dereferenced below.
        if( head == nullptr )
            return;

        if( key == head->key ){
            Node* delNode = head;
            head = head->next;
            delete delNode;
            count--;
            return;
        }

        // Stop on the node just before the one to delete (or on the last node).
        Node *node = head;
        while( node->next != nullptr && node->next->key != key )
            node = node->next;

        if( node->next != nullptr ){
            Node* delNode = node->next;
            node->next = delNode->next;
            delete delNode;
            count --;
            return;
        }
    }
};
#endif
main_bst_basics.cpp
#include <iostream>
#include <ctime>
#include "BST.h"
using namespace std;
int main() {
srand(time(NULL));
BST<int,int> bst = BST<int,int>();
int n = 10;
for( int i = 0 ; i < n ; i ++ ){
int key = rand()%n;
// 为了后续测试方便,这里value值取和key值一样
int value = key;
cout<<key<<" ";
bst.insert(key,value);
}
cout<<endl;
// test size
cout<<"size: "<<bst.size()<<endl<<endl;
// test preOrder
cout<<"preOrder: ";
bst.preOrder();
cout<<endl<<endl;
// test inOrder
cout<<"inOrder: ";
bst.inOrder();
cout<<endl<<endl;
// test postOrder
cout<<"postOrder: ";
bst.postOrder();
cout<<endl<<endl;
// test levelOrder
cout<<"levelOrder: ";
bst.levelOrder();
cout<<endl<<endl;
// test contain and search
for( int i = 0 ; i < n ; i ++ ){
//cout<<"TRY "<<i<<" -- ";
if( bst.contain(i) ){
int* res = bst.search(i);
//cout<<"contain "<<i<<", value: "<<*res<<endl;
assert( res != NULL && *res == i );
}
//else{
// cout<<"not conatin "<<i<<endl;
//}
}
return 0;
}
main_bst_remove.cpp
#include <iostream>
#include <ctime>
#include <ctime>
#include <algorithm>
#include "BST.h"
using namespace std;
// Shuffles the first n elements of arr in place (Fisher-Yates), drawing
// random indices from rand(). Does nothing when n <= 1.
//
// The generator is deliberately NOT seeded here: re-seeding with the current
// time on every call resets the caller's random sequence and makes calls made
// within the same second produce identical permutations. Seed once with
// srand() before calling.
void shuffle( int arr[], int n ){

    // Index 0 needs no step of its own: it could only be swapped with itself.
    for( int i = n-1 ; i > 0 ; i -- ){
        int x = rand()%(i+1);
        std::swap( arr[i] , arr[x] );
    }
}
// Exercises BST removal: fills a tree with random keys, drains it with
// removeMax() while printing the maximum and the size after each step, then
// attempts to remove the keys 0..n-1 in random order.
int main() {

    srand(time(NULL));
    BST<int,int> bst = BST<int,int>();

    // const so that `order` below is an ordinary fixed-size array rather
    // than a variable-length array, which is not standard C++.
    const int n = 10000;
    for( int i = 0 ; i < n ; i ++ ){
        int key = rand()%n;
        // The value mirrors the key to keep later checks simple.
        int value = key;
        //cout<<key<<" ";
        bst.insert(key,value);
    }
    //cout<<endl;

    // // test removeMin
    // while( !bst.isEmpty() ){
    //     cout<<"min: "<<bst.minimum()<<endl;
    //     bst.removeMin();
    //     cout<<"After removeMin, size = "<<bst.size()<<endl;
    // }

    // test removeMax
    while( !bst.isEmpty() ){
        cout<<"max: "<<bst.maximum()<<endl;
        bst.removeMax();
        cout<<"After removeMax, size = "<<bst.size()<<endl;
    }

    // test remove
    // remove elements in random order
    // NOTE(review): the removeMax loop above runs until the tree is empty,
    // so contain() is false for every key here and remove() is never
    // reached. Disable that loop to actually exercise remove().
    int order[n];
    for( int i = 0 ; i < n ; i ++ )
        order[i] = i;
    shuffle( order , n );

    for( int i = 0 ; i < n ; i ++ )
        if( bst.contain( order[i] )){
            bst.remove( order[i] );
            cout<<"After remove "<<order[i]<<" size = "<<bst.size()<<endl;
        }

    return 0;
}
main_bst_sst_cmp.cpp
#include <iostream>
#include <string>
#include <vector>
#include "BST.h"
#include "SequenceST.h"
#include "FileOps.h"
using namespace std;
// Word-frequency benchmark: counts every word of bible.txt once with a BST
// and once with a SequenceST, printing the count of "god" and the elapsed
// CPU time for each structure. Prints nothing further when the file cannot
// be read (FileOps::readFile reports the failure itself).
int main() {

    string filename = "bible.txt";
    vector<string> words;
    if( FileOps::readFile(filename, words) ) {

        cout << "There are totally " << words.size() << " words in " << filename << endl;
        cout << endl;

        // test BST
        clock_t startTime = clock();   // clock() returns clock_t, not time_t
        BST<string, int> bst = BST<string, int>();
        for (vector<string>::iterator iter = words.begin(); iter != words.end(); ++iter) {
            int *res = bst.search(*iter);
            if (res == NULL)
                bst.insert(*iter, 1);
            else
                (*res)++;
        }

        // search() returns NULL when the word never occurred; report 0
        // instead of dereferencing a null pointer.
        const int *godInBst = bst.search("god");
        cout << "'god' : " << (godInBst != NULL ? *godInBst : 0) << endl;
        clock_t endTime = clock();
        cout << "BST , time: " << double(endTime - startTime) / CLOCKS_PER_SEC << " s." << endl;

        cout << endl;

        // test SST
        startTime = clock();
        SequenceST<string, int> sst = SequenceST<string, int>();
        for (vector<string>::iterator iter = words.begin(); iter != words.end(); ++iter) {
            int *res = sst.search(*iter);
            if (res == NULL)
                sst.insert(*iter, 1);
            else
                (*res)++;
        }

        // Same null guard as for the BST above.
        const int *godInSst = sst.search("god");
        cout << "'god' : " << (godInSst != NULL ? *godInSst : 0) << endl;

        endTime = clock();
        cout << "SST , time: " << double(endTime - startTime) / CLOCKS_PER_SEC << " s." << endl;
    }

    return 0;
}
main_sst.cpp
#include <iostream>
#include <ctime>
#include "SequenceST.h"
using namespace std;
int main() {
srand(time(NULL));
SequenceST<int,int> sst = SequenceST<int,int>();
int n = 100;
for( int i = 0 ; i < n ; i ++ ){
int key = rand()%n;
// 为了后续测试方便,这里value值取和key值一样
int value = key;
sst.insert(key,value);
}
cout<<"size: "<<sst.size()<<endl<<endl;
for( int i = 0 ; i < n ; i ++ ){
if( sst.contain(i) ){
int* res = sst.search(i);
assert( res != NULL && *res == i );
}
}
return 0;
}
本文对比了二叉搜索树(BST)与顺序查找表(SequenceST,简称 SST,基于链表实现)在单词频率统计任务中的性能表现:读取《圣经》文本文件(bible.txt),分别使用 BST 和 SST 两种数据结构统计每个单词的出现次数,并比较两者的运行时间。BST 的查找与插入平均只需 O(log n) 次比较,而 SST 每次操作都要线性扫描整个链表(O(n));实验表明,在大规模数据集上,BST 的效率远高于 SST。

被折叠的 条评论
为什么被折叠?



