Binary-Search-010-Tree-CSDN博客

本文对比了二叉搜索树(BST)与序列搜索列表(SST)在单词频率统计任务中的性能表现，通过读取《圣经》文本文件，使用BST和SST两种数据结构分别统计单词出现次数，并比较其运行时间。实验证明，在大规模数据集上，BST的效率远高于SST。

BST.h

#ifndef BINARY_SEARCH_TREE_BST_H
#define BINARY_SEARCH_TREE_BST_H

#include <iostream>
#include <queue>
#include <cassert>

using namespace std;


// Unbalanced binary search tree that maps unique keys to values.
//
// Key must support ==, < and >, and must be streamable to std::cout for the
// traversal printers. Key and Value must be copy-constructible. The tree is
// never rebalanced. All recursive helpers recurse once per level of the tree.
//
// The tree owns its nodes: copying a BST performs a deep copy, and the
// destructor releases every node.
template<typename Key, typename Value>
class BST{

private:
    struct Node{
        Key key;
        Value value;
        Node *left;
        Node *right;

        // Creates a leaf node holding (key, value).
        Node(const Key& key, const Value& value)
            : key(key), value(value), left(nullptr), right(nullptr) {}

        // Creates a shallow clone of *node: same key/value and the SAME child
        // pointers. Used by remove() to build the replacement node.
        explicit Node(const Node *node)
            : key(node->key), value(node->value),
              left(node->left), right(node->right) {}
    };

    Node *root;   // root of the tree, nullptr when the tree is empty
    int count;    // number of nodes currently stored

public:
    // Creates an empty tree.
    BST() : root(nullptr), count(0) {}

    // Deep-copies other so the two trees never share nodes. Without this the
    // implicit member-wise copy would make both destructors free the same nodes.
    BST(const BST& other) : root(nullptr), count(0) {
        root = copySubtree(other.root);
        count = other.count;
    }

    // Replaces the contents of this tree with a deep copy of other.
    BST& operator=(const BST& other){
        if( this != &other ){
            BST copy(other);

            // Swap state with the temporary; its destructor then frees the
            // nodes this tree used to own.
            Node *oldRoot = root;
            int oldCount = count;
            root = copy.root;
            count = copy.count;
            copy.root = oldRoot;
            copy.count = oldCount;
        }
        return *this;
    }

    // Frees every node; afterwards the node counter must be back at zero.
    ~BST(){
        destroy(root);
        assert( count == 0 );
    }

    // Returns the number of elements stored in the tree.
    int size() const {
        return count;
    }

    // Returns true when the tree holds no elements.
    bool isEmpty() const {
        return count == 0;
    }

    // Inserts (key, value); if key is already present its value is overwritten.
    void insert(Key key, Value value){
        root = insert(root, key, value);
    }

    // Returns true when an element with the given key exists.
    bool contain(Key key) const {
        return contain(root, key);
    }

    // Returns a pointer to the value stored under key, or a null pointer when
    // the key is absent. The pointer refers to storage inside a tree node.
    Value* search(Key key){
        return search(root, key);
    }

    // Prints all keys in pre-order (node, left, right), each followed by a space.
    void preOrder(){
        preOrder(root);
    }

    // Prints all keys in in-order (left, node, right), i.e. ascending order.
    void inOrder(){
        inOrder(root);
    }

    // Prints all keys in post-order (left, right, node).
    void postOrder(){
        postOrder(root);
    }

    // Prints all keys level by level, from the root downwards.
    void levelOrder(){
        // An empty tree has nothing to print; pushing a null root into the
        // queue would be dereferenced below.
        if( root == nullptr )
            return;

        std::queue<Node*> pending;
        pending.push(root);
        while( !pending.empty() ){
            Node *node = pending.front();
            pending.pop();
            std::cout<<node->key<<" ";
            if( node->left )
                pending.push(node->left);
            if( node->right )
                pending.push(node->right);
        }
    }

    // Returns the smallest key. The tree must not be empty.
    Key minimum(){
        assert( count != 0 );
        Node* minNode = minimum(root);
        return minNode->key;
    }

    // Returns the largest key. The tree must not be empty.
    Key maximum(){
        assert( count != 0 );
        Node* maxNode = maximum(root);
        return maxNode->key;
    }

    // Removes the node holding the smallest key; no-op on an empty tree.
    void removeMin(){
        if( root )
            root = removeMin( root );
    }

    // Removes the node holding the largest key; no-op on an empty tree.
    void removeMax(){
        if( root )
            root = removeMax( root );
    }

    // Removes the node holding key; no-op when the key is absent.
    void remove(Key key){
        root = remove(root, key);
    }

private:
    // Returns a deep copy of the subtree rooted at node (nullptr for an empty
    // subtree). Does not touch count; the caller sets it.
    static Node* copySubtree(const Node* node){
        if( node == nullptr )
            return nullptr;

        Node *copy = new Node(node->key, node->value);
        copy->left = copySubtree(node->left);
        copy->right = copySubtree(node->right);
        return copy;
    }

    // Inserts (key, value) into the subtree rooted at node and returns the
    // root of the resulting subtree.
    Node* insert(Node* node, const Key& key, const Value& value){
        if( node == nullptr ){
            count += 1;
            return new Node(key, value);
        }

        if( key == node->key )
            node->value = value;
        else if( key < node->key )
            node->left = insert(node->left, key, value);
        else // key > node->key
            node->right = insert(node->right, key, value);

        return node;
    }

    // Returns true when the subtree rooted at node contains key.
    bool contain(Node* node, const Key& key) const {
        if( node == nullptr )
            return false;

        if( key == node->key )
            return true;
        else if( key < node->key )
            return contain(node->left, key);
        else
            return contain(node->right, key);
    }

    // Returns a pointer to the value stored under key in the subtree rooted
    // at node, or a null pointer when the key is absent.
    Value* search(Node* node, const Key& key){

        if( node == nullptr )
            return nullptr;

        if( key == node->key )
            return &(node->value);
        else if( key < node->key )
            return search(node->left, key);
        else
            return search(node->right, key);
    }

    // Pre-order print of the subtree rooted at node.
    void preOrder(Node* node){

        if( node != nullptr ){
            std::cout<<node->key<<" ";
            preOrder(node->left);
            preOrder(node->right);
        }
    }

    // In-order print of the subtree rooted at node.
    void inOrder(Node* node){

        if( node != nullptr ){
            inOrder(node->left);
            std::cout<<node->key<<" ";
            inOrder(node->right);
        }
    }

    // Post-order print of the subtree rooted at node.
    void postOrder(Node* node){

        if( node != nullptr ){
            postOrder(node->left);
            postOrder(node->right);
            std::cout<<node->key<<" ";
        }
    }

    // Frees the subtree rooted at node (children first), decrementing count
    // once per freed node.
    void destroy(Node* node){
        if( node != nullptr ){
            destroy(node->left);
            destroy(node->right);
            delete node;
            count--;
        }
    }

    // Returns the node with the smallest key in the subtree rooted at node.
    // node must not be null.
    Node* minimum(Node* node){
        if( node->left == nullptr )
            return node;
        return minimum( node->left );
    }

    // Returns the node with the largest key in the subtree rooted at node.
    // node must not be null.
    Node* maximum(Node* node){
        if( node->right == nullptr )
            return node;
        return maximum( node->right );
    }

    // Deletes the smallest node of the subtree rooted at node and returns the
    // root of the resulting subtree. node must not be null.
    Node* removeMin(Node* node){

        if( node->left == nullptr ){
            // node is the minimum: its right child (possibly null) takes its place.
            Node* rightNode = node->right;
            delete node;
            count --;
            return rightNode;
        }

        node->left = removeMin( node->left );
        return node;
    }

    // Deletes the largest node of the subtree rooted at node and returns the
    // root of the resulting subtree. node must not be null.
    Node* removeMax(Node* node){

        if( node->right == nullptr ){
            // node is the maximum: its left child (possibly null) takes its place.
            Node* leftNode = node->left;
            delete node;
            count --;
            return leftNode;
        }

        node->right = removeMax( node->right );
        return node;
    }

    // Deletes the node holding key from the subtree rooted at node and returns
    // the root of the resulting subtree.
    Node* remove(Node* node, const Key& key){

        if( node == nullptr )
            return nullptr;

        if( key < node->key ){
            node->left = remove( node->left , key );
            return node;
        }
        else if( key > node->key ){
            node->right = remove( node->right, key );
            return node;
        }
        else{ // key == node->key

            // At most one child: that child (possibly null) replaces node.
            if( node->left == nullptr ){
                Node *rightNode = node->right;
                delete node;
                count--;
                return rightNode;
            }

            if( node->right == nullptr ){
                Node *leftNode = node->left;
                delete node;
                count--;
                return leftNode;
            }

            assert( node->left != nullptr && node->right != nullptr );

            // Two children: the in-order successor (minimum of the right
            // subtree) replaces node. A copy is made first because removeMin
            // below deletes the original successor node.
            Node *successor = new Node(minimum(node->right));
            count ++;

            successor->right = removeMin(node->right);
            successor->left = node->left;

            delete node;
            count --;

            return successor;
        }
    }

};
#endif

FileOps.h

#ifndef BINARY_SEARCH_TREE_FILEOPS_H
#define BINARY_SEARCH_TREE_FILEOPS_H

#include <cctype>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>

using namespace std;


// Helpers for splitting a text file into lower-cased alphabetic words.
// All functions are inline because they are defined in a header; otherwise
// including this header from two translation units breaks the one-definition rule.
namespace FileOps{

    // Returns the index of the first alphabetic character of s at or after
    // start, or s.length() when there is none. A negative start is treated as 0.
    inline int firstCharacterIndex(const std::string& s, int start){
        const int length = static_cast<int>(s.length());
        for( int i = ( start < 0 ? 0 : start ) ; i < length ; i ++ )
            // <cctype> functions require a value representable as unsigned
            // char; passing a negative plain char is undefined behaviour.
            if( std::isalpha(static_cast<unsigned char>(s[i])) )
                return i;
        return length;
    }

    // Returns a copy of s with every character converted via tolower.
    inline std::string lowerS( const std::string& s){

        std::string ret;
        ret.reserve(s.length());
        for( std::string::size_type i = 0 ; i < s.length() ; i ++ )
            ret += static_cast<char>( std::tolower(static_cast<unsigned char>(s[i])) );
        return ret;
    }

    // Reads the file called filename and appends each word it contains, in
    // lower case, to words. A word is a maximal run of characters for which
    // isalpha is true; everything else acts as a separator.
    // Returns false (after printing a message to stdout) when the file cannot
    // be opened, true otherwise.
    inline bool readFile(const std::string& filename, std::vector<std::string> &words){

        std::ifstream file(filename);
        if( !file.is_open() ){
            std::cout<<"Can not open "<<filename<<" !!!"<<std::endl;
            return false;
        }

        // Load the whole file; every line read is terminated with '\n'.
        std::string contents;
        std::string line;
        while( std::getline(file, line) ){
            contents += line;
            contents += '\n';
        }
        file.close();

        const int length = static_cast<int>(contents.length());
        int start = firstCharacterIndex(contents, 0);
        while( start < length ){
            // Advance end to one past the last letter of the current word.
            int end = start + 1;
            while( end < length && std::isalpha(static_cast<unsigned char>(contents[end])) )
                end ++;

            words.push_back( lowerS( contents.substr(start, end - start) ) );
            start = firstCharacterIndex(contents, end);
        }

        return true;
    }
}
#endif

SequenceST.h

#ifndef BINARY_SEARCH_TREE_SEQUENCESEARCHLIST_H
#define BINARY_SEARCH_TREE_SEQUENCESEARCHLIST_H

#include <iostream>
#include <cassert>

using namespace std;


// Sequential-search symbol table: an unsorted singly linked list of
// (key, value) pairs with unique keys. Every lookup walks the list from the
// head, comparing keys with ==. New keys are inserted at the head.
//
// The table owns its nodes: copying performs a deep copy, and the destructor
// releases every node.
template<typename Key, typename Value>
class SequenceST{
private:
    struct Node{
        Key key;
        Value value;
        Node *next;

        // Creates an unlinked node holding (key, value).
        Node(const Key& key, const Value& value)
            : key(key), value(value), next(nullptr) {}
    };

    Node* head;   // first node of the list, nullptr when the table is empty
    int count;    // number of nodes currently stored

    // Returns the node holding key, or nullptr when the key is absent.
    Node* find(const Key& key) const {
        for( Node *cur = head ; cur != nullptr ; cur = cur->next )
            if( key == cur->key )
                return cur;
        return nullptr;
    }

public:
    // Creates an empty table.
    SequenceST() : head(nullptr), count(0) {}

    // Deep-copies other, preserving list order. Without this the implicit
    // member-wise copy would make both destructors free the same nodes.
    SequenceST(const SequenceST& other) : head(nullptr), count(0) {
        Node **tail = &head;   // link where the next copied node is attached
        for( const Node *cur = other.head ; cur != nullptr ; cur = cur->next ){
            *tail = new Node(cur->key, cur->value);
            tail = &((*tail)->next);
            count ++;
        }
    }

    // Replaces the contents of this table with a deep copy of other.
    SequenceST& operator=(const SequenceST& other){
        if( this != &other ){
            SequenceST copy(other);

            // Swap state with the temporary; its destructor then frees the
            // nodes this table used to own.
            Node *oldHead = head;
            int oldCount = count;
            head = copy.head;
            count = copy.count;
            copy.head = oldHead;
            copy.count = oldCount;
        }
        return *this;
    }

    // Frees every node; afterwards the node counter must be back at zero.
    ~SequenceST(){
        while( head != nullptr ){
            Node *node = head;
            head = head->next;
            delete node;
            count --;
        }

        assert( head == nullptr && count == 0 );
    }

    // Returns the number of stored pairs.
    int size() const {
        return count;
    }

    // Returns true when the table holds no pairs.
    bool isEmpty() const {
        return count == 0;
    }

    // Inserts (key, value); if key is already present its value is overwritten.
    void insert(Key key, Value value){
        if( Node *existing = find(key) ){
            existing->value = value;
            return;
        }

        Node *newNode = new Node(key, value);
        newNode->next = head;
        head = newNode;
        count ++;
    }

    // Returns true when key is present.
    bool contain(Key key) const {
        return find(key) != nullptr;
    }

    // Returns a pointer to the value stored under key, or a null pointer when
    // the key is absent. The pointer refers to storage inside a list node.
    Value* search(Key key){
        Node *node = find(key);
        return node != nullptr ? &(node->value) : nullptr;
    }

    // Removes the pair stored under key; no-op when the key is absent or the
    // table is empty.
    void remove(Key key){

        // link points at the pointer that refers to the current node (head or
        // some node's next), so head and interior removals share one code
        // path and an empty list is handled without dereferencing head.
        Node **link = &head;
        while( *link != nullptr && !( key == (*link)->key ) )
            link = &((*link)->next);

        if( *link != nullptr ){
            Node *delNode = *link;
            *link = delNode->next;
            delete delNode;
            count --;
        }
    }
};
#endif

main_bst_basics.cpp

#include <iostream>
#include <ctime>
#include "BST.h"

using namespace std;


int main() {

    srand(time(NULL));
    BST<int,int> bst = BST<int,int>();

    int n = 10;
    for( int i = 0 ; i < n ; i ++ ){
        int key = rand()%n;
        // 为了后续测试方便,这里value值取和key值一样
        int value = key;
        cout<<key<<" ";
        bst.insert(key,value);
    }
    cout<<endl;

    // test size
    cout<<"size: "<<bst.size()<<endl<<endl;

    // test preOrder
    cout<<"preOrder: ";
    bst.preOrder();
    cout<<endl<<endl;

    // test inOrder
    cout<<"inOrder: ";
    bst.inOrder();
    cout<<endl<<endl;

    // test postOrder
    cout<<"postOrder: ";
    bst.postOrder();
    cout<<endl<<endl;

    // test levelOrder
    cout<<"levelOrder: ";
    bst.levelOrder();
    cout<<endl<<endl;

    // test contain and search
    for( int i = 0 ; i < n ; i ++ ){
        //cout<<"TRY "<<i<<" -- ";
        if( bst.contain(i) ){
            int* res = bst.search(i);
            //cout<<"contain "<<i<<", value: "<<*res<<endl;
            assert( res != NULL && *res == i );
        }
        //else{
        //    cout<<"not conatin "<<i<<endl;
        //}
    }

    return 0;
}

main_bst_remove.cpp

#include <iostream>
#include <ctime>
#include <ctime>
#include <algorithm>
#include "BST.h"

using namespace std;


// Randomly permutes the first n elements of arr in place (Fisher-Yates),
// drawing random numbers from rand().
//
// The function deliberately does NOT call srand(): seeding is the caller's job
// and should happen once per program. Reseeding with time(NULL) here made two
// calls within the same second produce the same permutation and reset the
// caller's random sequence.
void shuffle( int arr[], int n ){

    // Position 0 needs no iteration: it could only be swapped with itself.
    for( int i = n-1 ; i > 0 ; i -- ){
        int x = rand()%(i+1);
        std::swap( arr[i] , arr[x] );
    }
}

// Exercises the BST removal operations: drains a randomly filled tree with
// removeMax(), then refills it and removes keys one by one in random order.
int main() {

    srand(time(NULL));
    BST<int,int> bst;

    // const so that order[n] below is an ordinary fixed-size array rather than
    // a non-standard variable-length array.
    const int n = 10000;
    for( int i = 0 ; i < n ; i ++ ){
        int key = rand()%n;
        // The value is set equal to the key to keep later checks simple.
        int value = key;
        bst.insert(key,value);
    }

    // test removeMax: empties the tree from the largest key downwards
    while( !bst.isEmpty() ){
        cout<<"max: "<<bst.maximum()<<endl;
        bst.removeMax();
        cout<<"After removeMax, size = "<<bst.size()<<endl;
    }

    // The loop above leaves the tree empty, so refill it; otherwise the
    // remove() test below would never find a key to remove.
    for( int i = 0 ; i < n ; i ++ ){
        int key = rand()%n;
        bst.insert(key,key);
    }

    // test remove: delete the keys that are present, in random order
    int order[n];
    for( int i = 0 ; i < n ; i ++ )
        order[i] = i;
    shuffle( order , n );

    for( int i = 0 ; i < n ; i ++ )
        if( bst.contain( order[i] )){
            bst.remove( order[i] );
            cout<<"After remove "<<order[i]<<" size = "<<bst.size()<<endl;
        }

    return 0;
}

main_bst_sst_cmp.cpp

#include <iostream>
#include <string>
#include <vector>
#include "BST.h"
#include "SequenceST.h"
#include "FileOps.h"

using namespace std;


int main() {

    string filename = "bible.txt";
    vector<string> words;
    if( FileOps::readFile(filename, words) ) {

        cout << "There are totally " << words.size() << " words in " << filename << endl;

        cout << endl;


        // test BST
        time_t startTime = clock();
        BST<string, int> bst = BST<string, int>();
        for (vector<string>::iterator iter = words.begin(); iter != words.end(); iter++) {
            int *res = bst.search(*iter);
            if (res == NULL)
                bst.insert(*iter, 1);
            else
                (*res)++;
        }

        cout << "'god' : " << *bst.search("god") << endl;
        time_t endTime = clock();
        cout << "BST , time: " << double(endTime - startTime) / CLOCKS_PER_SEC << " s." << endl;

        cout << endl;


        // test SST
        startTime = clock();
        SequenceST<string, int> sst = SequenceST<string, int>();
        for (vector<string>::iterator iter = words.begin(); iter != words.end(); iter++) {
            int *res = sst.search(*iter);
            if (res == NULL)
                sst.insert(*iter, 1);
            else
                (*res)++;
        }

        cout << "'god' : " << *sst.search("god") << endl;

        endTime = clock();
        cout << "SST , time: " << double(endTime - startTime) / CLOCKS_PER_SEC << " s." << endl;

    }

    return 0;
}

main_sst.cpp

#include <iostream>
#include <ctime>
#include "SequenceST.h"

using namespace std;


int main() {

    srand(time(NULL));
    SequenceST<int,int> sst = SequenceST<int,int>();

    int n = 100;
    for( int i = 0 ; i < n ; i ++ ){
        int key = rand()%n;
        // 为了后续测试方便,这里value值取和key值一样
        int value = key;
        sst.insert(key,value);
    }

    cout<<"size: "<<sst.size()<<endl<<endl;

    for( int i = 0 ; i < n ; i ++ ){
        if( sst.contain(i) ){
            int* res = sst.search(i);
            assert( res != NULL && *res == i );
        }
    }

    return 0;
}