HBST = Hash-BinarySortTree

最新推荐文章于 2025-01-10 01:07:13 发布

原创最新推荐文章于 2025-01-10 01:07:13 发布 · 1.6k 阅读

0 ·

CC 4.0 BY-SA版权

文章标签：

#null #class #insert #string #query #设计模式

读书笔记同时被 3 个专栏收录

61 篇文章

订阅专栏

设计模式

25 篇文章

订阅专栏

其他

25 篇文章

订阅专栏

本文介绍了一种将字符串Hash函数与二叉排序树相结合的数据结构实现方法，通过使用不同的Hash函数并采用策略模式来提高代码的扩展性和灵活性。

Hash-BinarySortTree

在上一篇文章中，我对常见的字符串Hash函数进行了总结。在原来的代码中，Hash缓冲数组的每个元素下“挂”的是一条单链表。这次我采用设计模式中的策略模式对上次的代码进行了重构：把每个Hash函数封装成一个类，使程序更易于扩展和修改。除了重构原来的代码之外，Hash缓冲区数组中每个元素下“挂”的也不再是单链表，而是二叉排序树（Binary Sort Tree）。本来想“挂”平衡二叉树（Balanced Binary Tree，BBT），但考虑到编程复杂度太大，最终没有实现。

UML图如下：

Demo代码如下：

/*
 * FileName: allHash.cpp
 * Author: ACb0y
 * Create Time: 2011-03-21 19:19:27
 * Last Modify Time: 2011-03-21 20:17:59
 *
 * Hash-BinarySortTree demo: a string set whose hash buckets each hold an
 * (unbalanced) binary sort tree. The hash function is pluggable through the
 * abstract class Hash (strategy pattern).
 *
 * Define HBST_NO_MAIN before compiling to leave out the demo driver, e.g.
 * when the classes are compiled into a test harness.
 */
#include <cstddef>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <vector>

typedef unsigned int uint;

/* One node of a bucket's binary sort tree. `str` is a malloc'ed copy owned by the node. */
typedef struct node
{
    char * str;
    node * lchild;
    node * rchild;
} node;

/*
 * Abstract hash strategy.
 * hashValue(str) maps a NUL-terminated string to an unsigned value; every
 * implementation in this file masks the result with 0x7FFFFFFF.
 */
class Hash
{
public:
    /* Virtual so that a strategy may be destroyed through a Hash pointer. */
    virtual ~Hash() {}
    virtual uint hashValue(char * str) = 0;
};

/* SDBM hash strategy. */
class SDBM_Hash : public Hash
{
public:
    uint hashValue(char * str);
};

/*
 * Function: uint SDBM_Hash::hashValue(char * str)
 * Purpose:  compute the SDBM hash of a string
 * Input:    str - NUL-terminated string (must not be NULL)
 * Return:   the SDBM hash of str, masked to 31 bits
 */
uint SDBM_Hash::hashValue(char * str)
{
    uint hash = 0;
    while (*str)
    {
        hash = (*str++) + (hash << 6) + (hash << 16) - hash;
    }
    return (hash & 0x7FFFFFFF);
}

/* RS hash strategy. */
class RS_Hash : public Hash
{
public:
    uint hashValue(char * str);
};

/*
 * Function: uint RS_Hash::hashValue(char * str)
 * Purpose:  compute the RS hash of a string
 * Input:    str - NUL-terminated string (must not be NULL)
 * Return:   the RS hash of str, masked to 31 bits
 */
uint RS_Hash::hashValue(char * str)
{
    uint a = 63689;
    uint b = 378551;
    uint hash = 0;
    while (*str)
    {
        hash = hash * a + (*str++);
        a *= b;
    }
    return (hash & 0x7FFFFFFF);
}

/* JS hash strategy. */
class JS_Hash : public Hash
{
public:
    uint hashValue(char * str);
};

/*
 * Function: uint JS_Hash::hashValue(char * str)
 * Purpose:  compute the JS hash of a string
 * Input:    str - NUL-terminated string (must not be NULL)
 * Return:   the JS hash of str, masked to 31 bits
 */
uint JS_Hash::hashValue(char * str)
{
    uint hash = 1315423911;
    while (*str)
    {
        hash ^= ((hash << 5) + (*str++) + (hash >> 2));
    }
    return (hash & 0x7FFFFFFF);
}

/* PJW hash strategy. */
class PJW_Hash : public Hash
{
public:
    uint hashValue(char * str);
};

/*
 * Function: uint PJW_Hash::hashValue(char * str)
 * Purpose:  compute the PJW hash of a string
 * Input:    str - NUL-terminated string (must not be NULL)
 * Return:   the PJW hash of str, masked to 31 bits
 */
uint PJW_Hash::hashValue(char * str)
{
    uint BitsInUnsignedInt = (uint)(sizeof(uint) * 8);
    uint ThreeQuarters = (uint)((BitsInUnsignedInt * 3) / 4);
    uint OneEighth = (uint)(BitsInUnsignedInt / 8);
    /* Mask selecting the top OneEighth bits of a uint. */
    uint HighBits = (uint)(0x7FFFFFFF) << (BitsInUnsignedInt - OneEighth);
    uint hash = 0;
    uint test = 0;
    while (*str)
    {
        hash = (hash << OneEighth) + (*str++);
        if ((test = hash & HighBits) != 0)
        {
            /* Fold the overflowing high bits back into the low part. */
            hash = ((hash ^ (test >> ThreeQuarters)) & (~HighBits));
        }
    }
    return (hash & 0x7FFFFFFF);
}

/* ELF hash strategy. */
class ELF_Hash : public Hash
{
public:
    uint hashValue(char * str);
};

/*
 * Function: uint ELF_Hash::hashValue(char * str)
 * Purpose:  compute the ELF hash of a string
 * Input:    str - NUL-terminated string (must not be NULL)
 * Return:   the ELF hash of str, masked to 31 bits
 */
uint ELF_Hash::hashValue(char * str)
{
    uint hash = 0;
    uint x = 0;
    while (*str)
    {
        hash = (hash << 4) + (*str++);
        if ((x = hash & 0xF0000000u) != 0)
        {
            hash ^= (x >> 24);
            hash &= ~x;
        }
    }
    return (hash & 0x7FFFFFFF);
}

/* BKDR hash strategy. */
class BKDR_Hash : public Hash
{
public:
    uint hashValue(char * str);
};

/*
 * Function: uint BKDR_Hash::hashValue(char * str)
 * Purpose:  compute the BKDR hash of a string
 * Input:    str - NUL-terminated string (must not be NULL)
 * Return:   the BKDR hash of str, masked to 31 bits
 */
uint BKDR_Hash::hashValue(char * str)
{
    uint seed = 131; /* 31, 131, 1313, 13131, 131313 etc... */
    uint hash = 0;
    while (*str)
    {
        hash = hash * seed + (*str++);
    }
    return (hash & 0x7FFFFFFF);
}

/* DJB hash strategy. */
class DJB_Hash : public Hash
{
public:
    uint hashValue(char * str);
};

/*
 * Function: uint DJB_Hash::hashValue(char * str)
 * Purpose:  compute the DJB hash of a string
 * Input:    str - NUL-terminated string (must not be NULL)
 * Return:   the DJB hash of str, masked to 31 bits
 */
uint DJB_Hash::hashValue(char * str)
{
    uint hash = 5381;
    while (*str)
    {
        hash += (hash << 5) + (*str++);
    }
    return (hash & 0x7FFFFFFF);
}

/* AP hash strategy. */
class AP_Hash : public Hash
{
public:
    uint hashValue(char * str);
};

/*
 * Function: uint AP_Hash::hashValue(char * str)
 * Purpose:  compute the AP hash of a string
 * Input:    str - NUL-terminated string (must not be NULL)
 * Return:   the AP hash of str, masked to 31 bits
 */
uint AP_Hash::hashValue(char * str)
{
    uint hash = 0;
    int i;
    for (i = 0; *str; ++i)
    {
        /* Characters at even and odd positions are mixed in differently. */
        if ((i & 1) == 0)
        {
            hash ^= ((hash << 7) ^ (*str++) ^ (hash >> 3));
        }
        else
        {
            hash ^= (~((hash << 11) ^ (*str++) ^ (hash >> 5)));
        }
    }
    return (hash & 0x7FFFFFFF);
}

/*
 * String hash function as found in GCC's ext/hash_fun.h.
 * Function: size_t __stl_hash_string(const char * __s)
 * Purpose:  compute h = 5 * h + c over all characters of the string
 * Input:    __s - NUL-terminated string (must not be NULL)
 * Return:   the hash value of __s
 * NOTE(review): the double-underscore names are reserved identifiers; they are
 * kept only so the function keeps the name it has in the original file.
 */
inline size_t __stl_hash_string(const char * __s)
{
    unsigned long __h = 0;
    for (; *__s; ++__s)
    {
        __h = 5 * __h + *__s;
    }
    return size_t(__h);
}

/*
 * String hash container.
 * Strings are distributed over a fixed number of buckets by the supplied Hash
 * strategy; each bucket is the root of a binary sort tree ordered by strcmp.
 * The container stores its own copies of the inserted strings and does not
 * take ownership of the Hash object.
 */
class HashString
{
public:
    explicit HashString(Hash * pHash);
    ~HashString();

    bool insert(char * str);
    char * query(char * str);

private:
    /* Number of buckets; hash values are reduced modulo this count. */
    static const uint kBucketCount = 100000;

    /* The container owns raw memory, so copying is disabled (declared, not defined). */
    HashString(const HashString &);
    HashString & operator=(const HashString &);

    node * getNode();
    node * createNode(const char * str);
    void clear(node * root);
    uint getHashValue(char * str);

    Hash * m_pHash;
    /* Bucket table kept on the heap so a HashString can safely be a local variable. */
    std::vector<node *> m_pBuffer;
};

/*
 * Function: node * HashString::getNode()
 * Purpose:  allocate one empty tree node
 * Return:   pointer to the new node, or NULL if the allocation failed
 */
node * HashString::getNode()
{
    node * pNew = static_cast<node *>(std::malloc(sizeof(node)));
    if (NULL == pNew)
    {
        return NULL;
    }
    pNew->str = NULL;
    pNew->lchild = NULL;
    pNew->rchild = NULL;
    return pNew;
}

/*
 * Function: node * HashString::createNode(const char * str)
 * Purpose:  allocate a leaf node holding a private copy of str
 * Input:    str - NUL-terminated string to copy (must not be NULL)
 * Return:   the new node, or NULL if any allocation failed (nothing is leaked)
 */
node * HashString::createNode(const char * str)
{
    node * pNew = getNode();
    if (NULL == pNew)
    {
        return NULL;
    }
    const std::size_t size = std::strlen(str) + 1;
    pNew->str = static_cast<char *>(std::malloc(size));
    if (NULL == pNew->str)
    {
        std::free(pNew);
        return NULL;
    }
    std::memcpy(pNew->str, str, size);
    return pNew;
}

/*
 * Function: void HashString::clear(node * root)
 * Purpose:  release a whole binary sort tree, including the stored strings
 * Input:    root - root of the tree (may be NULL)
 */
void HashString::clear(node * root)
{
    if (NULL == root)
    {
        return;
    }
    clear(root->lchild);
    clear(root->rchild);
    std::free(root->str);
    std::free(root);
}

/*
 * Function: uint HashString::getHashValue(char * str)
 * Purpose:  compute the bucket index of str
 * Input:    str - NUL-terminated string (must not be NULL)
 * Return:   the strategy's hash value reduced modulo the bucket count
 */
uint HashString::getHashValue(char * str)
{
    return m_pHash->hashValue(str) % kBucketCount;
}

/*
 * Function: HashString::HashString(Hash * pHash)
 * Purpose:  create an empty container
 * Input:    pHash - hash strategy to use; not owned, must outlive the container
 */
HashString::HashString(Hash * pHash)
    : m_pHash(pHash),
      m_pBuffer(kBucketCount, static_cast<node *>(NULL))
{
}

/*
 * Function: HashString::~HashString()
 * Purpose:  release every bucket's tree
 */
HashString::~HashString()
{
    for (std::size_t i = 0; i < m_pBuffer.size(); ++i)
    {
        clear(m_pBuffer[i]);
        m_pBuffer[i] = NULL;
    }
}

/*
 * Function: bool HashString::insert(char * str)
 * Purpose:  add a copy of str to the container
 * Input:    str - NUL-terminated string to insert
 * Return:   true  - str is now stored (also when it was already present;
 *                   an equal string is not stored a second time)
 *           false - str or the hash strategy is NULL, or memory ran out
 */
bool HashString::insert(char * str)
{
    if (NULL == str || NULL == m_pHash)
    {
        return false;
    }

    /* Walk down the tree remembering the link that has to receive the new node. */
    node ** ppLink = &m_pBuffer[getHashValue(str)];
    while (NULL != *ppLink)
    {
        const int cmpRes = std::strcmp(str, (*ppLink)->str);
        if (0 == cmpRes)
        {
            return true;
        }
        ppLink = (cmpRes > 0) ? &(*ppLink)->rchild : &(*ppLink)->lchild;
    }

    node * pNew = createNode(str);
    if (NULL == pNew)
    {
        return false;
    }
    *ppLink = pNew;
    return true;
}

/*
 * Function: char * HashString::query(char * str)
 * Purpose:  look up str in the container
 * Input:    str - NUL-terminated string to search for
 * Return:   pointer to the container's stored copy of the string, or NULL if
 *           it is not present (or if str or the hash strategy is NULL)
 */
char * HashString::query(char * str)
{
    if (NULL == str || NULL == m_pHash)
    {
        return NULL;
    }

    node * pCur = m_pBuffer[getHashValue(str)];
    while (NULL != pCur)
    {
        const int cmpRes = std::strcmp(str, pCur->str);
        if (0 == cmpRes)
        {
            return pCur->str;
        }
        pCur = (cmpRes > 0) ? pCur->rchild : pCur->lchild;
    }
    return NULL;
}

/*
 * Test-data generator, kept disabled for reference.
 * NOTE(review): it writes a single count as the header while main() reads two
 * counts (n and k), and its loop emits 100001 strings; adjust before use.
 *
 * void generateStr()
 * {
 *     int tmp = rand() % 150;
 *     ++tmp;
 *     for (int i = 0; i < tmp; ++i)
 *     {
 *         int t = rand() % 26;
 *         printf("%c", t + 'a');
 *     }
 *     printf("\n");
 * }
 *
 * void generateAll()
 * {
 *     printf("100000\n");
 *     for (int i = 0; i <= 100000; ++i)
 *     {
 *         generateStr();
 *     }
 * }
 */

#ifndef HBST_NO_MAIN

/* Discard the remainder of the current stdin line, including its newline. */
static void skipRestOfLine()
{
    int ch;
    while ((ch = std::getchar()) != EOF && ch != '\n')
    {
        /* nothing */
    }
}

/*
 * Read one line from stdin into buf (at most cap - 1 characters) without the
 * trailing newline. Characters of an over-long line that do not fit are dropped.
 * Returns false when nothing could be read (end of input or read error).
 */
static bool readLine(char * buf, int cap)
{
    if (NULL == std::fgets(buf, cap, stdin))
    {
        return false;
    }
    const std::size_t len = std::strlen(buf);
    if (len > 0 && '\n' == buf[len - 1])
    {
        buf[len - 1] = '\0';
    }
    else if (len + 1 == static_cast<std::size_t>(cap))
    {
        /* Buffer filled before the newline was seen: drop the rest of the line. */
        skipRestOfLine();
    }
    return true;
}

/*
 * Demo driver. Reads from in.txt and writes to out.txt. The input consists of
 * groups: a header line "n k", then n strings to insert and k strings to look
 * up, one per line. After each group the running hit/miss totals are printed.
 */
int main()
{
    /*
    srand(time(NULL));
    freopen("in.txt", "w", stdout);
    generateAll();
    */
    if (NULL == std::freopen("in.txt", "r", stdin))
    {
        std::fprintf(stderr, "cannot open in.txt for reading\n");
        return 1;
    }
    if (NULL == std::freopen("out.txt", "w", stdout))
    {
        std::fprintf(stderr, "cannot open out.txt for writing\n");
        return 1;
    }

    int n = 0;
    int k = 0;
    char str[200];
    BKDR_Hash bkdr_hash;
    HashString table(&bkdr_hash);
    int find = 0;
    int unfind = 0;

    /* Stop on end of input and on a malformed header (fewer than two numbers). */
    while (std::scanf("%d%d", &n, &k) == 2)
    {
        /* Consume the rest of the header line so the first string is not read as empty. */
        skipRestOfLine();

        for (int i = 0; i < n && readLine(str, sizeof(str)); ++i)
        {
            table.insert(str);
        }
        for (int i = 0; i < k && readLine(str, sizeof(str)); ++i)
        {
            char * pStr = table.query(str);
            if (NULL == pStr)
            {
                ++unfind;
                std::cout << "not found the string." << '\n';
            }
            else
            {
                ++find;
                std::cout << "find the string = " << pStr << '\n';
            }
        }
        std::cout << "find = " << find << '\n';
        std::cout << "unfind = " << unfind << '\n';
    }
    return 0;
}

#endif /* HBST_NO_MAIN */