hash算法总结

最新推荐文章于 2024-02-18 19:54:55 发布

原创最新推荐文章于 2024-02-18 19:54:55 发布 · 467 阅读

0 ·

CC 4.0 BY-SA版权

文章标签：

#算法 #散列函数

算法学习专栏收录该内容

5 篇文章

订阅专栏

本文总结了Hash算法的基础知识，包括Hash简介和常用的字符串Hash方法，如加法、乘法和位运算Hash。通过示例展示了如何使用位运算来混合输入元素，以实现更高效的哈希计算。

一.Hash简介

概念：
把任意长度的输入，通过hash算法，变换成固定长度的输出，该输出就是散列值。这种转换是一种压缩映射，也就是，散列值的空间通常远小于输入的空间，不同的输入可能会散列成相同的输出。
哈希表：
若结构中存在和关键字K相等的记录，则必定在f(K)的存储位置上。由此，不需比较便可直接取得所查记录。称这个对应关系f为散列函数(Hash function)，按这个思想建立的表为哈希表。
哈希冲突：
对不同的关键字可能得到同一散列地址，即key1≠key2，而f(key1)=f(key2)，这种现象称冲突。
解决Hash冲突常用的是拉链法（哈希链表），即hash表中的每个元素是一个链表，hash值相同的关键字挂在同一个链表上。查找时，从该链表的第一个节点开始逐个比较key值，直到找到相同的key或遍历完整个链表为止。

时间复杂度：
使用hash算法对关键字进行查找，理论上时间复杂度是O(1)，但实际取决于散列函数的选取；最坏的情况（所有关键字都落入同一个链表）是O(n)。

二.常用字符串hash

关键字是字符串。

常用hash函数：

1.加法hash

// Additive string hash: starts from the key length, adds every byte of the
// key, and reduces the sum modulo `prime`.
//
// key   - string to hash (may be empty).
// prime - modulus, normally the bucket count; when it is 0 the unreduced sum
//         is returned, because `% 0` is undefined behaviour.
// Returns a value in [0, prime) whenever prime > 0.
unsigned additiveHash(string key, unsigned prime)
{
   unsigned hash = static_cast<unsigned>(key.length());
   for (string::size_type i = 0; i < key.length(); i++)
   {
      // Widen through unsigned char: where plain char is signed, bytes >= 0x80
      // (e.g. UTF-8 text) would otherwise be added as negative numbers and the
      // result would differ between platforms.
      hash += static_cast<unsigned char>(key[i]);
   }

   return (prime != 0) ? (hash % prime) : hash;
}

2. 乘法hash

// Multiplicative (Bernstein-style) string hash: for every byte of the key the
// running value is multiplied by 33 and the byte is added; the result is then
// reduced modulo `prime`. Unsigned overflow wraps, which is intended here.
//
// key   - string to hash (may be empty, which hashes to 0).
// prime - modulus, normally the bucket count; when it is 0 the unreduced value
//         is returned, because `% 0` is undefined behaviour.
// Returns a value in [0, prime) whenever prime > 0.
unsigned bernstein(string key, unsigned prime)
{
   unsigned hash = 0;
   for (string::size_type i = 0; i < key.length(); ++i)
   {
      // Widen through unsigned char so bytes >= 0x80 contribute 128..255 even
      // where plain char is signed.
      hash = 33 * hash + static_cast<unsigned char>(key[i]);
   }

   return (prime != 0) ? (hash % prime) : hash;
}
33为推荐的乘数，另外推荐的乘数还有：131, 1313, 13131, 131313等等。

3. 位运算Hash

// Shift/XOR string hash: starts from the key length and, for every byte,
// combines (hash << 4), (hash >> 28) and the byte with XOR, then reduces the
// result modulo `prime`.
//
// key   - string to hash (may be empty).
// prime - modulus, normally the bucket count; when it is 0 the unreduced value
//         is returned, because `% 0` is undefined behaviour.
// Returns a value in [0, prime) whenever prime > 0.
unsigned rotatingHash(string key, unsigned prime)
{
   unsigned hash = static_cast<unsigned>(key.length());
   for (string::size_type i = 0; i < key.length(); ++i)
   {
      // (hash << 4) ^ (hash >> 28) is a 4-bit left rotation when unsigned is
      // 32 bits wide. The byte is widened through unsigned char: a
      // sign-extended negative char would XOR ones into the upper 24 bits and
      // wipe out the mixing done so far.
      hash = (hash << 4) ^ (hash >> 28) ^ static_cast<unsigned char>(key[i]);
   }
   return (prime != 0) ? (hash % prime) : hash;
}

通过利用各种位运算（常见的是移位和异或）来充分地混合输入元素。

三.示例

下面是常用字符串hash算法的example：

#include <iostream>
#include <string>
#include <vector>
#include <time.h>
#include <stdlib.h>
#include <string.h>
using namespace std;
// Additive string hash: key length plus the sum of all key bytes, reduced
// modulo `prime`.
//
// key   - string to hash (may be empty).
// prime - modulus (the callers in this file pass the table length); when it is
//         0 the unreduced sum is returned instead of evaluating `% 0`, which is
//         undefined behaviour.
// Returns a value in [0, prime) whenever prime > 0.
unsigned additiveHash(string key, unsigned prime)
{
        unsigned hash = static_cast<unsigned>(key.length());
        for (string::size_type i = 0; i < key.length(); i++)
        {
                // Go through unsigned char so that bytes >= 0x80 are added as
                // 128..255 regardless of whether plain char is signed.
                hash += static_cast<unsigned char>(key[i]);
        }

        return (prime != 0) ? (hash % prime) : hash;
}
// Multiplicative (Bernstein-style) string hash: hash = 33 * hash + byte for
// every byte of the key, reduced modulo `prime`. Unsigned overflow wraps,
// which is intended here.
//
// key   - string to hash (may be empty, which hashes to 0).
// prime - modulus (the callers in this file pass the table length); when it is
//         0 the unreduced value is returned instead of evaluating `% 0`, which
//         is undefined behaviour.
// Returns a value in [0, prime) whenever prime > 0.
unsigned bernstein(string key, unsigned prime)
{
        unsigned hash = 0;
        for (string::size_type i = 0; i < key.length(); ++i)
        {
                // Go through unsigned char so that bytes >= 0x80 are added as
                // 128..255 regardless of whether plain char is signed.
                hash = 33 * hash + static_cast<unsigned char>(key[i]);
        }

        return (prime != 0) ? (hash % prime) : hash;
}
// Shift/XOR string hash: starts from the key length and folds in every byte as
// hash = (hash << 4) ^ (hash >> 28) ^ byte, then reduces modulo `prime`.
//
// key   - string to hash (may be empty).
// prime - modulus (the callers in this file pass the table length); when it is
//         0 the unreduced value is returned instead of evaluating `% 0`, which
//         is undefined behaviour.
// Returns a value in [0, prime) whenever prime > 0.
unsigned rotatingHash(string key, unsigned prime)
{
        unsigned hash = static_cast<unsigned>(key.length());
        for (string::size_type i = 0; i < key.length(); ++i)
        {
                // The two shifts form a 4-bit left rotation when unsigned is
                // 32 bits wide. The byte goes through unsigned char: a
                // sign-extended negative char would XOR ones into the upper
                // 24 bits and wipe out the mixing done so far.
                hash = (hash << 4) ^ (hash >> 28) ^ static_cast<unsigned char>(key[i]);
        }

        return (prime != 0) ? (hash % prime) : hash;
}
// Currently selected string hash function. hash_insert, hash_find and
// hash_delete all compute their bucket index by calling through this pointer;
// main() assigns it before each test_hash() run.
unsigned (*hash_func)(string key, unsigned prime);
// One entry of a bucket chain in the hash table.
struct DataNode
{
        // Key this entry is stored and looked up under.
        string key;
        // Payload pointer; none of the functions visible in this file read or
        // write it.
        void *data;
        // Next entry in the same bucket chain, or NULL at the end of the chain.
        DataNode *next;
};
// Number of buckets in hash_tbl; test_hash passes it as the table length, and
// the hash_* routines forward that length to the hash function as its modulus.
// NOTE(review): 20001 = 3 * 6667 is not prime, although the hash functions
// name their modulus parameter `prime`.
#define HASH_TBL_LEN 20001
// One bucket of the table: the head of a singly linked chain of DataNode
// entries plus a per-bucket counter.
struct HashNode
{
        // First entry of the chain, or NULL when the bucket is empty.
        DataNode *node;
        // Entry counter maintained by hash_insert / hash_delete; test_hash
        // reports buckets with count > 1 as collisions and count == 0 as empty.
        int count;
} hash_tbl[HASH_TBL_LEN];
// Static pool of entries: test_hash stores the i-th key in nodes[i] and links
// that element into the table. Its size (10000) equals the number of keys
// main() generates.
DataNode nodes[10000];
void hash_init(struct HashNode *tbl, int len)
{
        for (int i = 0; i < len; i++)
        {
                tbl[i].node = NULL;
                tbl[i].count = 0;
        }
}
void hash_insert(struct HashNode *tbl, int len, DataNode *node)
{
        if (!node)
                return;

        unsigned index = hash_func(node->key, len);
        if (tbl[index].node == NULL)
        {
                tbl[index].node = node;
                tbl[index].node->next = NULL;
                tbl[index].count = 1;
        }
        else
        {
                DataNode *p = tbl[index].node->next;
                tbl[index].node->next = node;
                node->next = p;
                tbl[index].count++;
        }
}
// Looks `key` up in the table.
//
// Walks the chain of the bucket selected by hash_func(key, len) and returns
// the first entry whose key compares equal, or NULL when there is none.
DataNode *hash_find(struct HashNode *tbl, int len, string key)
{
        const unsigned slot = hash_func(key, len);
        for (DataNode *cur = tbl[slot].node; cur != NULL; cur = cur->next)
        {
                if (cur->key == key)
                        return cur;
        }
        return NULL;
}
void hash_delete(struct HashNode *tbl, int len, string key)
{
        unsigned index = hash_func(key, len);
        DataNode *p = tbl[index].node;
        if (!p)
                return;

        if (p->key == key)
        {
                tbl[index].node = p->next;
        }
        else
        {
                DataNode *q = p->next;
                while (q != NULL)
                {
                        if (q->key == key)
                        {
                                p->next = q->next;
                                tbl[index].count--;
                                break;
                        }
                        p = q;
                        q = q->next;
                }
        }
}
// Exercises the table with the currently selected hash_func:
//   1. resets hash_tbl and inserts every key, using one element of the static
//      `nodes` pool per key;
//   2. finds, deletes and re-looks-up the key at index 5000, printing a line
//      for the successful find and for the confirmed delete;
//   3. prints how many buckets hold more than one entry ("collision") and how
//      many hold none ("empty").
//
// keys - keys to insert. Only as many keys as the `nodes` pool can hold are
//        inserted, and the find/delete probe is skipped when the key at index
//        5000 was not inserted; previously both cases indexed out of bounds.
void test_hash(const vector<string> &keys)
{
        const size_t pool_size = sizeof(nodes) / sizeof(nodes[0]);
        const size_t probe = 5000;

        hash_init(hash_tbl, HASH_TBL_LEN);

        size_t used = 0;
        for (vector<string>::const_iterator iter = keys.begin();
             iter != keys.end() && used < pool_size; ++iter, ++used)
        {
                DataNode *p = nodes + used;
                p->next = NULL;
                p->key = *iter;
                hash_insert(hash_tbl, HASH_TBL_LEN, p);
        }

        if (probe < used)
        {
                DataNode *p = hash_find(hash_tbl, HASH_TBL_LEN, keys[probe]);
                if (p)
                        cout << "find: " << keys[probe] << endl;
                hash_delete(hash_tbl, HASH_TBL_LEN, keys[probe]);
                p = hash_find(hash_tbl, HASH_TBL_LEN, keys[probe]);
                if (!p)
                        cout << "delete : " << keys[probe] << endl;
        }

        int collision_cnt = 0, empty_cnt = 0;
        for (int j = 0; j < HASH_TBL_LEN; j++)
        {
                if (hash_tbl[j].count > 1)
                        collision_cnt++;
                if (hash_tbl[j].count == 0)
                        empty_cnt++;
        }
        cout << "collision: " << collision_cnt << "  empty: " << empty_cnt << endl;
}
// Character set random keys are drawn from in main(): lowercase letters,
// uppercase letters and decimal digits.
const char *alpha = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
// Demo driver: generates 10000 random alphanumeric keys of 6 to 26 characters,
// then runs test_hash() on the same key set once per hash function (bernstein,
// additiveHash, rotatingHash), printing a heading before each run and a
// separator line between runs.
int main()
{
        // Seed from the current time so that every run draws different keys.
        srand(static_cast<unsigned>(time(NULL)));

        const int charset_size = strlen(alpha);
        vector<string> keys;
        int generated = 0;
        while (generated < 10000)
        {
                const int key_len = rand() % 21 + 6;
                string word;
                for (int pos = 0; pos < key_len; pos++)
                        word += alpha[rand() % charset_size];
                keys.push_back(word);
                generated++;
        }

        // Heading and hash function for each run, in execution order.
        struct Candidate
        {
                const char *heading;
                unsigned (*fn)(string, unsigned);
        };
        const Candidate lineup[] = {
                { "bernstein hash", bernstein },
                { "additiveHash hash", additiveHash },
                { "rotatingHash hash", rotatingHash },
        };
        const int lineup_len = sizeof(lineup) / sizeof(lineup[0]);

        for (int run = 0; run < lineup_len; run++)
        {
                // The separator goes between runs, not after the last one.
                if (run > 0)
                        cout << "-------------------------------" << endl;

                cout << lineup[run].heading << endl;
                hash_func = lineup[run].fn;
                test_hash(keys);
        }
        return 0;
}