一.Hash简介
概念:把任意长度的输入,通过hash算法,变换成固定长度的输出,该输出就是散列值。这种转换是一种压缩映射,也就是,散列值的空间通常远小于输入的空间,不同的输入可能会散列成相同的输出。
哈希表:
若结构中存在和关键字K相等的记录,则必定在f(K)的存储位置上。由此,不需比较便可直接取得所查记录。称这个对应关系f为散列函数(Hash function),按这个思想建立的表为哈希表。
哈希冲突:
对不同的关键字可能得到同一散列地址,即key1≠key2,而f(key1)=f(key2),这种现象称冲突。
解决Hash冲突常用的是拉链法(哈希链表),即hash表中的每个元素是一个链表,hash值相同的记录挂在同一个链表上。查找时,从hash表中对应链表的第一个节点开始遍历比较key值,直到找到key相同的节点或链表遍历完为止。
时间复杂度:
使用hash算法对关键字进行查找,理论上时间复杂度是O(1),但实际取决于散列函数的选取,最坏的情况是O(n)。
二.常用字符串hash
关键字是字符串。常用hash函数:
1.加法hash
// Additive hash: starts from the key length, adds every byte of the key,
// then reduces the sum modulo `prime`.
//   key   - string to hash
//   prime - modulus (table size); must be non-zero because it is a divisor
// Returns a value in [0, prime).
unsigned additiveHash(string key, unsigned prime){
    unsigned hash = static_cast<unsigned>(key.length());
    for (string::size_type i = 0; i < key.length(); ++i)
        // Go through unsigned char so bytes >= 0x80 are not sign-extended
        // where plain char is signed; the result is then the same everywhere.
        hash += static_cast<unsigned char>(key[i]);
    return hash % prime;
}
2. 乘法hash
// Multiplicative (Bernstein-style) hash: hash = 33 * hash + byte for each
// byte of the key, starting from 0, then reduced modulo `prime`.
//   key   - string to hash
//   prime - modulus (table size); must be non-zero because it is a divisor
// Returns a value in [0, prime). The multiplication wraps around in
// unsigned arithmetic, which is well defined.
unsigned bernstein(string key, unsigned prime){
    unsigned hash = 0;
    for (string::size_type i = 0; i < key.length(); ++i)
        // Go through unsigned char so bytes >= 0x80 are not sign-extended
        // where plain char is signed; the result is then the same everywhere.
        hash = 33 * hash + static_cast<unsigned char>(key[i]);
    return hash % prime;
}
33为推荐的乘数,另外推荐的乘数还有:131, 1313, 13131, 131313等等。
3. 位运算Hash
// Rotating hash: starts from the key length; for each byte, combines the
// state shifted left by 4, the state shifted right by 28, and the byte with
// XOR, then reduces the result modulo `prime`.
//   key   - string to hash
//   prime - modulus (table size); must be non-zero because it is a divisor
// Returns a value in [0, prime).
// NOTE: the 4/28 shift pair forms a rotation only when unsigned is 32 bits wide.
unsigned rotatingHash(string key, unsigned prime){
    unsigned hash = static_cast<unsigned>(key.length());
    for (string::size_type i = 0; i < key.length(); ++i)
        // Go through unsigned char: a sign-extended negative char would XOR
        // ones into every high bit and make the result platform-dependent.
        hash = (hash << 4) ^ (hash >> 28) ^ static_cast<unsigned char>(key[i]);
    return hash % prime;
}
通过利用各种位运算(常见的是移位和异或)来充分地混合输入元素。
三.示例
下面是常用字符串hash算法的example:
#include <iostream>
#include <string>
#include <vector>
#include <time.h>
#include <stdlib.h>
#include <string.h>
using namespace std;
// Additive hash: starts from the key length, adds every byte of the key,
// then reduces the sum modulo `prime`.
//   key   - string to hash
//   prime - modulus (table size); must be non-zero because it is a divisor
// Returns a value in [0, prime).
unsigned additiveHash(string key, unsigned prime)
{
    unsigned hash = static_cast<unsigned>(key.length());
    for (string::size_type i = 0; i < key.length(); ++i)
    {
        // Go through unsigned char so bytes >= 0x80 are not sign-extended
        // where plain char is signed; the result is then the same everywhere.
        hash += static_cast<unsigned char>(key[i]);
    }
    return hash % prime;
}
// Multiplicative (Bernstein-style) hash: hash = 33 * hash + byte for each
// byte of the key, starting from 0, then reduced modulo `prime`.
//   key   - string to hash
//   prime - modulus (table size); must be non-zero because it is a divisor
// Returns a value in [0, prime). The multiplication wraps around in
// unsigned arithmetic, which is well defined.
unsigned bernstein(string key, unsigned prime)
{
    unsigned hash = 0;
    for (string::size_type i = 0; i < key.length(); ++i)
    {
        // Go through unsigned char so bytes >= 0x80 are not sign-extended
        // where plain char is signed; the result is then the same everywhere.
        hash = 33 * hash + static_cast<unsigned char>(key[i]);
    }
    return hash % prime;
}
// Rotating hash: starts from the key length; for each byte, combines the
// state shifted left by 4, the state shifted right by 28, and the byte with
// XOR, then reduces the result modulo `prime`.
//   key   - string to hash
//   prime - modulus (table size); must be non-zero because it is a divisor
// Returns a value in [0, prime).
// NOTE: the 4/28 shift pair forms a rotation only when unsigned is 32 bits wide.
unsigned rotatingHash(string key, unsigned prime)
{
    unsigned hash = static_cast<unsigned>(key.length());
    for (string::size_type i = 0; i < key.length(); ++i)
    {
        // Go through unsigned char: a sign-extended negative char would XOR
        // ones into every high bit and make the result platform-dependent.
        hash = (hash << 4) ^ (hash >> 28) ^ static_cast<unsigned char>(key[i]);
    }
    return hash % prime;
}
// Currently selected string hash function. hash_insert, hash_find and
// hash_delete all compute bucket indices through this pointer, and main()
// reassigns it before each test run.
unsigned (*hash_func)(string key, unsigned prime);
// One entry in a bucket's singly linked chain.
struct DataNode
{
// Key this entry is stored and looked up under.
string key;
// Payload pointer; none of the routines shown in this file read or write it.
void *data;
// Next entry in the same bucket, or NULL at the end of the chain.
DataNode *next;
};
// Number of buckets in the global table; also passed as the modulus to the
// hash functions by test_hash().
#define HASH_TBL_LEN 20001
// One bucket of the chained hash table.
struct HashNode
{
// First entry of this bucket's chain, NULL when the bucket is empty.
DataNode *node;
// Entry counter for this bucket, maintained by hash_insert/hash_delete.
int count;
} hash_tbl[HASH_TBL_LEN]; // global table exercised by test_hash()
// Fixed pool of nodes; test_hash() fills one per key and links it into hash_tbl.
DataNode nodes[10000];
// Resets every bucket of a table to the empty state.
//   tbl - array of buckets to reset
//   len - number of buckets in tbl
void hash_init(struct HashNode *tbl, int len)
{
    int idx = 0;
    while (idx < len)
    {
        HashNode &slot = tbl[idx];
        slot.count = 0;
        slot.node = NULL;
        ++idx;
    }
}
void hash_insert(struct HashNode *tbl, int len, DataNode *node)
{
if (!node)
return;
unsigned index = hash_func(node->key, len);
if (tbl[index].node == NULL)
{
tbl[index].node = node;
tbl[index].node->next = NULL;
tbl[index].count = 1;
}
else
{
DataNode *p = tbl[index].node->next;
tbl[index].node->next = node;
node->next = p;
tbl[index].count++;
}
}
// Looks up `key` in the table.
//   tbl - bucket array
//   len - number of buckets, also used as the hash modulus
//   key - key to search for
// Returns the first node in the key's bucket whose key compares equal,
// or NULL when the chain holds no such node.
DataNode *hash_find(struct HashNode *tbl, int len, string key)
{
    const unsigned slot = hash_func(key, len);
    for (DataNode *cur = tbl[slot].node; cur != NULL; cur = cur->next)
    {
        if (cur->key == key)
            return cur;
    }
    return NULL;
}
void hash_delete(struct HashNode *tbl, int len, string key)
{
unsigned index = hash_func(key, len);
DataNode *p = tbl[index].node;
if (!p)
return;
if (p->key == key)
{
tbl[index].node = p->next;
}
else
{
DataNode *q = p->next;
while (q != NULL)
{
if (q->key == key)
{
p->next = q->next;
tbl[index].count--;
break;
}
p = q;
q = q->next;
}
}
}
// Exercises the global table with the currently selected hash_func:
// inserts the keys, looks up and deletes the key at index 5000, then prints
// how many buckets have count > 1 ("collision") and count == 0 ("empty").
//   keys - keys to insert; only as many as the static `nodes` pool can hold
//          are used
void test_hash(const vector<string> &keys)
{
    hash_init(hash_tbl, HASH_TBL_LEN);

    // Never hand out more nodes than the static pool holds.
    const size_t pool_size = sizeof(nodes) / sizeof(nodes[0]);
    size_t used = 0;
    for (vector<string>::const_iterator iter = keys.begin();
         iter != keys.end() && used < pool_size; ++iter, ++used)
    {
        DataNode *p = nodes + used;
        p->next = NULL;
        p->key = *iter;
        hash_insert(hash_tbl, HASH_TBL_LEN, p);
    }
    if (used < keys.size())
        cerr << "warning: node pool full, only the first " << used
             << " keys were inserted" << endl;

    // The find/delete probe is only meaningful if that key was inserted.
    const size_t probe = 5000;
    if (probe < used)
    {
        DataNode *p = hash_find(hash_tbl, HASH_TBL_LEN, keys[probe]);
        if (p)
            cout << "find: " << keys[probe] << endl;
        hash_delete(hash_tbl, HASH_TBL_LEN, keys[probe]);
        p = hash_find(hash_tbl, HASH_TBL_LEN, keys[probe]);
        if (!p)
            cout << "delete : " << keys[probe] << endl;
    }

    int collision_cnt = 0, empty_cnt = 0;
    for (int j = 0; j < HASH_TBL_LEN; j++)
    {
        if (hash_tbl[j].count > 1)
            collision_cnt++;
        if (hash_tbl[j].count == 0)
            empty_cnt++;
    }
    cout << "collision: " << collision_cnt << " empty: " << empty_cnt << endl;
}
// Characters the random test keys in main() are drawn from: a-z, A-Z, 0-9.
const char *alpha = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
// Generates 10000 random alphanumeric keys of 6 to 26 characters and runs
// test_hash() once per hash function, printing a separator line between runs.
int main()
{
    srand(static_cast<unsigned>(time(NULL)));
    const int alpha_len = static_cast<int>(strlen(alpha));

    vector<string> keys;
    for (int made = 0; made < 10000; ++made)
    {
        // One rand() call for the length, then one per character.
        const int len = rand() % 21 + 6;
        string key;
        for (int pos = 0; pos < len; ++pos)
            key.push_back(alpha[rand() % alpha_len]);
        keys.push_back(key);
    }

    // Title printed before each run and the hash function it selects.
    struct Run
    {
        const char *title;
        unsigned (*fn)(string, unsigned);
    };
    const Run runs[] = {
        {"bernstein hash", bernstein},
        {"additiveHash hash", additiveHash},
        {"rotatingHash hash", rotatingHash},
    };
    const int run_count = sizeof(runs) / sizeof(runs[0]);

    for (int r = 0; r < run_count; ++r)
    {
        if (r > 0)
            cout << "-------------------------------" << endl;
        cout << runs[r].title << endl;
        hash_func = runs[r].fn;
        test_hash(keys);
    }
    return 0;
}
本文总结了Hash算法的基础知识,包括Hash简介和常用的字符串Hash方法,如加法、乘法和位运算Hash。通过示例展示了如何使用位运算来混合输入元素,以实现更高效的哈希计算。
1071

被折叠的 条评论
为什么被折叠?



