Hash函数:Hash,一般翻译为"散列",也有直接音译为"哈希"的,就是把任意长度的输入(又叫做预映射, pre-image),通过散列算法,变换成固定长度的输出,该输出就是散列值。这种转换是一种压缩映射,也就是说,散列值的空间通常远小于输入的空间,不同的输入可能会散列成相同的输出,因此不可能从散列值来唯一地确定输入值。简单地说,Hash函数就是一种将任意长度的消息压缩到某一固定长度的消息摘要的函数。完美的Hash函数满足:若key1!=key2,则hash(key1)!=hash(key2)。
字符串Hash函数:在处理大规模字符串数据时,常常需要把每个字符串映射为一个ID值,以下列出一些常用的经典字符串Hash函数。
#include <cstdlib>
#include <iostream>
using namespace std;
// BKDR hash. Attributed by the original author to Brian Kernighan and
// Dennis Ritchie's "The C Programming Language"; the same multiplicative
// scheme is noted as the one Java uses.
//
// str: pointer to a zero-terminated sequence of T; must not be null.
// Returns the hash accumulated as hash = hash * 131 + c over every element
// before the terminator (0 for an empty string). Arithmetic wraps modulo
// 2^N of unsigned int.
//
// Each element is added after the usual integer promotions, so for a signed
// T a negative element is sign-extended before being added.
template<class T>
unsigned int BKDRHash(const T* str)
{
    const unsigned int seed = 131; // other usual choices: 31, 1313, 13131, 131313, ...
    unsigned int hash = 0;
    while (*str)
    {
        // Same as (hash << 7) + (hash << 1) + hash + c, since 128 + 2 + 1 == 131.
        hash = hash * seed + (*str++);
    }
    // Callers needing a non-negative signed value can mask with 0x7FFFFFFF.
    return hash;
}
// SDBM hash. Per the original note it is used in the open-source SDBM
// database engine; it has the same shape as BKDRHash and differs only in
// the multiplier.
//
// str: pointer to a zero-terminated sequence of T; must not be null.
// Returns the hash accumulated as hash = hash * 65599 + c over every element
// before the terminator (0 for an empty string). Arithmetic wraps modulo
// 2^N of unsigned int.
//
// Each element is added after the usual integer promotions, so for a signed
// T a negative element is sign-extended before being added.
template<class T>
unsigned int SDBMHash(const T* str)
{
    const unsigned int seed = 65599;
    unsigned int hash = 0;
    while (*str)
    {
        // Same as (hash << 6) + (hash << 16) - hash + c, since 64 + 65536 - 1 == 65599.
        hash = hash * seed + (*str++);
    }
    // Callers needing a non-negative signed value can mask with 0x7FFFFFFF.
    return hash;
}
// RS hash. Attributed by the original author to Robert Sedgewick's
// "Algorithms in C".
//
// str: pointer to a zero-terminated sequence of T; must not be null.
// Returns the accumulated hash (0 for an empty string). Unlike a fixed-seed
// multiplicative hash, the multiplier itself changes on every step: it
// starts at 63689 and is multiplied by 378551 after each element.
// All arithmetic wraps modulo 2^N of unsigned int.
//
// Each element is added after the usual integer promotions, so for a signed
// T a negative element is sign-extended before being added.
template<class T>
unsigned int RSHash(const T* str)
{
    const unsigned int factor = 378551;
    unsigned int magic = 63689;
    unsigned int hash = 0;
    while (*str)
    {
        hash = hash * magic + (*str++);
        magic *= factor; // evolve the multiplier for the next element
    }
    // Callers needing a non-negative signed value can mask with 0x7FFFFFFF.
    return hash;
}
// AP hash, attributed by the original author to Arash Partow.
//
// str: pointer to a zero-terminated sequence of T; must not be null.
// Returns the accumulated hash (0 for an empty string). Elements at even
// positions (0, 2, 4, ...) and odd positions are mixed with two different
// shift/xor formulas.
//
// Each element is converted to unsigned int before mixing, so for a signed
// T a negative element is sign-extended.
template<class T>
unsigned int APHash(const T* str)
{
    unsigned int hash = 0;
    // Only the parity of the position matters, so track it with a flag
    // instead of a signed counter that could overflow on huge inputs.
    bool evenPosition = true;
    for (;;)
    {
        const unsigned int ch = static_cast<unsigned int>(*str++);
        if (ch == 0)
        {
            break;
        }
        if (evenPosition)
        {
            hash ^= ((hash << 7) ^ ch ^ (hash >> 3));
        }
        else
        {
            hash ^= (~((hash << 11) ^ ch ^ (hash >> 5)));
        }
        evenPosition = !evenPosition;
    }
    // Callers needing a non-negative signed value can mask with 0x7FFFFFFF.
    return hash;
}
// JS hash, attributed by the original author to Justin Sobel.
//
// str: pointer to a zero-terminated sequence of T; must not be null.
// Returns the accumulated hash; an empty string yields the initial value
// 1315423911. For each element c the state is updated as
// hash ^= (hash << 5) + (hash >> 2) + c, with wrapping unsigned arithmetic.
//
// Each element is added after the usual integer promotions, so for a signed
// T a negative element is sign-extended before being added.
template<class T>
unsigned int JSHash(const T* str)
{
    unsigned int hash = 1315423911;
    while (*str)
    {
        hash ^= ((hash << 5) + (hash >> 2) + (*str++));
    }
    // Callers needing a non-negative signed value can mask with 0x7FFFFFFF.
    return hash;
}
// DEK hash. Attributed by the original author to Donald E. Knuth's
// "The Art of Computer Programming, Volume 3".
//
// str: pointer to a zero-terminated sequence of T; must not be null.
// Returns the accumulated hash; an empty string yields the initial value
// 1315423911. For each element c the state is updated as
// hash ^= (hash << 5) ^ (hash >> 2) ^ c.
//
// Each element is combined after the usual integer promotions, so for a
// signed T a negative element is sign-extended before being xor-ed in.
template<class T>
unsigned int DEKHash(const T* str)
{
    unsigned int hash = 1315423911;
    while (*str)
    {
        hash ^= ((hash << 5) ^ (hash >> 2) ^ (*str++));
    }
    // Callers needing a non-negative signed value can mask with 0x7FFFFFFF.
    return hash;
}
// FNV hash. Per the original note: a hash algorithm used in Unix systems and
// later implemented in Microsoft's hash_map.
//
// str: pointer to a zero-terminated sequence of T; must not be null.
// Returns the accumulated hash; an empty string yields the initial value
// 2166136261. For each element the state is first multiplied by 16777619
// (wrapping) and then xor-ed with the element.
//
// Each element is combined after the usual integer promotions, so for a
// signed T a negative element is sign-extended before being xor-ed in.
template<class T>
unsigned int FNVHash(const T* str)
{
    unsigned int hash = 2166136261u;
    while (*str)
    {
        hash *= 16777619u; // multiply first ...
        hash ^= (*str++);  // ... then fold in the element
    }
    // Callers needing a non-negative signed value can mask with 0x7FFFFFFF.
    return hash;
}
// DJB hash, attributed by the original author to Professor Daniel J. Bernstein.
//
// str: pointer to a zero-terminated sequence of T; must not be null.
// Returns the accumulated hash; an empty string yields the initial value
// 5381. Each step computes hash += (hash << 5) + c, i.e. hash * 33 + c with
// wrapping unsigned arithmetic.
//
// Each element is added after the usual integer promotions, so for a signed
// T a negative element is sign-extended before being added.
template<class T>
unsigned int DJBHash(const T* str)
{
    unsigned int hash = 5381;
    while (*str)
    {
        hash += ((hash << 5) + *str++);
    }
    // Callers needing a non-negative signed value can mask with 0x7FFFFFFF.
    return hash;
}
// Second DJB hash variant, attributed by the original author to Professor
// Daniel J. Bernstein.
//
// str: pointer to a zero-terminated sequence of T; must not be null.
// Returns the accumulated hash; an empty string yields the initial value
// 5381. Each step computes hash = (hash * 33) ^ c; it differs from DJBHash
// in combining the element with xor instead of addition.
//
// Each element is combined after the usual integer promotions, so for a
// signed T a negative element is sign-extended before being xor-ed in.
template<class T>
unsigned int DJB2Hash(const T* str)
{
    unsigned int hash = 5381;
    while (*str)
    {
        hash = (hash * 33) ^ (*str++);
    }
    // Callers needing a non-negative signed value can mask with 0x7FFFFFFF.
    return hash;
}
// PJW hash. Per the original note it is based on a paper by Peter J.
// Weinberger of AT&T Bell Labs.
//
// str: pointer to a zero-terminated sequence of T; must not be null.
// Returns the accumulated hash (0 for an empty string).
//
// The bit widths are derived from sizeof(unsigned int) assuming 8-bit bytes;
// for a 32-bit unsigned int that gives a 4-bit shift per element, a 24-bit
// fold distance and a top-nibble mask of 0xF0000000. The returned value
// always has the HighBits cleared once they have been folded.
//
// Each element is added after the usual integer promotions, so for a signed
// T a negative element is sign-extended before being added.
template<class T>
unsigned int PJWHash(const T* str)
{
    static const unsigned int TotalBits = sizeof(unsigned int) * 8;
    static const unsigned int ThreeQuarters = (TotalBits * 3) / 4;
    static const unsigned int OneEighth = TotalBits / 8;
    static const unsigned int HighBits = (~0u) << (TotalBits - OneEighth);
    unsigned int hash = 0;
    while (*str)
    {
        hash = (hash << OneEighth) + (*str++);
        const unsigned int overflow = hash & HighBits;
        if (overflow != 0)
        {
            // Fold the bits that reached the top back into the low part,
            // then clear the whole top group.
            hash = ((hash ^ (overflow >> ThreeQuarters)) & (~HighBits));
        }
    }
    // Callers needing a non-negative signed value can mask with 0x7FFFFFFF.
    return hash;
}
// ELF hash. Per the original note it ships with the Unix "Extended Library
// Function" and is a variation of the PJW hash.
//
// str: pointer to a zero-terminated sequence of T; must not be null.
// Returns the accumulated hash (0 for an empty string).
//
// The bit widths are derived from sizeof(unsigned int) assuming 8-bit bytes;
// for a 32-bit unsigned int that gives a 4-bit shift per element, a 24-bit
// fold distance and a top-nibble mask of 0xF0000000. In contrast to
// PJWHash, only the top bits that were actually set are cleared after
// folding (hash &= ~overflow).
//
// Each element is added after the usual integer promotions, so for a signed
// T a negative element is sign-extended before being added.
template<class T>
unsigned int ELFHash(const T* str)
{
    static const unsigned int TotalBits = sizeof(unsigned int) * 8;
    static const unsigned int ThreeQuarters = (TotalBits * 3) / 4;
    static const unsigned int OneEighth = TotalBits / 8;
    static const unsigned int HighBits = (~0u) << (TotalBits - OneEighth);
    unsigned int hash = 0;
    while (*str)
    {
        hash = (hash << OneEighth) + (*str++);
        const unsigned int overflow = hash & HighBits;
        if (overflow != 0)
        {
            hash ^= (overflow >> ThreeQuarters); // fold the top bits downwards
            hash &= ~overflow;                   // then drop exactly those bits
        }
    }
    // Callers needing a non-negative signed value can mask with 0x7FFFFFFF.
    return hash;
}
// Hash mentioned in "Programming Pearls" (per the original note).
//
// str: pointer to a zero-terminated sequence of T; must not be null.
// Returns a value in [0, 29989): the elements are accumulated as
// hash = hash * 31 + c with wrapping unsigned arithmetic, and the final
// value is reduced modulo NHASH (29989). An empty string yields 0.
//
// Each element is added after the usual integer promotions, so for a signed
// T a negative element is sign-extended before being added.
template<class T>
unsigned int PearlsHash(const T* str)
{
    const unsigned int NHASH = 29989; // number of buckets the result is reduced to
    const unsigned int MULT = 31;
    unsigned int hash = 0;
    while (*str)
    {
        hash = (hash * MULT) + (*str++);
    }
    return (hash % NHASH);
}
// Demo driver: hashes one sample string with every hash function in this
// file and prints each result on its own line.
int main()
{
    // String literals are const; binding one to a plain char* is ill-formed
    // since C++11.
    const char* str = "192.168.10.0";
    unsigned int hashCode;
    hashCode = BKDRHash<char>(str);
    cout << "BKDRHash hashCode = " << hashCode << endl;
    hashCode = SDBMHash<char>(str);
    cout << "SDBMHash hashCode = " << hashCode << endl;
    hashCode = RSHash<char>(str);
    cout << "RSHash hashCode = " << hashCode << endl;
    hashCode = APHash<char>(str);
    cout << "APHash hashCode = " << hashCode << endl;
    hashCode = JSHash<char>(str);
    cout << "JSHash hashCode = " << hashCode << endl;
    hashCode = DEKHash<char>(str);
    cout << "DEKHash hashCode = " << hashCode << endl;
    hashCode = FNVHash<char>(str);
    cout << "FNVHash hashCode = " << hashCode << endl;
    hashCode = DJBHash<char>(str);
    cout << "DJBHash hashCode = " << hashCode << endl;
    hashCode = DJB2Hash<char>(str);
    cout << "DJB2Hash hashCode = " << hashCode << endl;
    hashCode = PJWHash<char>(str);
    cout << "PJWHash hashCode = " << hashCode << endl;
    hashCode = ELFHash<char>(str);
    cout << "ELFHash hashCode = " << hashCode << endl;
    hashCode = PearlsHash<char>(str);
    cout << "PearlsHash hashCode = " << hashCode << endl;
#ifdef _WIN32
    // Keep the console window open when launched from the IDE; "pause" is a
    // Windows shell command, so it is only invoked there.
    std::system("pause");
#endif
    return 0;
}
注:以上代码是在 Windows + VS2012 环境下运行的。