字符串Hash函数

本文介绍了Hash函数的概念,它将任意长度的输入转化为固定长度的输出,作为散列值。对于字符串,Hash函数常用于大规模数据处理中,将字符串映射为ID。虽然存在不同的输入可能散列成相同值的情况,但这种函数提供了高效的数据映射方式。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

Hash函数:Hash,一般翻译为"散列",也有直接音译为"哈希"的,就是把任意长度的输入(又叫做预映射,pre-image)通过散列算法变换成固定长度的输出,该输出就是散列值。这种转换是一种压缩映射,也就是说,散列值的空间通常远小于输入的空间,不同的输入可能会散列成相同的输出(即发生冲突),因此不可能从散列值唯一地确定输入值。简单地说,它就是一种将任意长度的消息压缩为某一固定长度的消息摘要的函数。完美的Hash函数满足:若key1!=key2,则hash(key1)!=hash(key2)。

字符串Hash函数:在处理大规模字符串数据时,常常需要把每个字符串映射为一个ID值,以下列出一些常用的经典字符串Hash函数。

#include <cstdlib>
#include <iostream>

using namespace std;

// BKDR hash. The original note attributes it to Brian Kernighan and Dennis
// Ritchie's "The C Programming Language" and says Java uses the same scheme.
//
// Computes the polynomial hash = hash * seed + c over every element c of the
// string, starting from 0. Unsigned arithmetic wraps on overflow.
//
// str      Pointer to a sequence of T terminated by a zero element. It is
//          dereferenced unconditionally, so it must not be null.
// returns  The accumulated hash. Elements are added with their promoted value,
//          so negative elements of a signed T are sign-extended.
template<class T>
unsigned int BKDRHash(const T* str)
{
	// No `register` here: that storage class was removed in C++17.
	unsigned int hash = 0;
	const unsigned int seed = 131; // other usual seeds: 31, 1313, 13131, 131313...
	while (*str)
	{
		// Shift form for seed 131: (hash << 7) + (hash << 1) + hash + c.
		hash = hash * seed + (*str++);
	}
	// Mask with 0x7FFFFFFF at the call site if a non-negative int is required.
	return hash;
}

// SDBM hash. Per the original note it is used by the open-source SDBM database
// engine; it is the same polynomial scheme as BKDRHash with a different seed.
//
// Computes hash = hash * 65599 + c over every element c of the string,
// starting from 0. Unsigned arithmetic wraps on overflow.
//
// str      Pointer to a sequence of T terminated by a zero element. It is
//          dereferenced unconditionally, so it must not be null.
// returns  The accumulated hash.
template<class T>
unsigned int SDBMHash(const T* str)
{
	// No `register` here: that storage class was removed in C++17.
	unsigned int hash = 0;
	const unsigned int seed = 65599;
	while (*str)
	{
		// Shift form for seed 65599: (hash << 6) + (hash << 16) - hash + c.
		hash = hash * seed + (*str++);
	}
	// Mask with 0x7FFFFFFF at the call site if a non-negative int is required.
	return hash;
}

// RS hash. The original note attributes it to Robert Sedgewick's
// "Algorithms in C".
//
// Like a polynomial hash, but the multiplier itself changes per element:
// hash = hash * magic + c, then magic *= factor. Unsigned arithmetic wraps.
//
// str      Pointer to a sequence of T terminated by a zero element. It is
//          dereferenced unconditionally, so it must not be null.
// returns  The accumulated hash (0 for an empty string).
template<class T>
unsigned int RSHash(const T* str)
{
	// No `register` here: that storage class was removed in C++17.
	unsigned int hash = 0;
	unsigned int magic = 63689;          // multiplier for the current element
	const unsigned int factor = 378551;  // scales the multiplier after each element
	while (*str)
	{
		hash = hash * magic + (*str++);
		magic *= factor;
	}
	// Mask with 0x7FFFFFFF at the call site if a non-negative int is required.
	return hash;
}

// AP hash, attributed to Arash Partow in the original note.
//
// Alternates between two mixing steps depending on whether the element sits at
// an even or an odd zero-based position in the string.
//
// str      Pointer to a sequence of T terminated by a zero element. It is
//          dereferenced unconditionally, so it must not be null.
// returns  The accumulated hash (0 for an empty string).
template<class T>
unsigned int APHash(const T* str)
{
	// No `register` here: that storage class was removed in C++17.
	unsigned int hash = 0;
	// Only the parity of the position matters; a flag cannot overflow the way
	// a signed position counter could on extremely long inputs.
	bool evenPosition = true;
	for (;;)
	{
		const unsigned int ch = static_cast<unsigned int>(*str++);
		if (ch == 0)
		{
			break; // reached the terminator
		}
		if (evenPosition)
		{
			hash ^= ((hash << 7) ^ ch ^ (hash >> 3));
		}
		else
		{
			hash ^= (~((hash << 11) ^ ch ^ (hash >> 5)));
		}
		evenPosition = !evenPosition;
	}
	// Mask with 0x7FFFFFFF at the call site if a non-negative int is required.
	return hash;
}

// JS hash, attributed to Justin Sobel in the original note.
//
// For every element c: hash ^= (hash << 5) + (hash >> 2) + c, starting from
// the constant 1315423911. Unsigned arithmetic wraps on overflow.
//
// str      Pointer to a sequence of T terminated by a zero element. It is
//          dereferenced unconditionally, so it must not be null.
// returns  The accumulated hash (1315423911 for an empty string).
template<class T>
unsigned int JSHash(const T* str)
{
	// No `register` here: that storage class was removed in C++17.
	unsigned int hash = 1315423911;
	while (*str)
	{
		hash ^= ((hash << 5) + (hash >> 2) + (*str++));
	}
	// Mask with 0x7FFFFFFF at the call site if a non-negative int is required.
	return hash;
}

// DEK hash. The original note attributes it to Donald E. Knuth's
// "The Art of Computer Programming, Volume 3".
//
// For every element c: hash ^= (hash << 5) ^ (hash >> 2) ^ c, starting from
// the constant 1315423911.
//
// NOTE(review): commonly published versions of the DEK hash mix with
// (hash << 5) ^ (hash >> 27) ^ c. The formula below is kept exactly as it was
// so existing hash values do not change; confirm which variant is intended.
//
// str      Pointer to a sequence of T terminated by a zero element. It is
//          dereferenced unconditionally, so it must not be null.
// returns  The accumulated hash (1315423911 for an empty string).
template<class T>
unsigned int DEKHash(const T* str)
{
	// No `register` here: that storage class was removed in C++17.
	unsigned int hash = 1315423911;
	while (*str)
	{
		hash ^= ((hash << 5) ^ (hash >> 2) ^ (*str++));
	}
	// Mask with 0x7FFFFFFF at the call site if a non-negative int is required.
	return hash;
}

// FNV hash. Per the original note: used in Unix systems and later implemented
// in Microsoft's hash_map.
//
// For every element c the hash is first multiplied by 16777619 and then XORed
// with c (multiply-then-xor order), starting from 2166136261.
//
// str      Pointer to a sequence of T terminated by a zero element. It is
//          dereferenced unconditionally, so it must not be null.
// returns  The accumulated hash (2166136261 for an empty string).
template<class T>
unsigned int FNVHash(const T* str)
{
	// No `register` here: that storage class was removed in C++17.
	// The `u` suffix keeps the literal unsigned; 2166136261 does not fit in int.
	unsigned int hash = 2166136261u;
	const unsigned int prime = 16777619u;
	while (*str)
	{
		hash *= prime;
		hash ^= (*str++);
	}
	// Mask with 0x7FFFFFFF at the call site if a non-negative int is required.
	return hash;
}

// DJB hash, attributed to Professor Daniel J. Bernstein in the original note.
//
// For every element c: hash += (hash << 5) + c, i.e. hash = hash * 33 + c,
// starting from 5381. Unsigned arithmetic wraps on overflow.
//
// str      Pointer to a sequence of T terminated by a zero element. It is
//          dereferenced unconditionally, so it must not be null.
// returns  The accumulated hash (5381 for an empty string).
template<class T>
unsigned int DJBHash(const T* str)
{
	// No `register` here: that storage class was removed in C++17.
	unsigned int hash = 5381;
	while (*str)
	{
		hash += ((hash << 5) + *str++);
	}
	// Mask with 0x7FFFFFFF at the call site if a non-negative int is required.
	return hash;
}

// A second DJB hash, also attributed to Professor Daniel J. Bernstein in the
// original note.
//
// For every element c: hash = (hash * 33) ^ c, starting from 5381. It differs
// from DJBHash only in combining with XOR instead of addition.
//
// str      Pointer to a sequence of T terminated by a zero element. It is
//          dereferenced unconditionally, so it must not be null.
// returns  The accumulated hash (5381 for an empty string).
template<class T>
unsigned int DJB2Hash(const T* str)
{
	// No `register` here: that storage class was removed in C++17.
	unsigned int hash = 5381;
	while (*str)
	{
		hash = (hash * 33) ^ (*str++);
	}
	// Mask with 0x7FFFFFFF at the call site if a non-negative int is required.
	return hash;
}


// PJW hash. Per the original note it is based on a paper by Peter J.
// Weinberger of AT&T Bell Labs.
//
// Each element is shifted in from the low end. Whenever any of the top
// OneEighth bits become set, they are folded back into the low part and the
// whole top group is cleared.
//
// str      Pointer to a sequence of T terminated by a zero element. It is
//          dereferenced unconditionally, so it must not be null.
// returns  The accumulated hash (0 for an empty string).
template<class T>
unsigned int PJWHash(const T* str)
{
	// Derived from the width of unsigned int, assuming 8-bit bytes. For a
	// 32-bit unsigned int these are 32, 24, 4 and 0xF0000000.
	const unsigned int TotalBits     = sizeof(unsigned int) * 8;
	const unsigned int ThreeQuarters = (TotalBits * 3) / 4;
	const unsigned int OneEighth     = TotalBits / 8;
	const unsigned int HighBits      = ~0u << (TotalBits - OneEighth);

	// No `register` here: that storage class was removed in C++17.
	unsigned int hash = 0;
	while (*str)
	{
		hash = (hash << OneEighth) + (*str++);
		const unsigned int overflow = hash & HighBits;
		if (overflow != 0)
		{
			// Fold the top bits back into the low part, then clear the top group.
			hash = ((hash ^ (overflow >> ThreeQuarters)) & (~HighBits));
		}
	}
	// Mask with 0x7FFFFFFF at the call site if a non-negative int is required.
	return hash;
}

// ELF hash. Per the original note it ships with the Unix "Extended Library
// Function" and is a variant of the PJW hash.
//
// Each element is shifted in from the low end. Whenever any of the top
// OneEighth bits become set, they are folded back into the low part and those
// set bits are cleared.
//
// str      Pointer to a sequence of T terminated by a zero element. It is
//          dereferenced unconditionally, so it must not be null.
// returns  The accumulated hash (0 for an empty string).
template<class T>
unsigned int ELFHash(const T* str)
{
	// Derived from the width of unsigned int, assuming 8-bit bytes. For a
	// 32-bit unsigned int these are 32, 24, 4 and 0xF0000000.
	const unsigned int TotalBits     = sizeof(unsigned int) * 8;
	const unsigned int ThreeQuarters = (TotalBits * 3) / 4;
	const unsigned int OneEighth     = TotalBits / 8;
	const unsigned int HighBits      = ~0u << (TotalBits - OneEighth);

	// No `register` here: that storage class was removed in C++17.
	unsigned int hash = 0;
	while (*str)
	{
		hash = (hash << OneEighth) + (*str++);
		const unsigned int overflow = hash & HighBits;
		if (overflow != 0)
		{
			hash ^= (overflow >> ThreeQuarters); // fold the top bits into the low part
			hash &= ~overflow;                   // clear exactly the bits that were set
		}
	}
	// Mask with 0x7FFFFFFF at the call site if a non-negative int is required.
	return hash;
}

// Hash described in "Programming Pearls", per the original note.
//
// Polynomial hash with multiplier 31, reduced modulo 29989 once at the end.
// The running value wraps on unsigned overflow before that final reduction.
//
// str      Pointer to a sequence of T terminated by a zero element. It is
//          dereferenced unconditionally, so it must not be null.
// returns  A value in the range [0, 29989).
template<class T>
unsigned int PearlsHash(const T* str)
{
	// No `register` here: that storage class was removed in C++17.
	const unsigned int NHASH = 29989; // modulus applied to the final value
	const unsigned int MULT = 31;     // polynomial multiplier
	unsigned int hash = 0;
	while (*str)
	{
		hash = (hash * MULT) + (*str++);
	}
	return (hash % NHASH);
}

int main()
{
	char* str = "192.168.10.0";

	unsigned int hashCode;
	hashCode = BKDRHash<char>(str);
	cout << "BKDRHash hashCode = " << hashCode << endl;

	hashCode = SDBMHash<char>(str);
	cout << "SDBMHash hashCode = " << hashCode << endl;

	hashCode = RSHash<char>(str);
	cout << "RSHash hashCode = " << hashCode << endl;

	hashCode = APHash<char>(str);
	cout << "APHash hashCode = " << hashCode << endl;

	hashCode = JSHash<char>(str);
	cout << "JSHash hashCode = " << hashCode << endl;

	hashCode = DEKHash<char>(str);
	cout << "DEKHash hashCode = " << hashCode << endl;

	hashCode = FNVHash<char>(str);
	cout << "FNVHash hashCode = " << hashCode << endl;

	hashCode = DJBHash<char>(str);
	cout << "DJBHash hashCode = " << hashCode << endl;

	hashCode = DJB2Hash<char>(str);
	cout << "DJB2Hash hashCode = " << hashCode << endl;

	hashCode = PJWHash<char>(str);
	cout << "PJWHash hashCode = " << hashCode << endl;

	hashCode = ELFHash<char>(str);
	cout << "ELFHash hashCode = " << hashCode << endl;

	hashCode = PearlsHash<char>(str);
	cout << "PearlsHash hashCode = " << hashCode << endl;

	system("pause");
	return 0;
}

注:以上代码在 Windows + VS2012 环境下编译并运行通过。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值