哈希算法（闭散列）- 支持string（仿函数的应用 / 偏特化）

最新推荐文章于 2025-07-29 21:31:10 发布

ZCDL_

最新推荐文章于 2025-07-29 21:31:10 发布

阅读量411

点赞数 6

CC 4.0 BY-SA版权

分类专栏： C++ 文章标签： c++ 开发语言哈希算法

本文链接：https://blog.youkuaiyun.com/ZCDL1314/article/details/143469408

C++ 专栏收录该内容

4 篇文章

订阅专栏

仿函数的应用

仿函数的应用就是替代C语言中的函数指针，仿函数就是模板 + 结构体 + 运算符重载

闭散列仿函数实现方式1

直接分开定义：模板定义一类结构体，string特殊的单独定义

// Approach 1: std::string gets its own, separately defined functor (HashString).
// HashFunc<K> is the default argument for the table's `Hash` template parameter.
//
// Generic hash functor: K must be explicitly convertible to size_t
// (integers, characters, enums, ...).
template<class K>
struct HashFunc
{
	// Returns the key converted to size_t.
	// Declared const so it can also be invoked through a const functor object.
	size_t operator()(const K& k) const
	{
		// C-style cast kept on purpose: every K the cast accepted before still compiles
		return (size_t)k;
	}
};

// Hash functor for std::string keys (BKDR string hash with multiplier 31).
// It must be passed explicitly as the table's third template argument,
// e.g. HashTable<string, string, HashString>.
struct HashString
{
	// Folds every byte of `k` into the result: hash = hash * 31 + byte.
	// Returns 0 for the empty string. Declared const so it can be called
	// through a const functor object.
	size_t operator()(const std::string& k) const
	{
		size_t hashi = 0;
		for (const char ch : k)
		{
			// Go through unsigned char so bytes >= 0x80 (e.g. UTF-8 text)
			// contribute 0..255 instead of a sign-extended value on
			// platforms where plain char is signed.
			hashi = hashi * 31 + static_cast<unsigned char>(ch);
		}

		return hashi;
	}
};

直接string类型单独定义一个结构体
HashFunc<class K>做Hash（仿函数）的缺省参数

使用说明：当key为int时，实例化可以不传Hash（仿函数）参数，默认使用HashFunc<K>；但key为string时，实例化的时候必须显式传入Hash参数（即HashString），确保使用的是把string转成整型的仿函数

模板定义的改变 - 增加了仿函数的类 - class Hash = HashFunc<K>（带缺省值 - int可用）

// Hash is the type of the hashing functor.
// When no argument is supplied it falls back to the default, HashFunc<K>.
template<class K, class V, class Hash = HashFunc<K>>

如果是以上方式一的仿函数实现方式，那么实例化时若是string类型需要手动传入HashString的结构体声明，确保调用此结构体中的仿函数

// Instantiation: HashString is passed explicitly as the Hash argument for string keys
HashTable<string, string, HashString> ht;

仿函数实现一完整程序

// Functor, approach 1: no template specialization is used.
//
// Generic hash functor: K must be explicitly convertible to size_t
// (integers, characters, enums, ...).
template<class K>
struct HashFunc
{
	// Returns the key converted to size_t.
	// Declared const so it can also be invoked through a const functor object.
	size_t operator()(const K& k) const
	{
		// C-style cast kept on purpose: every K the cast accepted before still compiles
		return (size_t)k;
	}
};

// Hash functor for std::string keys (multiply-by-31 string hash).
// Supplied explicitly as the Hash template argument of HashTable when the
// key type is a string.
struct HashString
{
	// Folds every byte of `k` into the result: hash = hash * 31 + byte.
	// Returns 0 for the empty string. Declared const so it can be called
	// through a const functor object.
	size_t operator()(const std::string& k) const
	{
		size_t hashi = 0;
		for (const char ch : k)
		{
			// Go through unsigned char so bytes >= 0x80 (e.g. UTF-8 text)
			// contribute 0..255 instead of a sign-extended value on
			// platforms where plain char is signed.
			hashi = hashi * 31 + static_cast<unsigned char>(ch);
		}

		return hashi;
	}
};

namespace open_adress
{
	// State of one slot in the table.
	enum Status
	{
		Empty = 0,	// slot is unused
		Exist = 1,	// slot currently holds a key/value pair
		Delete = 2	// slot held a pair that Erase has since marked as removed
	};

	// One slot of the table: the stored key/value pair plus the slot's state.
	template<class K, class V>
	struct HashData
	{
		// Payload; only meaningful while _s == Exist.
		std::pair<K, V> _kv;

		// Explicit default so that a slot starts out Empty however it is
		// created, instead of relying on the container zero-initializing it.
		Status _s = Empty;
	};

	// Closed-hashing (open addressing) hash table of key/value pairs.
	// (Each struct / class needs its own template header.)
	//
	//   K    - key type; keys are compared with ==
	//   V    - mapped value type
	//   Hash - functor turning a K into size_t; defaults to HashFunc<K>
	//
	// Collisions are resolved inside the slot array, either by linear probing
	// (Insert_Linear) or by quadratic probing (Insert_Twice). Erase only marks
	// a slot as Delete so that probe chains running through it stay intact.
	template<class K, class V, class Hash = HashFunc<K>>
	class HashTable
	{
	public:
		// Creates a table with 10 slots.
		HashTable()
		{
			_tables.resize(10);
		}

		// Inserts kv using linear probing (home, home+1, home+2, ...).
		// Returns false and leaves the table unchanged when the key is
		// already present, true otherwise.
		bool Insert_Linear(const std::pair<K, V>& kv)
		{
			if (Find(kv.first))
				return false;

			// Grow once the load factor reaches 0.7
			if (NeedExpand())
				Expand(&HashTable::Insert_Linear);

			Hash hf;
			const size_t size = _tables.size();
			size_t hashi = hf(kv.first) % size;

			// Terminates: fewer than 70% of the slots are in state Exist here
			while (_tables[hashi]._s == Exist)
				hashi = (hashi + 1) % size;

			Fill(hashi, kv);
			return true;
		}

		// Inserts kv using quadratic probing (home, home+1, home+4, home+9, ...).
		// Returns false and leaves the table unchanged when the key is
		// already present, true otherwise.
		bool Insert_Twice(const std::pair<K, V>& kv)
		{
			if (Find(kv.first))
				return false;

			if (NeedExpand())
				Expand(&HashTable::Insert_Twice);

			Hash hf;
			const size_t size = _tables.size();
			const size_t home = hf(kv.first) % size;

			// Integer arithmetic instead of pow(): exact, and no double -> size_t
			// conversion. i*i mod size repeats with period `size`, so probing
			// further than size - 1 steps cannot reach any new slot.
			size_t hashi = home;
			for (size_t step = 1; _tables[hashi]._s == Exist && step < size; ++step)
				hashi = (home + step * step) % size;

			// The squares only reach part of the table, so the quadratic
			// sequence may be exhausted although free slots remain; fall back
			// to linear probing, which Find searches as well.
			if (_tables[hashi]._s == Exist)
			{
				hashi = home;
				while (_tables[hashi]._s == Exist)
					hashi = (hashi + 1) % size;
			}

			Fill(hashi, kv);
			return true;
		}

		// Looks up key k. Returns a pointer to its slot, or nullptr when the
		// key is not stored. The pointer refers into the slot array and is
		// invalidated when the table grows.
		//
		// Why the Delete state exists: if erased slots became Empty again,
		// every element stored behind them on a probe chain would be unreachable.
		HashData<K, V>* Find(const K& k)
		{
			Hash hf;
			const size_t size = _tables.size();
			const size_t home = hf(k) % size;

			// Both probe sequences start at the home slot; a slot that has been
			// used never goes back to Empty, so an Empty home means "not stored".
			if (_tables[home]._s == Empty)
				return nullptr;

			// Pass 1 - linear sequence (elements placed by Insert_Linear or by
			// the linear fallback of Insert_Twice). Bounded by the table size so
			// a table without any Empty slot cannot cause an endless loop.
			for (size_t i = 0; i < size; ++i)
			{
				HashData<K, V>& slot = _tables[(home + i) % size];
				if (slot._s == Empty)
					break;

				// Only live slots count: an erased key must not be reported
				if (slot._s == Exist && slot._kv.first == k)
					return &slot;
			}

			// Pass 2 - quadratic sequence (elements placed by Insert_Twice)
			for (size_t i = 1; i < size; ++i)
			{
				HashData<K, V>& slot = _tables[(home + i * i) % size];
				if (slot._s == Empty)
					break;

				if (slot._s == Exist && slot._kv.first == k)
					return &slot;
			}

			return nullptr;
		}

		// Pseudo-deletion: marks the slot of key k as Delete.
		// Returns true when the key was found and removed, false otherwise.
		bool Erase(const K& k)
		{
			HashData<K, V>* ret = Find(k);
			if (ret == nullptr)
				return false;

			ret->_s = Delete;
			--_n;
			return true;
		}

		// Writes one line per slot to std::cout, followed by an empty line:
		// "key -> value" for live slots, otherwise "Empty" or "Delete".
		void Print() const
		{
			for (size_t i = 0; i < _tables.size(); ++i)
			{
				if (_tables[i]._s == Exist)
				{
					std::cout << "[" << i << "]-> " << _tables[i]._kv.first << " -> " << _tables[i]._kv.second << std::endl;
				}
				else if (_tables[i]._s == Empty)
				{
					std::cout << "[" << i << "]-> " << "Empty" << std::endl;
				}
				else
				{
					std::cout << "[" << i << "]-> " << "Delete" << std::endl;
				}
			}

			std::cout << std::endl;
		}

	private:
		// Pointer to one of the two public insert functions.
		using InsertFn = bool (HashTable::*)(const std::pair<K, V>&);

		// True once the load factor (live elements / slots) has reached 0.7.
		// ">=" rather than "==" so the threshold can never be stepped over.
		bool NeedExpand() const
		{
			return _n * 10 >= _tables.size() * 7;
		}

		// Doubles the slot count and re-inserts every live element with the
		// given insert function. Slots marked Delete are dropped on the way.
		void Expand(InsertFn reinsert)
		{
			HashTable<K, V, Hash> bigger;
			bigger._tables.resize(_tables.size() * 2);

			for (size_t i = 0; i < _tables.size(); ++i)
			{
				if (_tables[i]._s == Exist)
					(bigger.*reinsert)(_tables[i]._kv);
			}

			_tables.swap(bigger._tables);
			_n = bigger._n;
		}

		// Stores kv in slot `index` and counts it as a live element.
		void Fill(size_t index, const std::pair<K, V>& kv)
		{
			_tables[index]._kv = kv;
			_tables[index]._s = Exist;
			++_n;
		}

		std::vector<HashData<K, V>> _tables;	// slot array
		size_t _n = 0;	// number of slots in state Exist
	};
	void TestString()
	{
		// 模板实例化
		HashTable<string, string, HashString> ht;
		
		ht.Insert_Twice(make_pair("Sort", "排序"));
	    ht.Insert_Twice(make_pair("Left", "左边"));
		ht.Insert_Twice(make_pair("Right", "右边"));
		ht.Insert_Twice(make_pair("Middle", "中间"));

		ht.Print();
	}
};

闭散列仿函数实现方式2

库中的unordered_map在实例化时不需要传HashString（其模板参数带有缺省值class Hash = HashFunc<K>）。要实现同样的效果，就要使用模板特化template<>（严格来说，template<>针对string的写法属于全特化，本文标题沿用了"偏特化"的说法）。模板特化的匹配原则是找最匹配的版本：将string特化之后，key为string时走特化版本template<>，其它类型则走通用模板template<class K>。两个版本的名字均是HashFunc，所以缺省值HashFunc<K>会随K自动选择对应的版本，实例化时不用再传HashString

 仿函数 - 模板 + 结构体 + 运算符重载 - 意义：替代函数指针
template<class K>
struct HashFunc
{
	size_t operator()(const K& k)
	{
		return (size_t) k;
	}
};

// Specialization of HashFunc for std::string (template<> with every argument
// fixed is a full, i.e. explicit, specialization). String keys select this
// version automatically; every other key type uses the primary template.
template<>
struct HashFunc<std::string>
{
	// Folds every byte of `k` into the result: hash = hash * 31 + byte.
	// Returns 0 for the empty string. Declared const so it can be called
	// through a const functor object.
	size_t operator()(const std::string& k) const
	{
		size_t hashi = 0;
		for (const char ch : k)
		{
			// Go through unsigned char so bytes >= 0x80 (e.g. UTF-8 text)
			// contribute 0..255 instead of a sign-extended value on
			// platforms where plain char is signed.
			hashi = hashi * 31 + static_cast<unsigned char>(ch);
		}

		return hashi;
	}
};

缺省值是class Hash = HashFunc<K>。使用特化之后，通用版本和string版本同名，都叫HashFunc，所以实例化时不用再传结构体；按照最优匹配原则，string类型会自动匹配HashFunc<string>。这样就解决了实例化要传HashString的问题，现在仅需要传key / value的类型即可

仿函数实现误区

以下为仿函数使用的错误案例：将针对string和针对K类型的operator()封装在同一个结构体中。在K为int的时候可以正常使用（二者构成函数重载 - 参数类型不同），但当K为string时，二者函数名和参数均相同，就会产生冲突（重复声明，编译报错）

// Error: counter-example, kept as is on purpose.
// With K = int the two operator() overloads differ in their parameter type
// and overload normally. With K = string both become
// operator()(const string&), so the two declarations conflict.
template<class K>
struct HashFunc
{
	// Generic version: converts the key to size_t
	size_t operator()(const K& k)
	{
		return (size_t)k;
 
	}
	
	// Conflicts with the overload above when K is string
	size_t operator()(const string& k)
	{
		// Fold each character into the result: hash = hash * 31 + character
		size_t hashi = 0;
		for (auto e : k)
		{
			hashi *= 31;
			hashi += e;
		}

		return hashi;
	}
};

string类型支持转换成key的测试代码

封装在Hashtable.h中，main函数中直接通过作用域限定符（命名空间名::函数名）调用

void TestString()
{
	HashTable<string, string> ht;
		
	//ht.Insert_Linear(make_pair("Sort", "排序"));
	//ht.Insert_Linear(make_pair("Left", "左边"));
	//ht.Insert_Linear(make_pair("Right", "右边"));
	//ht.Insert_Linear(make_pair("Middle", "中间"));

	ht.Insert_Twice(make_pair("Sort", "排序"));
	ht.Insert_Twice(make_pair("Left", "左边"));
	ht.Insert_Twice(make_pair("Right", "右边"));
	ht.Insert_Twice(make_pair("Middle", "中间"));

	ht.Print();
}