搜索结构之哈希

最新推荐文章于 2024-07-17 17:39:45 发布

SMQsmq

最新推荐文章于 2024-07-17 17:39:45 发布

阅读量308

点赞数

CC 4.0 BY-SA版权

文章标签： C++ 哈希

本文链接：https://blog.youkuaiyun.com/balabalabala111/article/details/73264989

本文介绍了哈希表的两种散列冲突解决方法：闭散列法和开散列法。闭散列法利用顺序表存储，效率高但易堆积，查找复杂；而开散列法则采用单链表，避免堆积但增加空间开销。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

哈希表通过哈希函数计算每个关键码在表中的存储位置；当不同的关键码被映射到同一位置（即发生哈希冲突）时，常用的处理方法有闭散列法和开散列法两种：

闭散列法（开放定址法）：所有元素直接存放在一张顺序表中，存储效率高；但线性探测容易产生堆积，查找和删除都必须沿着探测序列进行（删除只能采用伪删除），可以改用二次探测来缓解堆积；

开散列法（链地址法）：每个桶用一条单链表存放同义词，不会产生堆积现象，但因为每个节点都附加了指针域，增加了空间开销。

二者具体的实现方法如下：

闭散列法：

#include<string>
#include<vector>
#include<iostream>
using namespace std;

// Occupancy state of a single slot in the closed-hashing table.
enum State
{
	EMPTY = 0,  // no element is stored in this slot
	EXIST = 1,  // the slot currently holds an element
	DELETE = 2, // the element stored here was removed (lazy deletion marker)
};

// One slot of the closed-hashing table: a key/value pair together with the
// slot's occupancy state.
template<class K, class V>
struct HashNode
{
	State _s;            // occupancy state of this slot
	std::pair<K, V> _kv; // the stored element, represented as a key/value pair

	// A new slot starts out EMPTY with a default-constructed pair.
	HashNode()
		: _s(EMPTY), _kv()
	{}
};

// Default hash functor: turns a key into a size_t hash value.
// The primary template covers keys that convert implicitly to size_t
// (integers) and simply returns the key.
template<class K>
class _HashFun_
{
public:
	size_t operator()(const K& key) const
	{
		return key;
	}
};

// Specialization for std::string keys, implemented with the BKDR string hash.
template<>
class _HashFun_<std::string>
{
public:
	// BKDR hash of a NUL-terminated string: hash = hash * 131 + byte, with
	// natural size_t wrap-around. A null pointer hashes to 0, like "".
	size_t BKDhash(const char* str) const
	{
		size_t hash = 0;
		if (str == 0)
		{
			return hash;
		}
		for (; *str != '\0'; ++str)
		{
			// Convert through unsigned char so bytes >= 0x80 (for example
			// UTF-8 text) are not sign-extended on platforms with signed char.
			hash = hash * 131 + static_cast<unsigned char>(*str);
		}
		return hash;
	}

	// Hashes the string's characters up to its first NUL byte.
	size_t operator()(const std::string& str) const
	{
		return BKDhash(str.c_str());
	}
};

//_HashFun_<K>用来返回key值，区别整形与字符串；IsLine区别使用二次探测还是一次探测
template<class K, class V,class HashFun=_HashFun_<K>,bool IsLine=true>
class Hash
{
public:
	typedef Hash<K,V,HashFun,IsLine> Self;
public:
	Hash(size_t size = GetNextPrime(0))
		:_size(0)
	{
		_table.resize(size);
	}

	bool Insert(const K& key, const V& value)
	{
		return _Insert(key, value);
	}

	bool Remove(const K& key)
	{
		return _Remove(key);
	}

	pair<HashNode<K,V>*, bool>Find(const K& key)//在表中查找值为key的元素
	{
		size_t index = HashFunc(key);
		size_t Addr = index;
		size_t i = 1;
		HashNode<K, V>& element = _table[index];
		while (_table[Addr]._s != EMPTY)//在查找某个值的过程中只要碰到了EMPTY还没找到就已经找不到了
		{
			HashNode<K, V>& elem = _table[Addr];
			if (_table[Addr]._kv.first == key)
			{
				if (_table[Addr]._s == EXIST)
				{
					cout << "已找到" << endl;
					return make_pair(&elem, true);
				}
				else//如果该状态为DELETE，就错误
				{
					cout << "已被删除" << endl;
					return make_pair(&elem, false);
				}
			}
			if (IsLine)
			{
				index = DetectFirst(index);
			}
			else
			{
				index = DetectSecond(index, i);
				++i;
			}
			if (Addr == _table.size())//循环查找，直到回到刚开始查找的地方
			{
				Addr = 0;
			}
			if (index == Addr)//已经找了一圈还没找到
			{
				cout << "没找到" << endl;
				return make_pair(&element, false);
			}
		}
		//碰到了空的地方
		cout << "没找到" << endl;
		return make_pair(&element, false);
	}

protected:
	vector<HashNode<K, V>> _table;
	size_t _size;//当前表中的有效元素

	//找第一个比num大的素数
	size_t GetNextPrime(size_t num)
	{
		// 使用素数表对齐做哈希表的容量，降低哈希冲突
		const int _PrimeSize = 28;
		static const unsigned long _PrimeList[_PrimeSize] =
		{
			53ul, 97ul, 193ul, 389ul, 769ul,
			1543ul, 3079ul, 6151ul, 12289ul, 24593ul,
			49157ul, 98317ul, 196613ul, 393241ul, 786433ul,
			1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul,
			50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul,
			1610612741ul, 3221225473ul, 4294967291ul
		};
		for (size_t idx = 0; idx < _PrimeSize; ++idx)
		{
			if (_PrimeList[idx]>num)
			{
				return _PrimeList[idx];
			}
		}
		return _PrimeList[_PrimeSize-1];
	}

	size_t DetectFirst(size_t index)//一次探测
	{
		index += 1;
		if (index == _table.size())
		{
			index = 0;
		}
		return index;
	}

	size_t DetectSecond(size_t index,size_t i)//二次探测
	{
		//H2=H0+i*i  H3=H0+(i+1)^2 通过这两个式子合并生成如下式子
		index = index + 2 * i + 1;
		if (index >= _table.size())
		{
			index = 0;
		}
		return index;
	}

	bool _Remove(const K& key)
	{
		size_t index = HashFunc(key);
		size_t Addr = index;
		while (_table[Addr]._s == EXIST)
		{
			if (_table[Addr]._kv.first == key)
			{
				_table[Addr]._s = DELETE;//这里使用伪删除法，只是把状态置为DELETE即可
				_size--;
				return true;
			}
			else
			{
				Addr++;
				if (Addr == _table.size())//循环查找的方法
				{
					Addr = 0;
				}
				if (Addr == index)//已找完一圈
				{
					cout << "没找到" << endl;
					return false;
				}
			}
		}
		if (_table[Addr]._s == DELETE)
		{
			cout << "该元素已经被删除" << endl;
			return false;
		}
		cout << "没有找到该元素" << endl;
		return false;
	}

	void _CheckCapacity()
	{
		/*if (_table.size() == 0)
		{
			_table.resize(11);
		}*/
		//哈希表的大小从一开始就给了素数表中的第一个值，所以不可能为空
		//这里原本负载因子不能超过0.7，但是_size为整数，相除不可能是小数
		if (10 * _size / _table.size()>7)
		{
			//当负载因子达到一定数值时候，申请比原来大两倍的空间，
			//把原来的元素状态为EXIST的搬移过来，搬移的时候是要按照新开辟的空间大小来计算存储地址的

			/*size_t newSize = _table.size() * 2;*/
			//使用素数表时当需要重新分配大小不需要再*2

			Self ht(GetNextPrime(_table.size()));
			for (size_t idx = 0; idx < _table.size(); ++idx)
			{
				if (_table[idx]._s == EXIST)
				{
					ht.Insert(_table[idx]._kv.first, _table[idx]._kv.second);
				}
			}
			swap(_size, ht._size);
			_table.swap(ht._table);//这里的处理方式类似于深拷贝
		}
	}

	bool _Insert(const K& key, const V& value)
	{
		_CheckCapacity();
		size_t index = HashFunc(key);
		size_t i = 1;//i表示当前为第几次探测
		while (1)
		{
			if (_table[index]._s != EXIST)//当前位置没有元素
			{
				_table[index]._kv.first = key;
				_table[index]._kv.second = value;
				_size += 1;
				_table[index]._s = EXIST;
				return true;
			}
			//如果当前位置为DEL，也可直接插入key，只有当状态为exist时才要采用闭散列法的一次探测法去逐个探测插入
			else
			{
				/*index += 1;
				if (index == _table.size())
				{
					index = 0;
				}*/
				//这里不存在元素存满哈希表的情况，因为负载因子的存在导致元素插入到一定数量时就会重新开辟空间

				if (IsLine)
				{
					index = DetectFirst(index);
				}
				else
				{
					index = DetectSecond(index, i);
					++i;
				}
			}
		}
	}

	//这里的哈希函数采用的是“除留余数”法，用当前key值除以哈希表的大小，除此之外还有：
	//直接定址法，平方取中法，折叠法，随机数法，数学分析法
	size_t HashFunc(const K& key)
	{
		HashFun f;
		return (f(key)) % _table.size();
		//或者 return (HashFun()(key))%_table.size();//注意这里要用无名对象通过函数调用符来当作key值
	}


};


void TestHash2()
{
	//int array[] = { 11, 68, 25, 37, 14, 36, 49, 57 };
	//size_t size = sizeof(array) / sizeof(array[0]);
	//Hash<int,int,_HashFun_<int>,true> hash(11);
	//for (size_t idx = 0; idx < size; ++idx)
	//{
	//	hash.Insert(array[idx], idx);
	//}
	//hash.Insert(24, 8);
	//hash.Insert(27, 9);
	//hash.Insert(26, 10);

	////hash.Remove(14);
	//hash.Remove(14);
	//
	//cout << hash.Find(68).second << " " << (*(hash.Find(68).first))._kv.first << endl;	


	Hash<string, int, _HashFun_<string>, false> hash(11);
	hash.Insert("他", 1);
	hash.Insert("2", 2);
	hash.Insert("10", 10);
	hash.Insert("20", 20);
	hash.Insert("6", 6);
	hash.Remove("1");
	cout << hash.Find("1").second << " " << (*(hash.Find("1").first))._kv.first << endl;

	//hash.Remove(14);
	/*hash.Remove(14);

	cout << hash.Find(68).second << " " << (*(hash.Find(68).first))._kv.first << endl;*/
}

开散列法，其中还加入了迭代器的实现：

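// Open hashing (separate chaining): a hash-bucket implementation.
// Keys for which the hash function yields the same address belong to the same
// set; keys within one set are called synonyms.
// Each such set is one bucket. The synonyms of a bucket are linked into a
// singly linked list, and the table stores the head node of every list.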

#include<iostream>
#include<string>
#include<vector>
using namespace std;

// Node of a bucket's singly linked list: the stored key/value pair plus a
// pointer to the next synonym in the same bucket.
template<class K, class V>
struct HashBucketNode
{
	HashBucketNode<K, V>* _pnext; // next node in the bucket, null at the tail
	std::pair<K, V> _kv;          // the stored key/value pair

	// Builds an unlinked node that holds a copy of key and value.
	HashBucketNode(const K& key, const V& value)
		: _pnext(nullptr)
		, _kv(key, value)
	{}
};

// Default hash functor: turns a key into a size_t hash value.
// The primary template covers keys that convert implicitly to size_t
// (integers) and simply returns the key.
template<class K>
class _HashFun_
{
public:
	size_t operator()(const K& key) const
	{
		return key;
	}
};

// Specialization for std::string keys, implemented with the BKDR string hash.
template<>
class _HashFun_<std::string>
{
public:
	// BKDR hash of a NUL-terminated string: hash = hash * 131 + byte, with
	// natural size_t wrap-around. A null pointer hashes to 0, like "".
	size_t BKDHash(const char* str) const
	{
		size_t hash = 0;
		if (str == 0)
		{
			return hash;
		}
		for (; *str != '\0'; ++str)
		{
			// Convert through unsigned char so bytes >= 0x80 (for example
			// UTF-8 text) are not sign-extended on platforms with signed char.
			hash = hash * 131 + static_cast<unsigned char>(*str);
		}
		return hash;
	}

	// Takes the string by const reference so the functor can be applied to
	// the `const K&` keys that the hash table passes in.
	size_t operator()(const std::string& str) const
	{
		return BKDHash(str.c_str());
	}
};

// Forward declaration: the iterator keeps a pointer to the table it walks.
template<class K, class V, class HashFun>
class HashBucket;

// Forward iterator over the nodes of a HashBucket.
// Ref and Ptr are the reference and pointer types returned by operator* and
// operator->. The iterator stores the current node and the owning table;
// a null node pointer represents the end position.
template<class K, class V, class Ref, class Ptr, class HashFun = _HashFun_<K>>
class HashIterator
{
public:
	friend class HashBucket<K, V, HashFun>;
	typedef HashBucketNode<K, V> Node;
	typedef HashIterator<K, V, std::pair<K, V>&, std::pair<K, V>*, HashFun> Iterator;

	// Creates an end iterator that is not attached to any table.
	HashIterator()
		:_pNode(nullptr)
		, _ht(nullptr)
	{}

	// Creates an iterator that points at pNode inside the table ht.
	HashIterator(Node* pNode, HashBucket<K, V, HashFun>* ht)
		:_pNode(pNode)
		, _ht(ht)
	{}

	HashIterator(const Iterator& it)
		:_pNode(it._pNode)
		, _ht(it._ht)
	{}

	// Returns the key/value pair of the current node.
	// Must not be called on an end iterator.
	Ref operator*() const
	{
		return _pNode->_kv;
	}

	Ptr operator->() const
	{
		return (&(operator*()));
	}

	// Two iterators are equal when they point at the same node; the table
	// pointer is not compared.
	bool operator==(const Iterator& it) const
	{
		return _pNode == it._pNode;
	}

	bool operator!=(const Iterator& it) const
	{
		return (!(*this == it));
	}

	// Pre-increment: moves to the next element and returns this iterator.
	Iterator& operator++()
	{
		Next();
		return *this;
	}

	// Post-increment: moves to the next element and returns the old position.
	Iterator operator++(int)
	{
		Iterator temp(*this);
		Next();
		return temp;
	}

private:
	Node* _pNode;                   // current node, null at the end position
	HashBucket<K, V, HashFun>* _ht; // table that owns the node

	// Moves to the following node of the same bucket, or to the head of the
	// next non-empty bucket, or to the end position when nothing follows.
	// Incrementing an end iterator leaves it at the end.
	void Next()
	{
		if (_pNode == nullptr)
		{
			return;
		}
		if (_pNode->_pnext)
		{
			_pNode = _pNode->_pnext;
			return;
		}
		if (_ht == nullptr)
		{
			_pNode = nullptr;
			return;
		}
		// The current bucket is exhausted: its number is recomputed from the
		// key, then the buckets after it are scanned for a head node.
		const size_t bucket = _ht->HashFunc(_pNode->_kv.first);
		for (size_t idx = bucket + 1; idx < _ht->_table.size(); ++idx)
		{
			if (_ht->_table[idx])
			{
				_pNode = _ht->_table[idx];
				return;
			}
		}
		_pNode = nullptr; // only empty buckets follow
	}
};



template<class K, class V,class HashFun=_HashFun_<K>>
class HashBucket
{
public:
	friend class HashIterator<K, V, pair<K, V>&, pair<K, V>*>;
	typedef HashBucketNode<K, V> Node;
	typedef HashBucket<K, V, HashFun> Self;
	typedef HashIterator<K, V, pair<K,V>&, pair<K,V>*> Iterator;

	HashBucket(size_t bucketNum = 10)//构造函数应该给出哈希表中的桶的个数
		:_size(0)
	{
		_table.resize(GetNextPrime(bucketNum));
	}

	pair<Iterator,bool> Insert(const K& key, const V& value)
	{
		//return _InsertUnique(key, value);//哈希表中的关键码唯一
		return _InsertEqual(key, value);//哈希表中的关键码可以重复
	}

	size_t Remove(const K& key)
	{
		return _RemoveEqual(key);//删除一个键值为key的关键码
		//return _RemoveUnique(key);//删除所有键值为key的关键码
	}

	Iterator Find(const K& key)
	{
		return _Find(key);
	}

	//这里的析构函数不能少，因为哈希桶里的节点都是通过new出来的，必须析构防止内存泄漏
	//逐个去析构每个桶里面的元素
	~HashBucket()
	{
		Destroy();
	}

	//判断哈希表是否为空
	bool Empty()const
	{
		return _size == 0;
	}

	//返回哈希表的键值个数
	size_t Size()const
	{
		return _size;
	}

	//计算key值的个数
	size_t Count(const K key)
	{
		size_t bucket = HashFunc(key);
		Node* pcur = _table[bucket];
		size_t count = 0;
		while (pcur)
		{
			if (pcur->_kv.first == key)
			{
				count++;
			}
			pcur = pcur->_pnext;
		}
		return count;
	}

	//返回桶的个数
	size_t bucketCount()const
	{
		return _table.size();
	}

	//返回某个桶中的元素个数
	size_t CountInBucket(size_t bucket)const
	{
		size_t count = 0;
		Node* pcur = _table[bucket];
		while (pcur)
		{
			count++;
			pcur = pcur->_pnext;
		}
		return count;
	}

	Iterator End()
	{
		return Iterator(NULL, this);//返回最后一个有效的节点的下一个节点
	}

	Iterator Begin()//返回第一个不为空的节点
	{
		for (size_t idx = 0; idx < _table.size(); ++idx)
		{
			if (_table[idx])
			{
				return Iterator(_table[idx],this);
			}
		}
		return End();
	}


private:
	vector<Node*> _table;
	size_t _size;//哈希表中当前的有效元素

	//计算哈希地址
	size_t HashFunc(const K& key)
	{
		return (HashFun()(key))%(_table.size());
	}

	//得到下一个比当前哈希表长度大的素数
	size_t GetNextPrime(size_t num)
	{
		// 使用素数表对齐做哈希表的容量，降低哈希冲突
		const int _PrimeSize = 28;
		static const unsigned long _PrimeList[_PrimeSize] =
		{
			53ul, 97ul, 193ul, 389ul, 769ul,
			1543ul, 3079ul, 6151ul, 12289ul, 24593ul,
			49157ul, 98317ul, 196613ul, 393241ul, 786433ul,
			1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul,
			50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul,
			1610612741ul, 3221225473ul, 4294967291ul
		};
		for (size_t idx = 0; idx < _PrimeSize; ++idx)
		{
			if (_PrimeList[idx]>num)
			{
				return _PrimeList[idx];
			}
		}
		return _PrimeList[_PrimeSize - 1];
	}

	//插入的时候为了节省找插入位置的时间，这里采用头插法,对于关键码相同的元素只插入一个
	pair<Iterator, bool> _InsertEqual(const K& key, const V& value)
	{
		CheckCapacity();
		//先计算该key值属于哈希表中的哪个桶中
		size_t bucket = HashFunc(key);

		//再利用头插法把关键码插入，如果该桶为空和不为空的情况可以一并处理
		Node* pNewNode = new Node(key, value);
		pNewNode->_pnext = _table[bucket];
		_table[bucket] = pNewNode;
		/*pNewNode->_kv.first = key;
		pNewNode->_kv.second = value;*///这里不需要，Node的构造函数里已经赋值过了
		_size++;
		return make_pair(Iterator(pNewNode,this), true);
	}

	//可以插入关键码相同的元素
	pair<Iterator, bool> _InsertUnique(const K& key, const V& value)
	{
		CheckCapacity();
		//先计算该key值属于哈希表中的哪个桶中
		size_t bucket = HashFunc(key);

		Node* pcur = _table[bucket];
		//查找在该桶中是否有关键码一样的元素
		while (pcur)
		{
			if (pcur->_kv.first == key)
			{
				cout << "该关键码已经存在，插入失败" << endl;
				return make_pair(Iterator(pcur,this), false);
			}
			pcur = pcur->_pnext;
		}
		Node* pNewNode = new Node(key, value);
		pNewNode->_pnext = _table[bucket];
		_table[bucket] = pNewNode;
		_size++;
		return make_pair(Iterator(pNewNode, this), true);
	}

	//删除一个键值为key的关键码
	size_t _RemoveUnique(const K& key)
	{
		//先计算该key值属于哈希表中的哪个桶中
		size_t bucket = HashFunc(key);
		size_t oldsize = _size;

		Node* prev = NULL;
		Node* pcur = _table[bucket];
		while (pcur)
		{
			if (pcur->_kv.first == key)
			{
				if (prev)//第一次进来prev为空，需要在这有判空操作
				{
					prev->_pnext = pcur->_pnext;
					delete pcur;
					_size--;
					return true;
				}
				//删除的是该桶的第一个元素
				_table[bucket] = pcur->_pnext;
				delete pcur;
				_size--;
				return (oldsize - _size);
			}
			prev = pcur;
			pcur = pcur->_pnext;
		}
		cout << "没有找到该关键码，删除失败" << endl;
		return (oldsize - _size);
	}

	//删除全部的键值为key的关键码
	size_t _RemoveEqual(const K& key)
	{
		//先计算该key值属于哈希表中的哪个桶中
		size_t bucket = HashFunc(key);
		size_t oldsize = _size;

		Node* prev = NULL;
		Node* pcur = _table[bucket];
		size_t oldsize = _size;
		while (pcur)
		{
			if (pcur->_kv.first == key)
			{
				if (prev)//第一次进来prev为空，需要在这有判空操作
				{
					prev->_pnext = pcur->_pnext;
					delete pcur;
					pcur = prev->_pnext;
					_size--;
				}
				else
				{
					//删除的是该桶的第一个元素
					_table[bucket] = pcur->_pnext;
					delete pcur;
					pcur = _table[bucket];
					_size--;
				}
			}
			else
			{
				prev = pcur;
				pcur = pcur->_pnext;
			}

		}
		//如果删除成功，当前哈希表中的有效元素不等于刚开始的有效元素
		if (oldsize == _size)
		{
			cout << "删除成功" << endl;
			return (oldsize - _size);
		}
		else
		{
			cout << "删除失败" << endl;
			return (oldsize - _size);
		}
	}

	//找到键值为key的元素
	Iterator _Find(const K& key)
	{
		//先计算该key值属于哈希表中的哪个桶中
		size_t bucket = HashFunc(key);

		Node* pcur = _table[bucket];
		while (pcur)
		{
			if (pcur->_kv.first == key)
			{
				cout << "找到了" << key << endl;
				return Iterator(pcur,this);
			}
			pcur = pcur->_pnext;
		}
		cout << "没找到" << endl;
		return End();
	}

	//如果有效元素个数达到了哈希表的长度就增容
	void CheckCapacity()
	{
		if (_size == _table.size())
		{
			Self temp(GetNextPrime(_table.size()));
			for (size_t idx = 0; idx < _table.size(); ++idx)
			{
				Node* pcur = _table[idx];
				while (pcur)
				{
					temp.Insert(pcur->_kv.first, pcur->_kv.second);
				}
			}
			swap(_size, temp._size);
			_table.swap(temp._table);//使用成员函数的调用，只是改变了两个vector里面的指针指向，省了副本的开销
		}
	}

	//销毁哈希表
	void Destroy()//销毁哈希表
	{
		for (size_t idx = 0; idx < _table.size(); ++idx)
		{
			Node* pcur = _table[idx];
			while (pcur)
			{
				pcur = pcur->_pnext;
				delete _table[idx];
				_table[idx] = pcur;
			}
		}
		_size = 0;
	}


};


// Demo of the bucket table's iterator: inserts a few int keys and prints
// every stored key/value pair in iteration order.
void IteratorTest()
{
	int array[] = { 11, 68, 57, 25, 14, 36, 37, 49 };
	size_t size = sizeof(array) / sizeof(array[0]);
	HashBucket<int, int> hb(size);
	for (size_t idx = 0; idx < size; ++idx)
	{
		hb.Insert(array[idx], static_cast<int>(idx + 1));
	}
	// Test against End() before every dereference; dereferencing the end
	// position would access a null node.
	for (HashBucket<int, int>::Iterator it = hb.Begin(); it != hb.End(); ++it)
	{
		std::cout << (*it).first << " " << it->second << std::endl;
	}
	std::cout << std::endl;
}