Hash表模拟实现——链地址法-CSDN博客

本文链接：https://blog.youkuaiyun.com/Code_Journey/article/details/148852551

一、拉链法的概念

1.1、链地址法和开放定址法的比较

前面学习用开放定址法解决哈希冲突时，已经提到过它的缺陷：它是通过占用其他元素的位置来解决冲突的。而链地址法是在哈希表的每一个槽内存储一个链表或者红黑树，将冲突的元素存到该链表或者红黑树中。两种方法各有优缺点，一般推荐使用链地址法。

开放定址法的缺点：需要频繁地扩容，并且当数据发生聚集（冲突的元素堆积在相邻位置）时会影响查询效率。

开放定址法的优点：空间利用率高，与链地址法相比没有链表和红黑树的额外空间开辟。缓存命中率高。

链地址法的缺点：空间的开销较大，缓存命中率较低。

链地址法的优点：不需要频繁的扩容，处理冲突高效。

二、模拟实现

2.1、insert的逻辑

首先，我们先调用find函数看看我们所插入的数据是不是已经存在，如果存在直接返回false，否则继续操作。

然后，我们需要的是一个结点的指针作为我们的表中的数据，这样方便我们实现链表和红黑树。

接着，我们实现insert的逻辑就是先通过哈希函数计算出我们的映射的存储位置，接着在该位置进行结点的链接，这样我们便可以实现每一个冲突的位置都存有一个链表。

最后，需要注意的一点是，链地址法需要我们自己实现析构函数。虽然编译器会自动调用vector的析构函数，但是vector的析构函数只会销毁它自己存储的元素并释放自身的空间：对于自定义类型的元素会调用其析构函数，而我们存的元素是结点的指针（内置类型），指针所指向的结点并不会被释放。如果我们不写析构函数就会造成内存泄漏。

// Destructor: releases every node reachable from the buckets.
// The vector only stores Node* values, so the nodes themselves have to be
// deleted here.
~HashTable()
{
	for (Node*& head : _table)
	{
		// Detach and delete the front node until the chain is empty;
		// head is left as nullptr.
		while (head != nullptr)
		{
			Node* victim = head;
			head = head->_next;
			delete victim;
		}
	}
	_n = 0;
}

bool insert(const pair<K, V>& kv)
{
  if (find(kv))
  {
	  return false;
  }
	Hash hs;
	if (_n == _table.size())
	{
		// 这里不需要在定义一个哈希表对象了,直接定义一个vector即可,因为后续
		// 要使用旧的结点,就不需要在走一遍没有意义的析构函数了
		vector<Node*> newtable(__stl_next_prime(_table.size() + 1));
		for (size_t i = 0; i < _table.size(); i++)
		{
			Node* cur = _table[i];
			while (cur)
			{
				Node* next = cur->_next;
				size_t hash0 = hs(cur->_kv.first) % newtable.size();
				cur->_next = newtable[hash0];
				newtable[hash0] = cur;
				cur = next;
			}
			_table[i] = nullptr;
		}
		_table.swap(newtable);
	}
	size_t hash0 = hs(kv.first) % _table.size();
	Node* newnode = new Node(kv);
	newnode->_next = _table[hash0];
	_table[hash0] = newnode;
	_n++;
	return true;
}

2.2、find的实现逻辑

find的实现逻辑依然是先使用哈希函数求出映射位置，然后在该位置进行查找，也就是遍历链表，如果找到了就返回该结点，没有找到就返回空。

// Looks up key. Returns the node that stores it, or nullptr when the key
// is not in the table.
Node* find(const K& key)
{
	Hash hasher;
	// Only the bucket the key hashes to has to be searched.
	const size_t slot = hasher(key) % _table.size();
	for (Node* node = _table[slot]; node != nullptr; node = node->_next)
	{
		if (node->_kv.first == key)
		{
			return node;
		}
	}
	return nullptr;
}

2.3、erase的实现逻辑

实现erase时，我们需要一个prve指针，用来在删除结点后把链表重新链接起来。实现时注意两种情况：如果prve为空，表示要删除的结点就是该位置的第一个结点，只需要将cur->_next直接赋值给_table[hash0]；如果prve不为空，那就需要我们把prve和cur->_next两个结点链接起来。

bool erase(const K& key)
{
	Hash hs;
	size_t hash0 = hs(key) % _table.size();
	Node* cur = _table[hash0];
	Node* prve = nullptr;
	while (cur)
	{
		if (prve == nullptr && cur->_kv.first == key)
		{
			_table[hash0] = cur->_next;
			delete cur;
			_n--;
			return true;
		}
		if (prve && cur->_kv.first == key)
		{
			prve->_next = cur->_next;
			delete cur;
			_n--;
			return true;
		}
		prve = cur;
		cur = cur->_next;
	}
	return false;
}

三、封装后的代码

// Returns the smallest prime in the built-in table that is >= n, or the
// largest table entry when n exceeds every entry.
// NOTE(review): identifiers containing a double underscore are reserved for
// the implementation; the name is kept only because callers depend on it.
inline unsigned long __stl_next_prime(unsigned long n)
{
	// Note: assumes long is at least 32 bits.
	static constexpr int kNumPrimes = 28;
	static constexpr unsigned long kPrimeList[kNumPrimes] =
	{
		53, 97, 193, 389, 769,
		1543, 3079, 6151, 12289, 24593,
		49157, 98317, 196613, 393241, 786433,
		1572869, 3145739, 6291469, 12582917, 25165843,
		50331653, 100663319, 201326611, 402653189, 805306457,
		1610612741, 3221225473, 4294967291
	};

	const unsigned long* const first = kPrimeList;
	const unsigned long* const last = kPrimeList + kNumPrimes;
	// The table is sorted ascending, so lower_bound yields the first entry >= n.
	// Qualified with std:: so the call does not depend on a using-directive
	// (raw pointer arguments get no argument-dependent lookup).
	const unsigned long* const pos = std::lower_bound(first, last, n);
	return pos == last ? *(last - 1) : *pos;
}

// Default hash functor: converts the key itself to size_t.
// Usable for key types that can be cast to size_t; other key types need a
// specialization (as for std::string below) or a custom Hash argument.
template<class K>
struct HashFunc
{
	size_t operator()(const K& key) const
	{
		// C-style cast kept on purpose: it also accepts pointer keys, which a
		// static_cast would reject.
		return (size_t)key;
	}
};

// Specialization of the HashFunc class template for std::string.
// Folds in one byte at a time: add the byte, then multiply by 131.
template<>
struct HashFunc<std::string>
{
	size_t operator()(const std::string& str) const
	{
		size_t hash = 0;
		// Read each byte as unsigned char so that bytes >= 0x80 (e.g. UTF-8
		// text) contribute 128..255 whether or not plain char is signed,
		// giving the same hash on every platform.
		for (unsigned char ch : str)
		{
			hash += ch;
			hash *= 131;
		}
		return hash;
	}
};


namespace Code_Journey
{
	// One element of a bucket's singly linked chain.
	template<class K, class V>
	struct HashTableNode
	{
		pair<K, V> _kv;                  // stored key/value pair
		HashTableNode* _next = nullptr;  // next node in the same chain, nullptr at the end

		// Copies kv into the node; the node starts out unlinked.
		HashTableNode(const pair<K, V>& kv)
			: _kv(kv)
		{}
	};

	template<class K, class V, class Hash = HashFunc<K>>
	class HashTable
	{
		typedef HashTableNode<K, V> Node;
	public:
		HashTable(size_t n = __stl_next_prime(0))
			:_table(n)
			,_n(0)
		{}
		~HashTable()
		{
			for (size_t i = 0; i < _table.size(); i++)
			{
				Node* cur = _table[i];
				while (cur)
				{
					Node* next = cur->_next;
					delete cur;
					cur = next;
				}
				_table[i] = nullptr;
			}
			_n = 0;
		}
		bool insert(const pair<K, V>& kv)
		{
			if (find(kv))
			{
				return false;
			}
			Hash hs;
			if (_n == _table.size())
			{
				// 这里不需要在定义一个哈希表对象了,直接定义一个vector即可,因为后续
				// 要使用旧的结点,就不需要在走一遍没有意义的析构函数了
				vector<Node*> newtable(__stl_next_prime(_table.size() + 1));
				for (size_t i = 0; i < _table.size(); i++)
				{
					Node* cur = _table[i];
					while (cur)
					{
						Node* next = cur->_next;
						size_t hash0 = hs(cur->_kv.first) % newtable.size();
						cur->_next = newtable[hash0];
						newtable[hash0] = cur;
						cur = next;
					}
					_table[i] = nullptr;
				}
				_table.swap(newtable);
			}
			size_t hash0 = hs(kv.first) % _table.size();
			Node* newnode = new Node(kv);
			newnode->_next = _table[hash0];
			_table[hash0] = newnode;
			_n++;
			return true;
		}
		Node* find(const K& key)
		{
			Hash hs;
			size_t hash0 = hs(key) % _table.size();
			Node* cur = _table[hash0];
			while (cur)
			{
				if (cur->_kv.first == key)
				{
					return cur;
				}
				cur = cur->_next;
			}
			return nullptr;
		}
		bool erase(const K& key)
		{
			Hash hs;
			size_t hash0 = hs(key) % _table.size();
			Node* cur = _table[hash0];
			Node* prve = nullptr;
			while (cur)
			{
				if (prve == nullptr && cur->_kv.first == key)
				{
					_table[hash0] = cur->_next;
					delete cur;
					_n--;
					return true;
				}
				if (prve && cur->_kv.first == key)
				{
					prve->_next = cur->_next;
					delete cur;
					_n--;
					return true;
				}
				prve = cur;
				cur = cur->_next;
			}
			return false;
		}
	private:
		vector<Node*> _table;
		size_t _n;
	};
}