简单模拟实现unordered_set和unordered_map以及其底层hashtable

最新推荐文章于 2025-01-29 22:02:25 发布

嗔怪

最新推荐文章于 2025-01-29 22:02:25 发布

阅读量402

点赞数 1

CC 4.0 BY-SA版权

分类专栏： C++ 文章标签：哈希表 hash 数据结构

本文链接：https://blog.youkuaiyun.com/weixin_42458272/article/details/105415350

C++ 专栏收录该内容

7 篇文章

订阅专栏

本文深入探讨了哈希表的底层实现原理，包括unordered_map和unordered_set的数据结构和操作细节。通过模拟实现，展示了如何使用指针数组和链表来构建哈希表，并解释了哈希函数、碰撞解决策略以及迭代器的使用。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

理解底层结构

unordered_map和unordered_set底层采用的是哈希桶（开散列，也叫拉链法）的数据结构，说白了就是一个指针数组。数组的每个位置称为一个桶，每个桶保存一条单链表的头指针。键经过哈希散列函数映射到数组中的某个下标后，为数据新建一个结点，并把它插入该桶对应的单链表。

模拟实现hashtable就是实现一个维护指针数组的类。

namespace HASH_BUCKET
{
	// Forward declaration: the iterator keeps a pointer back to the table it walks.
	//   K      - key type
	//   V      - stored value type
	//   Hash   - functor mapping a K to size_t
	//   GetKey - functor extracting the key from a V
	template<class K, class V, class Hash, class GetKey>
	class HashTable;

	// One node of a bucket's singly linked chain.
	template<class T>
	struct HashNode
	{
		HashNode(const T& val)
			:_data(val)
		{}
		HashNode<T>* next = nullptr;
		T _data;
	};

	// Forward iterator over all elements of a HashTable.
	// A null node pointer represents end(). An iterator is invalidated when the
	// node it refers to is erased or the table is cleared/destroyed.
	template<class K, class V, class Hash, class GetKey>
	class HashTableIterator
	{
		typedef HashTableIterator<K, V, Hash, GetKey> iterator;
		typedef HashNode<V> Node;
		typedef HashTable<K, V, Hash, GetKey> Table;

		// HashTable::Erase(iterator) has to read the current node.
		friend class HashTable<K, V, Hash, GetKey>;
	public:
		HashTableIterator(Node* node, Table* table)
			:elem(node), _ptable(table)
		{}

		// Pre-increment: move to the next node of the current chain, or to the
		// head of the next non-empty bucket. Incrementing end() is a no-op.
		iterator& operator++()
		{
			if (!elem)
				return *this;

			Node* cur = elem->next;
			if (!cur)
			{
				// The current chain is exhausted: find out which bucket we are in
				// by re-hashing the current key, then scan the buckets after it.
				GetKey get_key;
				const size_t bucketCount = _ptable->_hashtable.size();
				size_t index = _ptable->HashFunc(get_key(elem->_data), bucketCount);
				while (!cur && ++index < bucketCount)
					cur = _ptable->_hashtable[index];
			}
			elem = cur;
			return *this;
		}

		// Post-increment: advance and return the previous position.
		iterator operator++(int)
		{
			iterator previous(*this);
			++(*this);
			return previous;
		}

		V& operator*() const
		{
			return elem->_data;
		}
		V* operator->() const
		{
			return &(operator*());
		}

		// Two iterators are equal when they refer to the same node of the same table.
		bool operator==(const iterator& other) const
		{
			return elem == other.elem && _ptable == other._ptable;
		}
		bool operator!=(const iterator& other) const
		{
			return !(*this == other);
		}

	private:
		Node* elem;
		Table* _ptable;
	};

	// Separate-chaining hash table: a vector of bucket head pointers, each
	// bucket being a singly linked list of HashNode<V>. Keys are unique.
	// The table owns its nodes and frees them in clear() and the destructor.
	template<class K, class V, class Hash, class GetKey>
	class HashTable
	{
		typedef HashNode<V> Node;

		// The iterator walks the bucket array directly.
		friend class HashTableIterator<K, V, Hash, GetKey>;

		GetKey get_key;
	public:
		typedef HashTableIterator<K, V, Hash, GetKey> iterator;

		HashTable() = default;

		// Deep copy: every node is duplicated, chain order is preserved.
		HashTable(const HashTable& other)
			:get_key(other.get_key), _hashtable(other._hashtable.size())
		{
			try
			{
				for (size_t i = 0; i < other._hashtable.size(); ++i)
				{
					Node** link = &_hashtable[i];
					for (Node* cur = other._hashtable[i]; cur; cur = cur->next)
					{
						*link = new Node(cur->_data);
						link = &(*link)->next;
						++_dataNum;
					}
				}
			}
			catch (...)
			{
				// The destructor does not run for a partially constructed object.
				clear();
				throw;
			}
		}

		// Copy-and-swap assignment.
		HashTable& operator=(HashTable other)
		{
			swap(other);
			return *this;
		}

		~HashTable()
		{
			clear();
		}

		// Exchange the contents of two tables without copying any node.
		void swap(HashTable& other)
		{
			_hashtable.swap(other._hashtable);
			std::swap(_dataNum, other._dataNum);
			std::swap(get_key, other.get_key);
		}

		// Insert a value if no element with the same key exists.
		// Returns {iterator to the element with that key, true if inserted}.
		std::pair<iterator, bool> Insert(const V& _kv)
		{
			// Look for a duplicate before allocating or growing anything.
			Node* existing = FindNode(get_key(_kv));
			if (existing)
				return std::make_pair(iterator(existing, this), false);

			// Grow when the element count reaches the bucket count
			// (this also covers the initially empty table).
			if (_dataNum == _hashtable.size())
				Grow();

			Node* newNode = new Node(_kv);
			// Push the new node at the front of its bucket's chain.
			const size_t index = HashFunc(get_key(_kv), _hashtable.size());
			newNode->next = _hashtable[index];
			_hashtable[index] = newNode;

			++_dataNum;
			return std::make_pair(iterator(newNode, this), true);
		}

		// Return an iterator to the element with the given key, or end().
		iterator Find(const K& key)
		{
			return iterator(FindNode(key), this);
		}

		// Remove the element with the given key. Returns true if one was removed.
		bool Erase(const K& key)
		{
			if (_hashtable.empty())
				return false;

			// 'link' always points at the pointer that refers to the current
			// node, so head and interior removal are the same operation.
			Node** link = &_hashtable[HashFunc(key, _hashtable.size())];
			while (*link)
			{
				Node* cur = *link;
				if (get_key(cur->_data) == key)
				{
					*link = cur->next;
					delete cur;
					--_dataNum;
					return true;
				}
				link = &cur->next;
			}
			return false;
		}

		// Remove the element the iterator refers to; the iterator is invalid
		// afterwards. Erasing end() returns false.
		bool Erase(const iterator& it)
		{
			if (!it.elem)
				return false;
			// Copy the key: the node that holds it is about to be destroyed.
			const K key = get_key(it.elem->_data);
			return Erase(key);
		}

		// Lookup by key; equivalent to Find(key). Never inserts.
		iterator operator [](const K& key)
		{
			return Find(key);
		}

		// Hash function: maps a key to a bucket index in [0, size).
		// Precondition: size != 0.
		size_t HashFunc(const K& key, size_t size) const
		{
			Hash hash;
			return hash(key) % size;
		}

		// Iterator to the head of the first non-empty bucket, or end().
		iterator begin()
		{
			for (size_t i = 0; i < _hashtable.size(); i++)
				if (_hashtable[i])
					return iterator(_hashtable[i], this);
			return end();
		}
		iterator end()
		{
			return iterator(nullptr, this);
		}

		// Destroy every node. The bucket array keeps its current size.
		void clear()
		{
			for (size_t i = 0; i < _hashtable.size(); i++)
			{
				while (_hashtable[i])
				{
					Node* cur = _hashtable[i];
					_hashtable[i] = cur->next;
					delete cur;
				}
			}
			_dataNum = 0;
		}

		// Number of stored elements.
		size_t size() const
		{
			return _dataNum;
		}
		// Number of buckets.
		size_t bucket_count() const
		{
			return _hashtable.size();
		}
		bool empty() const
		{
			return _dataNum == 0;
		}
	private:
		// Return the node holding 'key', or nullptr (also for an empty table,
		// where no bucket index can be computed).
		Node* FindNode(const K& key)
		{
			if (_hashtable.empty())
				return nullptr;
			Node* cur = _hashtable[HashFunc(key, _hashtable.size())];
			while (cur && !(get_key(cur->_data) == key))
				cur = cur->next;
			return cur;
		}

		// Enlarge the bucket array (10 buckets initially, doubling afterwards)
		// and relink the existing nodes into it. No node is allocated or freed.
		void Grow()
		{
			const size_t newSize = _hashtable.empty() ? 10 : _hashtable.size() * 2;
			std::vector<Node*> buckets(newSize);
			for (size_t i = 0; i < _hashtable.size(); ++i)
			{
				Node* cur = _hashtable[i];
				while (cur)
				{
					Node* following = cur->next;
					const size_t index = HashFunc(get_key(cur->_data), newSize);
					cur->next = buckets[index];
					buckets[index] = cur;
					cur = following;
				}
			}
			_hashtable.swap(buckets);
		}

		std::vector<Node*> _hashtable;
		// Number of elements currently stored in the table.
		size_t _dataNum = 0;
	};
}

模拟实现unordered_set

namespace lei
{
	template<class K, class V, class Hash = _Hash<K>, class GetKey = _Select<pair<K, V>>>
	class unordered_set;//前置声明

	template<class V>
	struct _Select
	{
		size_t operator()(const V& value)
		{
			return value;
		}
	};
	template<class K>
	struct _Hash
	{
		size_t operator()(const K& key)
		{
			return key;
		}
	};
	template<>
	struct _Hash<string>
	{
		size_t operator()(const string& str)
		{
			int sum = 0;
			for (int i = 0; i < str.size(); i++)
			{
				sum = sum + sum * 131 + str[i];
			}
			return sum;
		}
	};

	template<class K,  class Hash = _Hash<K>, class GetKey = _Select<K>>
	class unordered_set
	{
	public:
		typedef HASH_BUCKET::HashTable<K, pair<K, K>, Hash, GetKey> HashSet;
		typedef typename HASH_BUCKET::HashTableIterator < K, pair<K, K>, Hash, GetKey> Iterator;

	public:
		size_t size() const
		{
			return _set._dataNum;
		}
		size_t bucket_count() const
		{
			return _set._hashtable.size();
		}
		bool empty() const
		{
			return _set._dataNum == 0;
		}
		Iterator begin()
		{
			return _set.begin();
		}
		Iterator end()
		{
			return _set.end();
		}

		pair<Iterator, bool> insert(const pair<K, K>& _kv)
		{
			return _set.Insert(_kv);
		}
		bool erase(const K& key)
		{
			return _set.Erase(key);
		}
		bool erase(Iterator& it)
		{
			return _set.Erase(it);
		}
		void clear()
		{
			return _set.clear();
		}
	private:
		HashSet _set;
	};
}

模拟实现unordered_map

namespace lei
{
	template<class K, class V, class Hash = _Hash<K>, class GetKey = _Select<pair<K, V>>>
	class unordered_map;//前置声明

	template<class V>
	struct _Select
	{
		size_t operator()(const V& value)
		{
			return value.first;
		}
	};
	template<class K>
	struct _Hash
	{
		size_t operator()(const K& key)
		{
			return key;
		}
	};
	template<>
	struct _Hash<string>
	{
		size_t operator()(const string& str)
		{
			int sum = 0;
			for (int i = 0; i < str.size(); i++)
			{
				sum = sum + sum * 131 + str[i];
			}
			return sum;
		}
	};

	template<class K, class V, class Hash = _Hash<K>, class GetKey = _Select<pair<K,V>>>
	class unordered_map
	{
	public:
		typedef HASH_BUCKET::HashTable<K, pair<K,V>, Hash, GetKey> HashMap;
		typedef typename HASH_BUCKET::HashTableIterator < K, pair<K, V>, Hash, GetKey> Iterator;

	public:
		size_t size() const
		{
			return _map._dataNum;
		}
		size_t bucket_count() const
		{
			return _map._hashtable.size();
		}
		bool empty() const
		{
			return _map._dataNum == 0;
		}
		Iterator begin()
		{
			return _map.begin();
		}
		Iterator end()
		{
			return _map.end();
		}

		pair<Iterator, bool> insert(const pair<K, V>& _kv)
		{
			return _map.Insert(_kv);
		}
		bool erase(const K& key)
		{
			return _map.Erase(key);
		}
		bool erase(Iterator& it)
		{
			return _map.Erase(it);
		}
		void clear()
		{
			return _map.clear();
		}
	private:
		HashMap _map;
	};
}