C++哈希

最新推荐文章于 2025-11-30 21:05:18 发布

原创最新推荐文章于 2025-11-30 21:05:18 发布 · 469 阅读

7 ·

CC 4.0 BY-SA版权

文章标签：

#哈希算法 #c++ #散列表 #算法

在之前使用计数排序的时候，是通过对应的下标对该数组下标对应的空间进行标记，但是如果数分布不均匀就会造成浪费空间（比如1，2，3，999）。于是，哈希就来解决这问题。

1.闭散列哈希表

enum state
{
    EMPTY, DELETE, EXIST
};


template<class K, class KV,class Koft>
class HashTable
{
public:
    struct HashNode
    {
        state _state; 
        KV _kv;
    };

public:
    HashTable(size_t capacity = 4)
        : _ht(capacity), _size(0)
    {
        for (size_t i = 0; i < capacity; i++)
        {
            _ht[i]._state = EMPTY;  // 初始化每个位置为空
        }
    }

    void resize(size_t n)
    {
        // 映射关系变了
        // 1. 开辟空间
        // 2. 重新映射
        // 3. 释放空间
        Koft koft;
        int Index = 0;
            vector<HashNode> new_ht;
            new_ht.resize(_size * 2);

            for (int i = 0; i < _ht.size(); i++)
            {
                if (_ht[i]._state == EXIST)
                {
                    Index = koft(_ht[i]._kv) % new_ht.capacity();
                    while (new_ht[Index]._state == EXIST)
                    {
                        Index++;
                        if (Index == new_ht.size())
                        {
                            Index = 0;
                        }
                    }
                }
                new_ht[Index]._state = EXIST;
                new_ht[Index]._kv = _ht[i]._kv;
            }
            _ht.swap(new_ht);

    }

    bool Insert(const KV& kv)
    {     

        // 负载因子
        if (_size * 10 / _ht.capacity() > 7)
        {
            resize(_size*2);
        }


        Koft koft;
        K key = koft(kv);
        int Index = key % _ht.capacity();
        while (_ht[Index]._state==EXIST)      
        {         
            if (koft(_ht[Index]._kv) == koft(kv))
            {
                return false;
            }
            Index++;
            if (Index == _ht.size())
            {
                Index = 0;
            }
        }
        //  空或删除的节点
        _ht[Index]._kv = kv;
        _ht[Index]._state = EXIST;
        _size++;
        return true;
    }

    HashNode* Find(const K& key)
    {
        Koft koft;
        int Index = key % _ht.capacity();
        while (_ht[Index]._state != EMPTY)     // 为删除或存在的进入     如果是删除的状态还要继续往后找
        {
            if (koft(_ht[Index]._kv) == key)
            {
                if (_ht[Index]._state == EXIST)
                {
                    return &_ht[Index];
                }
                else if (_ht[Index]._state == DELETE)
                {
                    return nullptr;
                }
            }

            Index++;
            if (Index == _ht.size())
            {
                Index = 0;
            }
        }
        //  访问到空了，说明查找的元素不可能填在后面了
        return nullptr;
    }


    bool erase(const K& key)
    {
        HashNode* ret = Find(key);
        if (ret)
        {
            ret->_state = DELETE;
            _size--;
            return true;
        }
        else
        {
            return false;
        }
    }


private:
    vector<HashNode> _ht;
    size_t _size;    //   为EXIST的个数 ，        _ht.size() 表示所有是_state状态的空间的个数
};

主要写开散列哈希表

在开散列每一个vector的空间里存的是一个数据和一个next的指针，指向的是同样映射到该位置的数。

在开散列中不需要状态，因为映射到哪个点就放在哪个点。

	template<class KV>
	struct Node
	{
		KV _kv;
		Node* _next;
		Node(const KV& kv=KV())
			:_kv(kv)
			,_next(nullptr)
		{ }
	};

插入方式是进行头插，如上图

Index=key%capacity

			Node* newnode = new Node(kv);
			newnode->_next = _hash[Index];
			_hash[Index] = newnode;

		pair<iterator,bool> Insert(const KV& kv)
		{
			if (_num == _hash.size()) //  负载因子为1时扩容
			{
				resize(_num*2);
			}
			Koft koft;
			const K& key = koft(kv);
			int Index = HashFun(key) % _hash.capacity();
			// 查找是否已经存在
			Node* cur = _hash[Index];
			while (cur != nullptr)
			{
				if (koft(cur->_kv) == key)
				{
					return make_pair(iterator(cur), false);
				}
				cur = cur->_next;
			}
			//开始插入
			Node* newnode = new Node(kv);
			//头插
			newnode->_next = _hash[Index];
			_hash[Index] = newnode;
			_num++;
			return make_pair(iterator(newnode), true);
		}

并不是所有的key都是int 类型，如果是string类型，需要自己构造仿函数获取对应的int类型的值

	template<class K>
	class hash_t
	{
	public:
		const size_t& operator()(const K& key)
		{
			return key;
		}
	};


	template<>
	class hash_t<string>   // 模板特化，如果key是string类型，调用该模板
	{
	public:
		size_t operator()(const string& key)
		{
			int sum = 0;
			for (int i = 0; i < key.size(); i++)
			{
				sum += key[i];  // 用对应的ASCII码
			}
			return sum;
		}
	};

		size_t HashFun(const K& key)
		{
			Hash hash;
			return hash(key);
		}

在使用key的时候，用HashFun(key)就能获得size_t类型的值。

哈希表的框架

	template<class K, class KV, class Koft,class Hash=hash_t<K>> 
//  K是key的类型，KV是pair<K,V> ,V是value的类型， Koft是仿函数返回key，Hash是获取size_t类型数据 
	class OpenHash
	{
		typedef HashIterator<K,KV, Koft,Hash> iterator;
	public:
		friend class iterator;  //  为了让iterator访问_hash,确定下标
		typedef Node<KV> Node;

		OpenHash()
			:_hash(3)  // size()==3
			,_num(0)
		{ }


		// 在初始化和扩容都保证了size==capacity
		void resize(size_t n){}



		~OpenHash()
		{
			clear();
		}


		void clear()
		{
			for (int i = 0; i < _hash.size(); i++)
			{
				Node* next = nullptr;
				Node* cur = _hash[i];
				while (cur)
				{
					next = cur->_next;
					delete cur;
					cur = next;
				}
			}
		}

		size_t HashFun(const K& key){}

		iterator begin(){}

		iterator end(){}

		pair<iterator,bool> Insert(const KV& kv){}

		Node* Erase(const K& key){}

		Node* Find(const K& key){}


	private:
		vector<Node*> _hash;
		size_t _num;
	};
}

resize()函数

		void resize(size_t n)
		{
			vector<Node*> new_hash;
			new_hash.resize(n,nullptr);  // size()==n    //  用resize()，capacity()等于size()
			for (int i = 0; i < _hash.size(); i++)
			{
				Koft koft;
				Node* cur = _hash[i];
				while (cur != nullptr)  //要进行深拷贝，否则将原空间销毁，vector[i]的next指针就是野指针
				{
					const K& key = koft(cur->_kv);
					int Index = HashFun(key) % new_hash.capacity();
					Node* newNode = new Node(cur->_kv);
					newNode->_next = new_hash[Index];
					new_hash[Index] = newNode;			
					cur = cur->_next;
				}
			}
			_hash.swap(new_hash);
		}

迭代器

主要是operator++（），从第一个不为空的节点开始，每++就访问next指针，当next==nullptr,就要

找下一个不为nullptr的vector[index]，可以通过当前的key找到对应的下标，再对下标index++，找到vector[index]!=nullptr。所有在迭代器的实现时，需要我们将vector也传到迭代器类中，不如直接将哈希表指针this(现成的)传给迭代器类中。

迭代器，包装成一个_kv的地址（表面）。

	template<class K,class KV,class Koft,class Hash>
//  K是key的类型，KV是pair<K,V> ,V是value的类型， Koft是仿函数返回key，Hash是获取size_t类型数据
	class HashIterator
	{
		typedef HashIterator<K,KV, Koft,Hash> iterator;
		typedef OpenHash<K, KV, Koft,Hash> OH;
		typedef Node<KV> Node;
	public:
		HashIterator(Node* node = nullptr,OH* ptr=nullptr)  //  ptr哈希表指针
			:_node(node)
			,_ptr(ptr)
		{}

		KV& operator*()
		{
			return _node->_kv;
		}

		KV* operator->()
		{
			return &_node->_kv;   //  iterator it->first(表面使用)==_node->_kv.first(底层原理)
		}

		iterator operator++()
		{
			if (_node->_next)
			{
				_node = _node->_next;
				return *this;
			}
			else
			{
				Koft koft;
				const K& key = koft(_node->_kv);
				int Index = _ptr->HashFun(key) % _ptr->_hash.size();
				Index++;
					for (; Index < _ptr->_hash.size(); Index++)
					{
						Node* cur = _ptr->_hash[Index];
						if (cur)
						{
							_node = cur;
							return *this;
						}
					}
					_node = nullptr;
					return *this;  
					//return iterator(nullptr, _ptr);
					//会进入死循环，因为_ptr指针变化了,永远不等于end(),为什么，因为这里返回的是一个临时拷贝，_ptr发生了浅拷贝，end()的ptr与临时的ptr不一样
					
			}
		}

		bool operator!=(const iterator& it)
		{
			return _node != it._node;
		}

// 只有访问到_hash.size()并且没找到非空数据，返回nullptr,与end()对应

		iterator begin()
		{
			for (int i = 0; i < _hash.size(); i++)
			{
				Node* cur = _hash[i];
				if (cur)
				{
					return iterator(cur,this);
				}
			}
			return end();
		}

		iterator end()
		{
			return iterator(nullptr, this);
		}

Earse()， Find()

		Node* Erase(const K& key)
		{
			int Index = HashFun(key) % _hash.capacity();
			Node* cur = _hash[Index];
			Koft koft;
			Node* prev = nullptr;
			while (cur != nullptr)
			{
				if (koft(cur->_kv) == key)
				{
					if (cur == _hash[Index])
					{
						_hash[Index] = cur->_next;
						delete cur;
						return _hash[Index];
					}
					else
					{
						prev->_next = cur->_next;
						delete cur;
						return prev->_next;
					}
				}
				prev = cur;
				cur = cur->_next;
			}
			return nullptr;
		}

		Node* Find(const K& key)
		{
			Koft koft;
			int Index = HashFun(key) % _hash.capacity();
			Node* cur = _hash[Index];
			while (cur)
			{
				if (koft(cur->_kv) == key)
				{
					return cur;
				}
				cur = cur->_next;
			}
			return nullptr;
		}

map.h

template<class K,class V,class Hash=hash_t<K>>
class unordered_MAP
{
public:

	struct Koft
	{
	    const K& operator()(const pair<K, V>& kv)
		{
			return kv.first;
		}
	};

	typedef typename OpenHash<K, pair<K, V>, Koft,Hash>::Node Node;
     //    因为Node也是OperHash从外面引进的，需要加typename
	typedef HashIterator<K, pair<K,V>, Koft, Hash> iterator;
	pair<iterator,bool> Insert(const pair<K, V>& kv)
	{
		return _map.Insert(kv);
	}


	Node* Erase(const K& key)
	{
		return _map.Erase(key);
	}

	Node* Find(const K& key)
	{
		return _map.Find(key);
	}

	iterator begin()
	{
		return _map.begin();
	}

	iterator end()
	{
		return _map.end();
	}

private:
	OpenHash<K, pair<K, V>, Koft,Hash> _map;
};