基于std标准库的哈希函数实现的hashmap

本文档展示了如何使用 C++ 实现一个自定义哈希表,其中提供两种将哈希值映射到桶下标的方式:PrimeHashRange(对素数取余)和 MaskHashRange(按位掩码)。哈希表类 HashMap 支持键值对的插入、删除和查找操作,并包含冲突处理逻辑和哈希函数。此外,还提供了一个简单的 Foo 类作为键,它实现了 HashBase 接口以供哈希表使用。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

初版
/*************************************************************************
    > File Name: hash.h
    > Author: hsz
    > Brief:
    > Created Time: Tue 26 Jul 2022 02:33:19 PM CST
 ************************************************************************/

#ifndef __EULAR_HASH_H__
#define __EULAR_HASH_H__

#include <assert.h>
#include <math.h>
#include <stdint.h>
#include <stdio.h>

#include <algorithm>
#include <functional>
#include <list>
#include <memory>
#include <stdexcept>
#include <type_traits>
#include <utility>
#include <vector>

// Prints a "[file:line function()]" prefix followed by the printf-style message.
// The two printf calls are wrapped in do { ... } while (0) so the macro expands
// to a single statement and stays correct inside an unbraced if/else.
#define LOG(...) \
    do { \
        printf("[%s:%d %s()]\t", __FILE__, __LINE__, __FUNCTION__); \
        printf(__VA_ARGS__); \
    } while (0)

// Generates a trait `has_member_XXX<T, Args...>` whose static `value` is true
// when the expression `std::declval<T>().XXX(std::declval<Args>()...)` is
// well-formed and false otherwise.
// Check<T>(0) prefers the `int` overload; when the member call is ill-formed
// that overload is discarded during substitution and the variadic overload,
// which yields std::false_type, is selected instead.
#define HAS_MEMBER(XXX) \
template<typename T, typename... Args>\
struct has_member_##XXX \
{ \
private:  \
    template<typename U> \
    static auto Check(int) -> decltype(std::declval<U>().XXX(std::declval<Args>()...), std::true_type());  \
    template<typename U> \
    static std::false_type Check(...); \
public: \
    static constexpr auto value = decltype(Check<T>(0))::value; \
}

namespace eular {

/**
 * @brief Maps a hash code to a bucket index by taking a remainder.
 */
struct PrimeHashRange
{
    PrimeHashRange();
    ~PrimeHashRange();

    // Reduces `code` to a bucket index; defined in hash.cpp.
    uint32_t operator()(uint32_t code);
    // Returns a bucket count; a non-zero `size` is a request for a new
    // bucket count, 0 (the default) asks for the current one.
    uint32_t getMaxBucketSize(size_t size = 0);

    // Position in the prime table that supplies the divisor.
    uint16_t index;
};

/**
 * @brief Maps a hash code to a bucket index by masking off its low bits.
 */
struct MaskHashRange
{
    MaskHashRange();
    ~MaskHashRange();

    // Reduces `code` to a bucket index; defined in hash.cpp.
    uint32_t operator()(uint32_t code);
    // Returns a bucket count; `size` is the requested capacity (0 by default).
    uint32_t getMaxBucketSize(size_t size = 0);

    // Number of low bits of the hash code kept by operator().
    uint16_t index;
};


// Instantiates the has_member_hash<T, Args...> trait, which detects a callable hash() member.
HAS_MEMBER(hash);

// Forward declaration so that HashBase can name HashMap as a friend before it is defined.
template<typename Key, typename Val, typename HashRange, typename Compare>
class HashMap;

/**
 * @brief Interface implemented by key types so that HashMap can obtain their hash code.
 */
class HashBase
{
    // HashMap and the has_member_hash trait are granted access to non-public members.
    template<typename Key, typename Val, typename HashRange, typename Compare>
    friend class HashMap;
    template<typename T, typename... Args>
    friend class has_member_hash;

public:
    HashBase() {}
    virtual ~HashBase() {}

    /**
     * @brief Returns the 32-bit hash code of this object.
     */
    virtual uint32_t hash() const = 0;

protected:
    // Helper hash functions for derived classes; both hash `size` bytes
    // starting at `key` and are defined in hash.cpp.
    static uint32_t compute(const uint8_t *key, uint32_t size);
    static uint32_t compute2(const void *key, uint32_t size);
};

/**
 * @brief Open-addressing hash table keyed by the 32-bit value of Key::hash().
 *
 * HashRange reduces a hash code to a starting slot; collisions between keys
 * that land on the same slot are resolved by linear probing.
 *
 * NOTE: two keys are treated as the same key when their hash() values are
 * equal. Compare is accepted for interface compatibility but is not
 * consulted, so key types are not required to provide operator==.
 *
 * References returned by operator[] / at() are invalidated by any later
 * insertion that grows the table, and by clear() and erase() of that key.
 *
 * @tparam Key        key type; must provide `uint32_t hash() const`
 * @tparam Val        mapped type; must be default-constructible and copyable
 * @tparam HashRange  policy that maps a hash code to a slot and picks bucket counts
 * @tparam Compare    unused (see note above)
 */
template<typename Key, typename Val, typename HashRange = PrimeHashRange, typename Compare = std::equal_to<Key>>
class HashMap
{
    static_assert(has_member_hash<Key>::value, "must inherit from HashBase!");
    static_assert(sizeof(Key) > 0 && sizeof(Val) > 0, "Key and Val must be complete before calling allocate");

public:
    /// Creates a table with the default bucket count of HashRange.
    HashMap() :
        mBucketSize(0),
        mCount(0)
    {
        resetBuckets(mHashRange.getMaxBucketSize());
    }

    /// Creates a table whose bucket count is chosen by HashRange for `bucketSize`.
    HashMap(uint32_t bucketSize) :
        mBucketSize(0),
        mCount(0)
    {
        resetBuckets(mHashRange.getMaxBucketSize(bucketSize));
    }

    // Copy operations are declared explicitly (member-wise copies are correct).
    // Declaring them also suppresses the implicit move operations, which
    // would leave the source with an empty bucket array.
    HashMap(const HashMap &other) = default;
    HashMap &operator=(const HashMap &other) = default;
    ~HashMap() = default;

    /**
     * @brief Inserts `val` under `key` if the key is not present yet.
     * @return true if a new entry was created; false if the key already
     *         exists or the table is full and cannot grow.
     */
    bool insert(const Key &key, const Val &val)
    {
        bool created = false;
        Pair *entry = acquire(key, key.hash(), created);
        if (entry == nullptr || !created) {
            return false;
        }

        entry->val = val;
        return true;
    }

    /**
     * @brief Removes the entry stored under `key`.
     * @return true if an entry was removed, false if the key was not present.
     */
    bool erase(const Key &key)
    {
        uint32_t slot = 0;
        if (!locate(key.hash(), slot)) {
            return false;
        }

        // Leave a tombstone rather than an empty slot, so that probe
        // sequences running through this slot still reach later entries.
        Pair &entry = mBucket[slot];
        entry = Pair();
        entry.state = Pair::DELETED;
        --mCount;
        return true;
    }

    /**
     * @brief Read-only lookup.
     * @throws std::out_of_range if `key` is not present.
     */
    const Val &at(const Key &key) const
    {
        uint32_t slot = 0;
        if (!locate(key.hash(), slot)) {
            throw std::out_of_range("HashMap::at: key not found");
        }
        return mBucket[slot].val;
    }

    /// Same as operator[]: a missing key is inserted with a default value.
    Val &at(const Key &key)
    {
        return operator[](key);
    }

    /**
     * @brief Returns the value stored under `key`, inserting a
     *        default-constructed value first when the key is absent.
     * @throws std::length_error if the key is absent and the table is full
     *         and cannot grow.
     */
    Val &operator[](const Key &key)
    {
        bool created = false;
        Pair *entry = acquire(key, key.hash(), created);
        if (entry == nullptr) {
            throw std::length_error("HashMap: table is full and cannot grow");
        }
        return entry->val;
    }

    /// Removes every entry; the bucket count is kept so the table stays usable.
    void clear()
    {
        mBucket.assign(mBucket.size(), Pair());
        mCount = 0;
    }

protected:
    /**
     * @brief Grows the bucket array to the next size offered by HashRange
     *        and re-places every stored entry.
     * @return true if the table grew; false if HashRange offered no larger
     *         size, in which case the table is left untouched.
     */
    bool force_rehash()
    {
        // getMaxBucketSize() may change the state of the range object, so
        // keep a copy to roll back to when no larger size is available.
        const HashRange previousRange(mHashRange);
        const uint32_t bucketSize = mHashRange.getMaxBucketSize(mBucketSize + 1);
        if (bucketSize <= mBucketSize) {
            mHashRange = previousRange;
            return false;
        }

        std::vector<Pair> oldBucket(bucketSize);
        oldBucket.swap(mBucket);    // mBucket is now the new, all-empty array
        mBucketSize = bucketSize;

        for (const Pair &entry : oldBucket) {
            if (entry.state != Pair::USED) {
                continue;   // empty slots and tombstones are dropped
            }

            // The new array is strictly larger than the number of entries,
            // so a free slot is always reached.
            uint32_t idx = homeSlot(entry.hashVal);
            while (mBucket[idx].state == Pair::USED) {
                idx = nextSlot(idx);
            }
            mBucket[idx] = entry;
        }

        return true;
    }

    /// One slot of the bucket array.
    struct Pair
    {
        // EMPTY: never used; USED: holds an entry; DELETED: tombstone left by erase().
        enum State { EMPTY = 0, USED, DELETED };

        Pair() : key(), hashVal(0), val(), state(EMPTY) {}
        Pair(Key k, uint32_t hv, Val v) : key(k), hashVal(hv), val(v), state(USED) {}

        Key key;
        uint32_t hashVal;
        Val val;
        State state;    // explicit occupancy flag, so a hash value of 0 is a valid key
    };

private:
    /// (Re)creates the bucket array with `bucketSize` empty slots (at least one).
    void resetBuckets(uint32_t bucketSize)
    {
        mBucketSize = bucketSize > 0 ? bucketSize : 1;
        mBucket.assign(mBucketSize, Pair());
        mCount = 0;
    }

    /// First slot to probe for `hashVal`. Requires a non-empty bucket array.
    uint32_t homeSlot(uint32_t hashVal) const
    {
        // HashRange::operator() is not const-qualified, so call it on a copy.
        HashRange range(mHashRange);
        // The modulo keeps the index valid even if the range object and the
        // bucket array disagree about the bucket count.
        return range(hashVal) % static_cast<uint32_t>(mBucket.size());
    }

    /// Slot probed after `idx`, wrapping around at the end of the array.
    uint32_t nextSlot(uint32_t idx) const
    {
        return (idx + 1 == mBucket.size()) ? 0 : idx + 1;
    }

    /**
     * @brief Searches for the entry whose hash equals `hashVal`.
     * @param[out] slot  index of the entry when found; otherwise the first
     *                   reusable slot on the probe path, or mBucket.size()
     *                   when no slot is free.
     * @return true if the entry was found.
     */
    bool locate(uint32_t hashVal, uint32_t &slot) const
    {
        const uint32_t count = static_cast<uint32_t>(mBucket.size());
        slot = count;
        if (count == 0) {
            return false;
        }

        uint32_t idx = homeSlot(hashVal);
        for (uint32_t probed = 0; probed < count; ++probed) {
            const Pair &entry = mBucket[idx];
            if (entry.state == Pair::USED) {
                if (entry.hashVal == hashVal) {
                    slot = idx;
                    return true;
                }
            } else {
                if (slot == count) {
                    slot = idx;     // remember the first reusable slot
                }
                if (entry.state == Pair::EMPTY) {
                    return false;   // an empty slot ends the probe sequence
                }
            }
            idx = nextSlot(idx);
        }

        return false;
    }

    /**
     * @brief Returns the entry for `hashVal`, creating it (with a
     *        default-constructed value) when it does not exist yet.
     * @param[out] created  true if a new entry was created.
     * @return the entry, or nullptr if the table is full and cannot grow.
     */
    Pair *acquire(const Key &key, uint32_t hashVal, bool &created)
    {
        created = false;
        uint32_t slot = 0;
        if (locate(hashVal, slot)) {
            return &mBucket[slot];
        }

        // Grow once the table would be more than 3/4 full, or when no slot is free.
        const uint64_t wanted = (static_cast<uint64_t>(mCount) + 1) * 4;
        const uint64_t limit = static_cast<uint64_t>(mBucket.size()) * 3;
        if (wanted > limit || slot == mBucket.size()) {
            if (force_rehash()) {
                locate(hashVal, slot);  // slots moved; find the insert position again
            }
        }
        if (slot == mBucket.size()) {
            return nullptr;
        }

        Pair &entry = mBucket[slot];
        entry.key = key;
        entry.hashVal = hashVal;
        entry.state = Pair::USED;
        ++mCount;
        created = true;
        return &entry;
    }

    uint32_t    mBucketSize;        // number of slots in mBucket
    uint32_t    mCount;             // number of slots in the USED state
    HashRange   mHashRange;
    std::vector<Pair>  mBucket;
};

} // namespace eular

#endif // __EULAR_HASH_H__
/*************************************************************************
    > File Name: hash.cpp
    > Author: hsz
    > Brief:
    > Created Time: Tue 26 Jul 2022 02:33:23 PM CST
 ************************************************************************/

#include "hash.h"

#include <stdio.h>
#include <string.h>

// #if __WORDSIZE == 64
// const size_t offset_basis = 14695981039346656037ULL;
// const size_t prime = 1099511628211ULL;
// #elif __WORDSIZE == 32
// const size_t offset_basis = 2166136261U;
// const size_t prime = 16777619U;
// #else
// #error "undefined __WORDSIZE in hash.cpp"
// #endif

// 32-bit FNV offset basis and FNV prime, consumed by HashBase::compute().
const uint32_t offset_basis = 2166136261U;
const uint32_t prime = 16777619U;

namespace eular {

// Loads the four bytes starting at p into a uint32_t, in host byte order.
// Going through memcpy means p does not have to be 4-byte aligned.
uint32_t Read32(const void *p)
{
    uint32_t word;
    memcpy(&word, p, sizeof(word));
    return word;
}

// FNV-1a over `size` bytes starting at `key`: each byte is XOR-ed into the
// running value, which is then multiplied by the FNV prime.
uint32_t HashBase::compute(const uint8_t *key, uint32_t size)
{
    uint32_t digest = offset_basis;
    const uint8_t *const last = key + size;
    for (const uint8_t *cursor = key; cursor != last; ++cursor) {
        digest = (digest ^ static_cast<uint32_t>(*cursor)) * prime;
    }

    return digest;
}

// XOR-and-rotate hash over `keysize` bytes starting at `key`.
// Whole 32-bit words are mixed in first (rotate left by 13 after each word),
// then the remaining 0-3 bytes one at a time (rotate left by 8 after each byte).
uint32_t HashBase::compute2(const void *key, uint32_t keysize)
{
    const uint32_t wordBytes = sizeof(uint32_t);
    const uint8_t *cursor = static_cast<const uint8_t *>(key);
    uint32_t remaining = keysize;
    uint32_t hash = 0;

    for (; remaining >= wordBytes; remaining -= wordBytes) {
        hash ^= Read32(cursor);
        cursor += wordBytes;
        hash = (hash << 13) | (hash >> 19);
    }

    for (; remaining != 0; --remaining) {
        hash ^= *cursor;
        ++cursor;
        hash = (hash << 8) | (hash >> 24);
    }

    return hash;
}

// Table of primes in ascending order; PrimeHashRange uses one entry as its divisor.
static const uint16_t gPrimeNumberVec[] = {
    79,     113,    163,    229,    331,    467,    659,    929,
    1301,   1823,   2557,   3581,   3967,   4517,   5209,   6599,
    7727,   8761,   9973,   11699,  13901,  16001,  18013,  20399,
    23909,  29009,  32099,  40009,  49999,  59999,
};

// Number of entries in gPrimeNumberVec.
static const uint16_t gPrimeCount = sizeof(gPrimeNumberVec) / sizeof(uint16_t);

// Initial value of MaskHashRange::index, i.e. the number of hash bits kept by default.
static const uint8_t gBitNumber = 5;

// Starts at the first entry of the prime table.
PrimeHashRange::PrimeHashRange() : index(0) {}

// Nothing to release.
PrimeHashRange::~PrimeHashRange() {}

// Reduces `code` to a bucket index in [0, p), where p is the prime at `index`.
uint32_t PrimeHashRange::operator()(uint32_t code)
{
    // The bound is strict: index == gPrimeCount would read one element past
    // the end of the table. (index is unsigned, so no lower-bound check is needed.)
    assert(index < gPrimeCount);
    return code % gPrimeNumberVec[index];
}

// Selects the bucket count for a requested capacity.
//
// size != 0: picks the smallest prime in the table that is >= size (or the
//            largest prime when size exceeds every entry), stores its
//            position in `index` and returns it.
// size == 0: returns the prime currently selected by `index`.
uint32_t PrimeHashRange::getMaxBucketSize(size_t size)
{
    if (size != 0) {
        const uint16_t *const tableEnd = gPrimeNumberVec + gPrimeCount;
        const uint16_t *ptr = std::lower_bound(gPrimeNumberVec, tableEnd, size);
        if (ptr == tableEnd) {
            --ptr;  // request is larger than every prime: clamp to the last entry
        }
        // Pointer subtraction already yields an element count; no division by sizeof.
        index = static_cast<uint16_t>(ptr - gPrimeNumberVec);
    }

    return gPrimeNumberVec[index];
}

// Starts with gBitNumber hash bits.
MaskHashRange::MaskHashRange() : index(gBitNumber) {}

// Nothing to release.
MaskHashRange::~MaskHashRange() {}

// Keeps the low `index` bits of `code`, giving a bucket index in [0, 2^index).
uint32_t MaskHashRange::operator()(uint32_t code)
{
    assert(gBitNumber <= index && index <= 20); // at most 2^20 buckets
    const uint32_t bucketCount = 1 << index;
    return code & (bucketCount - 1);
}

// Returns the smallest power of two strictly greater than `num`
// (e.g. 5 -> 8, 32 -> 64, 0 -> 1). The result wraps to 0 when bit 31 of
// `num` is set.
uint32_t col2(uint32_t num)
{
    uint32_t filled = num;

    // Copy the highest set bit into every lower position: shifts of 1, 2, 4, 8, 16.
    for (uint32_t shift = 1; shift <= 16; shift <<= 1) {
        filled |= filled >> shift;
    }

    return filled + 1;
}

uint32_t MaskHashRange::getMaxBucketSize(size_t size)
{
    uint32_t ret = 1 << 20;
    if (index < 20) {
        ret = col2((uint32_t)size);
        if (ret > (1 << 20)) {
            ret = (1 << 20);
            index = 20;
        }
    }
    
    return ret;
}

} // namespace eular

/*************************************************************************
    > File Name: test_hash.cc
    > Author: hsz
    > Brief:
    > Created Time: Thu 01 Sep 2022 10:53:38 AM CST
 ************************************************************************/

#include "hash.h"
#include <iostream>
#include <string>

using namespace std;
using namespace eular;

// Sample key type: holds a string and hashes its bytes with HashBase::compute().
class Foo : public HashBase {
public:
    Foo() {}
    ~Foo() {}

    // Replaces the stored string.
    void set(const std::string &str) { key = str; }

    // Hash of the stored string's bytes (the terminating NUL is not included).
    virtual uint32_t hash() const override
    {
        const uint8_t *bytes = reinterpret_cast<const uint8_t *>(key.c_str());
        return compute(bytes, key.length());
    }

protected:
    std::string key;
};

// Demo: stores two values under two different keys and prints each one
// right after it has been written.
int main(int argc, char **argv)
{
    HashMap<Foo, int> hashMap;
    Foo foo;

    const char *const words[] = { "hello", "world" };
    const int values[] = { 10, 100 };
    for (size_t i = 0; i < 2; ++i) {
        foo.set(words[i]);
        hashMap[foo] = values[i];

        std::cout << hashMap[foo] << std::endl;
    }

    return 0;
}


编译命令:g++ hash.cpp test_hash.cc -o test_hash -std=c++11

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值