// Initial version
/*************************************************************************
> File Name: hash.h
> Author: hsz
> Brief:
> Created Time: Tue 26 Jul 2022 02:33:19 PM CST
************************************************************************/
#ifndef __EULAR_HASH_H__
#define __EULAR_HASH_H__
#include <stdint.h>
#include <vector>
#include <list>
#include <memory>
#include <math.h>
#include <assert.h>
// Debug logging helper: prints "[file:line function()]\t" followed by the
// printf-style message. The two printf calls are wrapped in do { } while (0)
// so the macro behaves as one statement (safe in an unbraced if/else);
// callers end the invocation with ';'.
#define LOG(...) \
    do { \
        printf("[%s:%d %s()]\t", __FILE__, __LINE__, __FUNCTION__); \
        printf(__VA_ARGS__); \
    } while (0)
/**
 * @brief Generates a compile-time detector named has_member_<XXX>.
 *
 * has_member_XXX<T, Args...>::value is true when the expression
 * std::declval<T>().XXX(std::declval<Args>()...) is well-formed and false
 * otherwise. Check(int) is the better match for the argument 0, but it is
 * removed from overload resolution when that expression is ill-formed, which
 * leaves the Check(...) fallback returning std::false_type.
 *
 * The expansion stops at the closing brace of the struct, so an invocation
 * must be followed by ';'.
 */
#define HAS_MEMBER(XXX) \
template<typename T, typename... Args>\
struct has_member_##XXX \
{ \
private: \
template<typename U> \
static auto Check(int) -> decltype(std::declval<U>().XXX(std::declval<Args>()...), std::true_type()); \
template<typename U> \
static std::false_type Check(...); \
public: \
static constexpr auto value = decltype(Check<T>(0))::value; \
}
namespace eular {
/**
 * @brief Hash range that reduces a hash code to a bucket index with the
 *        remainder operator.
 */
struct PrimeHashRange
{
PrimeHashRange();
~PrimeHashRange();
// Maps a hash code to a bucket index (code modulo the table entry selected by index).
uint32_t operator()(uint32_t code);
// Returns a bucket count; with the default size of 0 it reports the entry currently selected by index.
uint32_t getMaxBucketSize(size_t size = 0);
// Position of the current modulus in the size table defined in the implementation file.
uint16_t index;
};
/**
 * @brief Hash range that reduces a hash code to a bucket index by masking
 *        its low bits.
 */
struct MaskHashRange
{
MaskHashRange();
~MaskHashRange();
// Maps a hash code to a bucket index by keeping its low `index` bits.
uint32_t operator()(uint32_t code);
// Returns a bucket count for the requested size; size defaults to 0.
uint32_t getMaxBucketSize(size_t size = 0);
// Number of low bits kept by operator(), i.e. the mask covers 2^index values.
uint16_t index;
};
// Defines has_member_hash<T>, the detector HashMap uses to require a hash() member on its key type.
HAS_MEMBER(hash);
// Forward declaration so that HashBase can name HashMap as a friend.
template<typename Key, typename Val, typename HashRange, typename Compare>
class HashMap;
/**
 * @brief Base interface for key types: a key exposes a 32-bit hash through
 *        hash(). Two static hashing helpers are offered to derived classes.
 */
class HashBase
{
    template<typename Key, typename Val, typename HashRange, typename Compare>
    friend class HashMap;

    template<typename T, typename... Args>
    friend class has_member_hash;

public:
    HashBase() = default;
    virtual ~HashBase() = default;

    /// @return the 32-bit hash code of this key.
    virtual uint32_t hash() const = 0;

protected:
    /// Hashes `size` bytes starting at `key`.
    static uint32_t compute(const uint8_t *key, uint32_t size);

    /// Alternative hash over `size` bytes starting at `key`.
    static uint32_t compute2(const void *key, uint32_t size);
};
/**
 * @brief Fixed-slot hash table: every bucket holds at most one entry.
 *
 * A key is mapped to its bucket by applying HashRange to Key::hash(). There is
 * no chaining or probing: when two different keys land in the same bucket,
 * insert() refuses the second one, and operator[] / at() refer to the entry
 * that is already stored there.
 *
 * @tparam Key       key type; must provide `uint32_t hash() const` and be
 *                   default-constructible and copyable
 * @tparam Val       mapped type; must be default-constructible and copyable
 * @tparam HashRange functor turning a hash code into a bucket index; it also
 *                   chooses bucket counts through getMaxBucketSize()
 * @tparam Compare   key equality predicate; not used by the current
 *                   implementation, kept so existing instantiations compile
 */
template<typename Key, typename Val, typename HashRange = PrimeHashRange, typename Compare = std::equal_to<Key>>
class HashMap
{
    static_assert(has_member_hash<Key>::value, "must inherit from HashBase!");
    static_assert(sizeof(Key) > 0 && sizeof(Val) > 0, "Key and Val must be complete before calling allocate");

public:
    /// Creates a table with the default bucket count reported by HashRange.
    HashMap() : mBucketSize(0)
    {
        mBucketSize = mHashRange.getMaxBucketSize();
        mBucket.resize(mBucketSize);
    }

    /// Creates a table whose bucket count is chosen by HashRange for `bucketSize`.
    HashMap(uint32_t bucketSize) : mBucketSize(0)
    {
        mBucketSize = mHashRange.getMaxBucketSize(bucketSize);
        mBucket.resize(mBucketSize);
    }

    // Member-wise copy is exactly what the hand-written versions did.
    HashMap(const HashMap &other) = default;
    HashMap &operator=(const HashMap &other) = default;
    ~HashMap() = default;

    /**
     * @brief Stores (key, val) if the key's bucket is free.
     * @return true when stored; false when the bucket is already occupied
     *         (by the same key or by a colliding one).
     */
    bool insert(const Key &key, const Val &val)
    {
        const uint32_t hashVal = key.hash();
        Pair &slot = slotFor(hashVal);
        if (slot.occupied) {
            return false;
        }
        slot.key = key;
        slot.hashVal = hashVal;
        slot.val = val;
        slot.occupied = true;
        return true;
    }

    /**
     * @brief Removes the entry stored for `key`.
     * @return true when an entry with the same hash code was removed; false
     *         when the bucket is empty or holds an entry with another hash.
     */
    bool erase(const Key &key)
    {
        const uint32_t hashVal = key.hash();
        Pair &slot = slotFor(hashVal);
        if (!slot.occupied || slot.hashVal != hashVal) {
            return false;
        }
        slot = Pair();
        return true;
    }

    /**
     * @brief Read-only access to the value in the key's bucket.
     * @return the stored value, or a default-constructed Val when the bucket
     *         is empty. Nothing is inserted.
     */
    const Val &at(const Key &key) const
    {
        // HashRange::operator() is not const-qualified, so index through a copy.
        HashRange range = mHashRange;
        const uint32_t idx = range(key.hash());
        assert(idx < mBucket.size());
        return mBucket[idx].val;
    }

    /// Same as operator[].
    Val &at(const Key &key)
    {
        return operator[](key);
    }

    /**
     * @brief Returns the value in the key's bucket, claiming the bucket for
     *        `key` (with a default-constructed value) when it is empty.
     *
     * Claiming the bucket makes the entry visible to insert(), erase() and
     * force_rehash(). If the bucket already belongs to a colliding key, that
     * entry's value is returned.
     */
    Val &operator[](const Key &key)
    {
        const uint32_t hashVal = key.hash();
        Pair &slot = slotFor(hashVal);
        if (!slot.occupied) {
            slot.key = key;
            slot.hashVal = hashVal;
            slot.occupied = true;
        }
        return slot.val;
    }

    /// Empties every bucket; the bucket count is kept so the table stays usable.
    void clear()
    {
        mBucket.assign(mBucketSize, Pair());
    }

protected:
    /// One bucket: the stored key/value plus the key's full hash code.
    struct Pair
    {
        Pair() : key(), hashVal(0), val(), occupied(false) {}
        Pair(Key k, uint32_t hv, Val v) : key(k), hashVal(hv), val(v), occupied(true) {}

        Key key;
        uint32_t hashVal;
        Val val;
        bool occupied; // explicit flag, so a key whose hash is 0 still counts as stored
    };

    /**
     * @brief Grows the table to the next bucket count offered by HashRange and
     *        re-places every stored entry.
     *
     * Entries that collide in the new table are dropped and reported via LOG.
     * @return false when HashRange cannot offer a larger bucket count, in
     *         which case the table is left unchanged.
     */
    bool force_rehash()
    {
        // Work on a copy so the live range is untouched if growth is impossible.
        HashRange newRange = mHashRange;
        const uint32_t newSize = newRange.getMaxBucketSize(mBucketSize + 1);
        if (newSize <= mBucketSize) {
            return false;
        }

        std::vector<Pair> newBucket(newSize);
        for (const Pair &slot : mBucket) {
            if (!slot.occupied) {
                continue;
            }
            const uint32_t idx = newRange(slot.hashVal);
            assert(idx < newBucket.size());
            if (!newBucket[idx].occupied) {
                newBucket[idx] = slot;
            } else {
                LOG("conflict\n");
            }
        }

        mBucket.swap(newBucket);
        mBucketSize = newSize;
        mHashRange = newRange;
        return true;
    }

private:
    /// Returns the bucket addressed by `hashVal`.
    Pair &slotFor(uint32_t hashVal)
    {
        const uint32_t idx = mHashRange(hashVal);
        assert(idx < mBucket.size());
        return mBucket[idx];
    }

    uint32_t mBucketSize; // number of buckets
    HashRange mHashRange;
    std::vector<Pair> mBucket;
};
} // namespace eular
#endif // __EULAR_HASH_H__
/*************************************************************************
> File Name: hash.cpp
> Author: hsz
> Brief:
> Created Time: Tue 26 Jul 2022 02:33:23 PM CST
************************************************************************/
#include "hash.h"
#include <stdio.h>
#include <string.h>
// #if __WORDSIZE == 64
// const size_t offset_basis = 14695981039346656037ULL;
// const size_t prime = 1099511628211ULL;
// #elif __WORDSIZE == 32
// const size_t offset_basis = 2166136261U;
// const size_t prime = 16777619U;
// #else
// #error "undefined __WORDSIZE in hash.cpp"
// #endif
// Seed and multiplier used by HashBase::compute(). The values are the 32-bit
// FNV offset basis and FNV prime; they are fixed at 32 bits regardless of the
// platform word size (the word-size-dependent variant is commented out above).
const uint32_t offset_basis = 2166136261U;
const uint32_t prime = 16777619U;
namespace eular {
// Loads 32 bits from p in host byte order. Going through memcpy keeps the
// read valid when p is not 4-byte aligned.
uint32_t Read32(const void *p)
{
    uint32_t word = 0;
    memcpy(&word, p, sizeof(word));
    return word;
}
// Hashes `size` bytes starting at `key`: the state starts at offset_basis and,
// for every byte, is xor-ed with the byte and then multiplied by prime
// (the FNV-1a order of operations). Returns offset_basis when size is 0.
uint32_t HashBase::compute(const uint8_t *key, uint32_t size)
{
    uint32_t digest = offset_basis;
    const uint8_t *cursor = key;
    for (uint32_t remaining = size; remaining > 0; --remaining) {
        digest ^= static_cast<uint32_t>(*cursor);
        digest *= prime;
        ++cursor;
    }
    return digest;
}
// Alternative hash over `keysize` bytes starting at `key`.
// Full 4-byte words are xor-ed in (read with Read32) and the state is rotated
// left by 13 bits after each word; the remaining 1-3 tail bytes are xor-ed in
// one at a time, each followed by a rotate-left of 8 bits.
// Returns 0 when keysize is 0.
uint32_t HashBase::compute2(const void *key, uint32_t keysize)
{
    const uint32_t kWord = sizeof(uint32_t);
    const uint8_t *cursor = static_cast<const uint8_t *>(key);
    uint32_t state = 0;
    uint32_t remaining = keysize;

    // Word-sized part of the input.
    for (; remaining >= kWord; remaining -= kWord) {
        state ^= Read32(cursor);
        cursor += kWord;
        state = (state << 13) | (state >> 19);
    }

    // Tail bytes that did not fill a whole word.
    for (; remaining != 0; --remaining) {
        state ^= *cursor;
        ++cursor;
        state = (state << 8) | (state >> 24);
    }

    return state;
}
// Bucket-count table used by PrimeHashRange, in ascending order (described as
// a set of primes by the original author). Every entry fits in uint16_t.
static const uint16_t gPrimeNumberVec[] = {
79, 113, 163, 229, 331, 467, 659, 929,
1301, 1823, 2557, 3581, 3967, 4517, 5209, 6599,
7727, 8761, 9973, 11699, 13901, 16001, 18013, 20399,
23909, 29009, 32099, 40009, 49999, 59999,
};
// Number of entries in gPrimeNumberVec.
static const uint16_t gPrimeCount = sizeof(gPrimeNumberVec) / sizeof(uint16_t);
// Initial bit width of MaskHashRange (2^5 = 32 buckets); also the lower bound its operator() asserts.
static const uint8_t gBitNumber = 5;
// Starts at the first (smallest) entry of the size table.
PrimeHashRange::PrimeHashRange()
{
    index = 0;
}

// Nothing to release.
PrimeHashRange::~PrimeHashRange() = default;
// Maps a hash code to a bucket index in [0, gPrimeNumberVec[index]).
// @param code hash code to reduce
// @return code modulo the table entry currently selected by index
uint32_t PrimeHashRange::operator()(uint32_t code)
{
    // index must name an existing table entry: index == gPrimeCount would
    // already read one element past the end of gPrimeNumberVec.
    assert(index < gPrimeCount);
    const uint32_t modulus = gPrimeNumberVec[index];
    return code % modulus;
}
// Selects the bucket count for a requested size.
// @param size requested number of buckets; 0 means "report the current one"
// @return for size == 0 the table entry currently selected by index;
//         otherwise the first table entry that is >= size, or the largest
//         entry when size exceeds every entry. index is updated to the
//         selected entry so operator() uses the matching modulus.
uint32_t PrimeHashRange::getMaxBucketSize(size_t size)
{
    if (size == 0) {
        return gPrimeNumberVec[index];
    }

    const uint16_t *const tableEnd = gPrimeNumberVec + gPrimeCount;
    const uint16_t *pos = std::lower_bound(gPrimeNumberVec, tableEnd, size);
    if (pos == tableEnd) {
        // Request is larger than every entry: clamp instead of reading past the table.
        pos = tableEnd - 1;
    }

    // Pointer subtraction already yields an element count; no sizeof division.
    index = static_cast<uint16_t>(pos - gPrimeNumberVec);
    return *pos;
}
// Starts with a mask of gBitNumber bits.
MaskHashRange::MaskHashRange()
{
    index = gBitNumber;
}

// Nothing to release.
MaskHashRange::~MaskHashRange() = default;
// Maps a hash code to a bucket index by keeping its low `index` bits,
// i.e. the result lies in [0, 2^index).
uint32_t MaskHashRange::operator()(uint32_t code)
{
    assert(gBitNumber <= index && index <= 20); // at most 2^20 buckets
    const uint32_t bucketCount = 1 << index;
    return code & (bucketCount - 1);
}
// Returns the smallest power of two strictly greater than num
// (col2(0) == 1, col2(8) == 16). The result wraps to 0 when num >= 2^31.
uint32_t col2(uint32_t num)
{
    uint32_t smeared = num;
    // Copy the highest set bit into every lower position: shifts 1, 2, 4, 8, 16.
    for (unsigned shift = 1; shift <= 16; shift <<= 1) {
        smeared |= smeared >> shift;
    }
    return smeared + 1;
}
uint32_t MaskHashRange::getMaxBucketSize(size_t size)
{
uint32_t ret = 1 << 20;
if (index < 20) {
ret = col2((uint32_t)size);
if (ret > (1 << 20)) {
ret = (1 << 20);
index = 20;
}
}
return ret;
}
} // namespace eular
/*************************************************************************
> File Name: test_hash.cc
> Author: hsz
> Brief:
> Created Time: Thu 01 Sep 2022 10:53:38 AM CST
************************************************************************/
#include "hash.h"
#include <iostream>
#include <string>
using namespace std;
using namespace eular;
// Sample key type: wraps a string and hashes its bytes with HashBase::compute().
class Foo : public HashBase {
public:
    Foo() {}
    ~Foo() {}

    // Replaces the wrapped string.
    void set(const std::string &str)
    {
        key = str;
    }

    // Hash of the wrapped string's bytes (terminating NUL excluded).
    virtual uint32_t hash() const override
    {
        const uint8_t *bytes = reinterpret_cast<const uint8_t *>(key.c_str());
        const uint32_t byteCount = static_cast<uint32_t>(key.length());
        return compute(bytes, byteCount);
    }

protected:
    std::string key;
};
int main(int argc, char **argv)
{
HashMap<Foo, int> hashMap;
Foo foo;
foo.set("hello");
hashMap[foo] = 10;
cout << hashMap[foo] << endl;
foo.set("world");
hashMap[foo] = 100;
cout << hashMap[foo] << endl;
return 0;
}
// Build: g++ hash.cpp test_hash.cc -o test_hash -std=c++11