一、开链法的优势
闭散列(开放定址法)最大的局限性是空间利用率低:例如载荷因子为0.7时,仍有30%的空间未被利用。
使用开链法可以让载荷因子达到1,平均每条链上只挂常数个数据;开链法哈希表的桶数组大小取自一张递增的素数表,每次扩容时取表中的下一个素数。
· 那么什么时候扩容呢?当元素个数等于桶的个数,即载荷因子达到1时,就开始扩容(此时平均每条链上挂一个数据,但并不要求每条链上都有数据)。
二、KV节点和仿函数的定义
1、定义KV节点
// One key/value entry in a bucket chain of the hash table.
// Nodes in the same bucket are linked through _next.
template<class K, class V>
struct HashTableNode
{
    K _key;                      // key stored in this entry
    V _value;                    // value associated with _key
    HashTableNode<K, V>* _next;  // next node in the same chain, NULL at the tail

    // Copies `key` and `value` into a node that is not linked to any chain yet.
    HashTableNode(const K& key, const V& value)
        : _key(key), _value(value), _next(NULL)
    {
    }
};
2、定义仿函数
// Default hash functor: the key itself, implicitly converted to size_t,
// is used as the hash value. K must therefore be implicitly convertible
// to size_t (integral keys, for example).
template<class K>
struct DefaultHashFuncer
{
    size_t operator()(const K& key)
    {
        const size_t hashValue = key;
        return hashValue;
    }
};
// Hash functor specialization for string keys, built on the BKDR string hash.
template<>
struct DefaultHashFuncer<string>
{
    // BKDR hash of a NUL-terminated C string: for every character,
    // hash = hash * seed + ch; the top bit of the 32-bit result is masked off.
    static size_t BKDRHash(const char* str)
    {
        const unsigned int seed = 131; // 31 131 1313 13131 131313
        unsigned int hash = 0;
        for (; *str; ++str)
        {
            hash = hash * seed + (*str);
        }
        return (hash & 0x7FFFFFFF);
    }

    // Hashes the characters of `str` up to its first NUL (via c_str()).
    size_t operator()(const string& str)
    {
        return BKDRHash(str.c_str());
    }
};
三、具体的实现
1、函数声明
// Hash table that resolves collisions by chaining: every slot of _tables is
// the head of a singly linked list of HashTableNode<K, V> entries.
// HashFuncer is the functor type used to turn a key into a size_t hash.
template<class K, class V, class HashFuncer = DefaultHashFuncer<K>>
class HashTableBucket
{
    using Node = HashTableNode<K, V>;

public:
    // Construction, copy, assignment, destruction.
    HashTableBucket();
    HashTableBucket(const HashTableBucket& ht);
    HashTableBucket& operator=(HashTableBucket ht);
    ~HashTableBucket();

    // Adds a key/value pair; the bool reports success.
    bool Insert(const K& key, const V& value);
    // Looks up a key and returns its node pointer.
    Node* Find(const K& key);
    // Removes a key; the bool reports whether anything was removed.
    bool Remove(const K& key);
    // Dumps every bucket chain to standard output.
    void PrintTables();

protected:
    // Maps a key to a bucket index.
    size_t _HashFunc(const K& key);
    // Grows the bucket array when the table is full.
    void _CheckExpand();
    // Picks the next bucket-array size from the prime table.
    size_t _GetNextPrime(size_t size);
    // Frees every node.
    void _Clear();

    vector<Node*> _tables;  // bucket heads, one chain per slot
    size_t _size;           // number of stored entries
};
2、具体实现
(1)默认构造函数
// Creates an empty table: no buckets are allocated and the element count is 0.
HashTableBucket()
    : _tables()
    , _size(0)
{
}
(2)拷贝构造函数
HashTableBucket(const HashTableBucket<K,V, HashFuncer>& ht)
{
_tables.resize(ht._tables.size());
for (int i = 0; i < ht._tables.size(); ++i)
{
Node* cur = ht._tables[i];
while (cur)
{
Insert(cur->_key, cur->_value);
cur = cur->_next;
}
}
}
(3)operator=
// Copy-and-swap assignment. `ht` is taken by value, so it is already a copy
// of the right-hand side; exchanging state with it hands this object the new
// contents and leaves the previous contents in `ht`, whose destructor
// releases them on return.
HashTableBucket<K, V, HashFuncer>& operator=(HashTableBucket<K, V, HashFuncer> ht)
{
    std::swap(_size, ht._size);
    _tables.swap(ht._tables);
    return *this;
}
(4)析构函数
// Destructor: frees every node in every bucket chain, then resets the count.
~HashTableBucket()
{
    this->_Clear();
    this->_size = 0;
}
(5)插入数据
bool Insert(const K& key, const V& value)
{
//检查负载因子是否需要扩张
_CheckExpand();
size_t index = _HashFunc(key);
//检查是否存在
Node* begin = _tables[index];
while (begin)
{
if (begin->_key == key)
{
return false;
}
begin = begin->_next;
}
//头插
Node* tmp = new Node(key, value);
tmp->_next = _tables[index];
_tables[index] = tmp;
++_size;
return true;
}
(6)查找数据是否在表中
// Looks up `key`.
// Returns a pointer to the node that stores it, or NULL when the key is not
// in the table.
Node* Find(const K& key)
{
    // A table without buckets (e.g. default-constructed, nothing inserted yet)
    // cannot contain the key; return before _HashFunc takes a modulo by zero.
    if (_tables.empty())
    {
        return NULL;
    }

    Node* cur = _tables[_HashFunc(key)];
    while (cur != NULL && !(cur->_key == key))
    {
        cur = cur->_next;
    }
    return cur;
}
(7)移除数据
bool Remove(const K& key)
{
size_t index = _HashFunc(key);
Node* cur = _tables[index];
Node* prev = NULL;
//1。空
while (cur)
{
if (cur->_key == key)
{
break;
}
prev = cur;
cur = cur->_next;
}
if (cur)
{
if (cur == _tables[index])
{
_tables[index] = cur->_next;
}
else
{
prev->_next = cur->_next;
}
delete cur;
return true;
}
return false;
}
(8)打印哈希表
// Prints every bucket on its own line as "Tables[i]->key->key->...NULLL",
// followed by one blank line after the last bucket.
void PrintTables()
{
    for (size_t i = 0; i < _tables.size(); ++i)
    {
        // Stream the index instead of printf("%d"): i is a size_t, and "%d"
        // expects an int.
        cout << "Tables[" << i << "]->";
        for (Node* cur = _tables[i]; cur != NULL; cur = cur->_next)
        {
            cout << cur->_key << "->";
        }
        cout << "NULLL" << endl;
    }
    cout << endl;
}
定义的protected的函数
(9)哈希函数
size_t _HashFunc(const K& key)
{
return HashFuncer()(key) % _tables.size();
}
(10)自动扩容
void _CheckExpand()
{
//素数表
//负载因子到1,进行扩容
if (_size == _tables.size())
{
size_t newSize = _GetNextPrime(_size);
if (newSize == _size)
{
return;
}
vector<Node*> newTables;
newTables.resize(newSize);
for (size_t i = 0; i < _tables.size(); ++i)
{
Node* cur = _tables[i];
while (cur)
{
Node* tmp = cur;
cur = cur->_next;
size_t index = _HashFunc(tmp->_key);
tmp->_next = newTables[index];
newTables[index] = tmp;
_tables[i] = NULL;
}
}
_tables.swap(newTables);
}
}
(11)从素数表中获取下一个素数(作为扩容后的桶个数)
// Returns the smallest entry of the built-in prime table that is strictly
// greater than `size`. When no entry is greater, the largest entry is
// returned.
size_t _GetNextPrime(size_t size)
{
    static const size_t kPrimeCount = 28;
    static const unsigned long kPrimeList[kPrimeCount] =
    {
        53ul, 97ul, 193ul, 389ul, 769ul,
        1543ul, 3079ul, 6151ul, 12289ul, 24593ul,
        49157ul, 98317ul, 196613ul, 393241ul, 786433ul,
        1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul,
        50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul,
        1610612741ul, 3221225473ul, 4294967291ul
    };

    // Linear scan from the start; a cached static index could avoid rescanning.
    // The comparison uses the `size` argument, not the member counter.
    for (size_t i = 0; i < kPrimeCount; ++i)
    {
        if (kPrimeList[i] > size)
        {
            return kPrimeList[i];
        }
    }
    return kPrimeList[kPrimeCount - 1];
}
(12)清理节点的函数
void _Clear()
{
for (int i = 0; i < _tables.size(); ++i)
{
Node* cur = _tables[i];
while (cur)
{
Node* del = cur;
cur = cur->_next;
delete del;
}
_tables[i] = NULL;
}
}
四、测试用例
// Demo: a dictionary mapping one word to a list of translations.
// Inserts an entry, appends another translation through the node returned by
// Find, then prints every translation on its own line.
void TestDict()
{
    vector<string> v;
    v.push_back("delete");
    v.push_back("remove");

    HashTableBucket<string, vector<string>> dict;
    dict.Insert("删除", v);

    // The key was inserted just above, so the lookup result is used directly.
    HashTableNode<string, vector<string>>* ret = dict.Find("删除");
    vector<string>& words = ret->_value;
    words.push_back("erase");

    for (vector<string>::iterator it = words.begin(); it != words.end(); ++it)
    {
        cout << *it << endl;
    }
}