hash表应用范围很广,实现一个hash表有两个重要因素。1,hash函数的选择,很多研究人员都给出了性能卓越的函数;2,解决冲突,最常见的是链表的方法,还有开放定址法等方法。redis的hash表(在dict.c dict.h中)用的hash函数是Thomas Wang's 32 bit Mix Function 和MurmurHash2,整个hash实现相当精致而且它最大的特色在于可以实现自动扩容,这样可以解决负载因子过大产生的问题。整个redis hash内存布局如下
redis hash的结构体定义如下
- typedef struct dictEntry { // one entry (node) stored in the hash table
- void *key; // pointer to the entry's key
- union {
- void *val;
- uint64_t u64;
- int64_t s64;
- } v; // the entry's value: a pointer, or an unsigned/signed 64-bit integer sharing the same storage
- struct dictEntry *next; // next entry chained in the same bucket
- } dictEntry; // defines one node of the hash table: key / value / pointer to the next node
- typedef struct dictType { // per-dictionary table of user-supplied callbacks
- unsigned int (*hashFunction)(const void *key); // computes a hash value from a key #1
- void *(*keyDup)(void *privdata, const void *key); // produces the key to store (presumably a duplicate, judging by the name - confirm) #2
- void *(*valDup)(void *privdata, const void *obj); // produces the value to store (presumably a duplicate, judging by the name - confirm) #3
- int (*keyCompare)(void *privdata, const void *key1, const void *key2);// compares two keys #4
- void (*keyDestructor)(void *privdata, void *key); // releases the contents of a key #5
- void (*valDestructor)(void *privdata, void *obj); // releases a value #6
- } dictType; // the basic operations used to manipulate the hash table
- /* This is our hash table structure. Every dictionary has two of this as we
- * implement incremental rehashing, for the old to the new table. */
- typedef struct dictht {
- dictEntry **table; // array of buckets; each slot heads a chain of dictEntry nodes (NULL when empty)
- unsigned long size; // size of the table in buckets (per the original note, always a power of two)
- unsigned long sizemask; // per the original note, size - 1; a hash value is bitwise-ANDed with it to obtain the bucket index
- unsigned long used; // number of entries stored in this table (rehashing adjusts it once per entry moved), not the number of occupied buckets
- } dictht;
- typedef struct dict { // the dictionary: callbacks, private data and the two hash tables
- dictType *type; // callbacks used to hash, compare, duplicate and destroy keys/values
- void *privdata; // opaque pointer handed to the dictType callbacks as their privdata argument
- dictht ht[2];// two hash tables: outside of rehashing, entries live in ht[0]. NOTE(review): the original note says expansion is triggered once the load factor (entries / buckets) reaches a threshold and cites dict_force_resize_ratio = 5; that logic is not shown here - confirm
- int rehashidx; /* rehashing not in progress if rehashidx == -1 */
- int iterators; /* number of iterators currently running; per the original note, rehashing is not performed while iterators > 0 - confirm against the rehash callers */
- } dict;
- /* If safe is set to 1 this is a safe iterator, that means, you can call
- * dictAdd, dictFind, and other functions against the dictionary even while
- * iterating. Otherwise it is a non safe iterator, and only dictNext()
- * should be called while iterating. */
- typedef struct dictIterator {
- dict *d; // the dictionary being iterated
- int table, index, safe; // safe: 1 for a safe iterator; table/index presumably select the ht[] slot and bucket being visited - confirm
- dictEntry *entry, *nextEntry; // presumably the current entry and the one saved to visit next - confirm against dictNext()
- } dictIterator;
1,hash表的创建
- /* Create a new dictionary.
- * Allocates the dict structure itself with zmalloc (per the original note,
- * Redis' wrapper around malloc) and hands it to _dictInit together with the
- * callback table and the caller's private-data pointer. Per the original
- * note, _dictInit initialises the dict fields and resets ht[0] and ht[1].
- * Returns the newly allocated dict. */
- dict *dictCreate(dictType *type,
- void *privDataPtr)
- {
- dict *created;
- created = zmalloc(sizeof *created);
- _dictInit(created, type, privDataPtr);
- return created;
- }
创建hash表的时候,只分配并初始化了dict结构体本身,并没有为桶数组申请内存空间;当增加一个key的时候,才会真正分配hash表(桶数组)的内存。
- /* Add a key/value pair to the dictionary.
- * dictAddRaw is asked for the entry that will hold the key; when it yields
- * no entry the call fails with DICT_ERR. Otherwise the value is stored in
- * that entry via dictSetVal and DICT_OK is returned. */
- int dictAdd(dict *d, void *key, void *val)
- {
- dictEntry *added = dictAddRaw(d, key);
- if (added) {
- dictSetVal(d, added, val);
- return DICT_OK;
- }
- return DICT_ERR;
- }
若正在处于rehash中,则在ht[1]表中插入key,否则只在ht[0]中插入key。
- static int _dictKeyIndex(dict *d, const void *key)
- {
- //... (local declarations elided in this excerpt)
- /* Expand the table if needed; per the original note, this is where the memory for the hash buckets gets allocated. A failure is reported as -1. */
- if (_dictExpandIfNeeded(d) == DICT_ERR)
- return -1;
- /* Compute the hash value used to locate the key's bucket */
- h = dictHashKey(d, key);
- for (table = 0; table <= 1; table++) {
- // (loop body elided in this excerpt) per the original note: checks that the key is not already present and returns -1 directly if a duplicate is found
- }
3,redis的hash表实现rehash
- /* Perform up to n steps of incremental rehashing. Each step moves every
- * entry of one non-empty ht[0] bucket into ht[1].
- * Returns 0 when no rehash is in progress, or when ht[0] is found empty at
- * the start of a step: the old bucket array is then freed, ht[1] becomes
- * ht[0], ht[1] is reset and rehashidx goes back to -1.
- * Returns 1 when all n steps were consumed without reaching that check, so
- * the caller should call again. */
- int dictRehash(dict *d, int n) {
- if (!dictIsRehashing(d)) return 0;
- for (; n != 0; n--) {
- dictht *src = &d->ht[0];
- dictht *dst = &d->ht[1];
- dictEntry *cur, *following;
- /* Nothing left in the old table: the rehash is complete. */
- if (src->used == 0) {
- zfree(src->table);
- *src = *dst;
- _dictReset(dst);
- d->rehashidx = -1;
- return 0;
- }
- /* used != 0 means a non-empty bucket still lies at or after rehashidx,
- * so the scan below is expected to stop inside the table. */
- assert(src->size > (unsigned)d->rehashidx);
- /* Skip empty buckets. */
- while (src->table[d->rehashidx] == NULL) {
- d->rehashidx++;
- }
- /* Walk the chain, pushing each entry onto the head of its new bucket. */
- for (cur = src->table[d->rehashidx]; cur; cur = following) {
- unsigned int slot;
- following = cur->next; /* saved before cur->next is overwritten */
- /* Re-hash the key and mask it to get the bucket index in ht[1]. */
- slot = dictHashKey(d, cur->key) & dst->sizemask;
- cur->next = dst->table[slot];
- dst->table[slot] = cur;
- src->used--;
- dst->used++;
- }
- /* The old bucket is now empty; clear its head and advance. */
- src->table[d->rehashidx] = NULL;
- d->rehashidx++;
- }
- return 1;
- }