深入源码分析Redis Dict数据结构

原创已于 2024-08-26 23:00:39 修改 · 382 阅读

2 ·

CC 4.0 BY-SA版权

文章标签：

#数据结构 #redis #哈希算法

于 2022-08-02 23:58:27 首次发布

Redis 专栏收录该内容

4 篇文章

订阅专栏

本文详细剖析了Redis中的字典数据结构，包括DictEntry、DictHt、Dict等关键组件，并介绍了其渐进式rehash机制，旨在帮助读者理解Redis内部哈希表的实现原理。

Redis的哈希表的实现，主要也分了几个关键的数据类型（结构体），DictEntry 哈希节点，DictHt 哈希表，Dict 字典。

有了个大概的概念，我们看下最新的src/dict.h源码定义：

// Hash table node. Entries that land in the same bucket are linked through
// `next` into a singly linked list, i.e. collisions are resolved by chaining.
typedef struct dictEntry {
    void *key;// untyped pointer to the key; may point to any data type
    // The value: either a pointer to arbitrary data or an inline
    // 64-bit unsigned / 64-bit signed / double stored in the entry itself.
    union {
        void *val;
        uint64_t u64;
        int64_t s64;
        double d;
    } v;
    struct dictEntry *next;// next entry in the same bucket's chain
} dictEntry;

字典类型结构体，声明函数指针

// Table of function pointers that customizes how a dictionary handles its
// keys and values. Every callback except hashFunction takes `privdata` as its
// first argument.
// NOTE(review): the per-callback roles below are inferred from the member
// names and signatures only -- confirm against the dict.h macros that invoke
// them.
typedef struct dictType {
    // Produces the 64-bit hash for a key.
    uint64_t (*hashFunction)(const void *key);
    // Presumably returns a copy of the key to store in the entry.
    void *(*keyDup)(void *privdata, const void *key);
    // Presumably returns a copy of the value to store in the entry.
    void *(*valDup)(void *privdata, const void *obj);
    // Compares two keys; the lookup code treats a non-zero result as "equal".
    int (*keyCompare)(void *privdata, const void *key1, const void *key2);
    // Presumably releases a key when its entry is removed.
    void (*keyDestructor)(void *privdata, void *key);
    // Presumably releases a value when its entry is removed.
    void (*valDestructor)(void *privdata, void *obj);
} dictType;

哈希表结构，该部分是理解字典的关键：table是二级指针，指向一个dictEntry指针数组，数组的每个元素是一个哈希槽，保存该槽链表的头节点指针；size是table数组的长度（槽位数）；sizemask恒等于size-1，用于以位与运算代替取模来计算槽位下标；used是表中已存放的节点（entry）总数，而不是已使用的槽位数，因此used可以大于size。

/* This is our hash table structure. Every dictionary has two of this as we
 * implement incremental rehashing, for the old to the new table. */
typedef struct dictht {
    /* Array of `size` bucket heads; each element is the first dictEntry of
     * that bucket's chain, or NULL when the bucket is empty. */
    dictEntry **table;
    /* Number of buckets in `table`. */
    unsigned long size;
    /* Always size-1; a hash is ANDed with it to obtain the bucket index. */
    unsigned long sizemask;
    /* Number of entries stored in this table (not the number of non-empty
     * buckets), so it can exceed `size`. */
    unsigned long used;
} dictht;

字典结构，ht[] 是两个哈希表，ht[0]是正在使用的哈希表，ht[1]是未初始化的哈希表，用于未来的扩容rehash。redis的rehash是渐进式rehash，依靠rehashidx标志当前处理到哪个哈希槽。渐进式rehash好处是将一瞬间大数据量的迁移负载，分摊到了未来的每一次字典读、写、删除操作中，优势明显：低延时，不长时间阻塞客户端命令，劣势：内存占用大，多了一倍的哈希表内存大小。

/* The dictionary itself: a pair of hash tables plus the state needed to
 * migrate entries incrementally from ht[0] to ht[1]. */
typedef struct dict {
    /* Callback table for this dictionary's key/value handling. */
    dictType *type;
    /* Opaque pointer; presumably handed to the dictType callbacks as their
     * `privdata` argument -- confirm against the dict.h macros. */
    void *privdata;
    /* ht[0] is the table in normal use; ht[1] is only populated while a
     * rehash is in progress and is the migration target. */
    dictht ht[2];
    long rehashidx; /* rehashing not in progress if rehashidx == -1 */
    unsigned long iterators; /* number of iterators currently running */
} dict;

Dict字典的扩容策略分为普通的rehash和渐进式rehash。

普通rehash。扩容时，为ht[1]分配一个长度为第一个大于等于 used*2 的2^n的数组；缩容时，为ht[1]分配一个长度为第一个大于等于 used 的2^n的数组。然后将ht[0]每个哈希槽中的元素rehash到ht[1]，迁移完成后释放ht[0]，让ht[0]指向ht[1]，ht[1]重置为新的空哈希表。

渐进式rehash比较有意思。之所以需要它，是因为普通rehash在表数据量大时要一次性迁移全部数据，会长时间占用执行命令的主线程，导致Redis暂时无法响应其它客户端的请求。

首先将rehashidx=0表示渐进式rehash进行中，并且rehashidx是递增的，后续每个对哈希表的crud操作会顺带将rehashidx的哈希槽链表迁移到ht[1]。迁移完毕后递增rehashidx，当ht[0]的哈希槽全部迁移完毕后将rehashidx置为-1，表示字典rehash完成。

添加数据的逻辑。这里有意思的地方在于如果在rehash的话先去迁移一个旧表的槽数据，并且把本次的新增节点放到新表中。

/* Low level add: allocates a new entry for `key`, links it into the
 * dictionary and returns it. Only the key is stored here; the value field
 * of the returned entry is left for the caller to fill in.
 *
 * d        - dictionary to insert into.
 * key      - key to store in the new entry (stored via dictSetKey()).
 * existing - optional out-parameter (may be NULL). It is forwarded to
 *            _dictKeyIndex(), which stores there the entry that already
 *            holds `key`, if any.
 *
 * Returns the newly created entry, or NULL when _dictKeyIndex() reports -1
 * (the key is already present, or the table could not be expanded). */
dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing)
{
    long index;
    dictEntry *entry;
    dictht *ht;

    /* While a rehash is in progress, every add pays for one migration step. */
    if (dictIsRehashing(d)) _dictRehashStep(d);

    /* Get the bucket index for the new element, or -1 if the key already
     * exists. */
    if ((index = _dictKeyIndex(d, key, dictHashKey(d,key), existing)) == -1)
        return NULL;

    /* Allocate the entry and insert it at the head of the bucket's chain:
     * head insertion is O(1) and rests on the assumption that recently added
     * entries are the ones most likely to be accessed again soon.
     * While rehashing, new entries always go to the new table (ht[1]) so that
     * ht[0] only ever shrinks. */
    ht = dictIsRehashing(d) ? &d->ht[1] : &d->ht[0];
    entry = zmalloc(sizeof(*entry));
    entry->next = ht->table[index];
    ht->table[index] = entry;
    ht->used++;

    /* Store the key in the new entry. */
    dictSetKey(d, entry, key);
    return entry;
}

看看redis怎么计算key的位置，思路还是常规的哈希算法，不过多了一步查新表的逻辑

/* Computes the bucket index at which a new entry for `key` should be
 * inserted, given its precomputed `hash`.
 *
 * Returns -1 when the table could not be initialized/expanded, or when the
 * key is already present. In the latter case, if `existing` is non-NULL, it
 * receives the entry holding the key; otherwise `*existing` is set to NULL.
 *
 * While a rehash is in progress both tables are searched, and the returned
 * index refers to the second table (ht[1]), which is where new entries are
 * inserted during rehashing. */
static long _dictKeyIndex(dict *d, const void *key, uint64_t hash, dictEntry **existing)
{
    unsigned long slot = 0;

    if (existing) *existing = NULL;

    /* Make sure a table exists and grow it if required; this call may
     * allocate a new table and start a rehash. */
    if (_dictExpandIfNeeded(d) == DICT_ERR)
        return -1;

    for (int t = 0; t < 2; t++) {
        dictht *cur_table = &d->ht[t];

        /* size is a power of two, so masking is equivalent to hash % size. */
        slot = hash & cur_table->sizemask;

        /* Walk the chain to see whether the key is already stored. */
        for (dictEntry *node = cur_table->table[slot]; node != NULL; node = node->next) {
            if (key == node->key || dictCompareKeys(d, key, node->key)) {
                if (existing) *existing = node;
                return -1;
            }
        }

        /* Without a rehash in progress ht[1] is empty: stop after ht[0]. */
        if (!dictIsRehashing(d)) break;
    }

    return slot;
}

字典查找key，源码还是比较简单的，比Java的HashMap还要简单因为这里就单链表，还不用考虑链表树化和红黑树查找和退化

/* Looks up `key` in dictionary `d`.
 *
 * d   - dictionary to search.
 * key - key to look for.
 *
 * Returns the entry holding the key, or NULL if the key is not present.
 * As a side effect, performs one incremental rehash step when a rehash is
 * in progress. */
dictEntry *dictFind(dict *d, const void *key)
{
    dictEntry *he;
    uint64_t h, idx, table;

    if (d->ht[0].used + d->ht[1].used == 0) return NULL; /* dict is empty */
    /* A rehash is in progress (rehashidx != -1): migrate one bucket first. */
    if (dictIsRehashing(d)) _dictRehashStep(d);
    /* Hash the key once; the same hash is masked per table below. */
    h = dictHashKey(d, key);
    /* Search ht[0] first, then ht[1] only if a rehash is in progress. */
    for (table = 0; table <= 1; table++) {
        idx = h & d->ht[table].sizemask;
        he = d->ht[table].table[idx];
        while(he) {
            if (key==he->key || dictCompareKeys(d, key, he->key))
                return he;
            he = he->next;
        }
        /* Not rehashing: ht[1] holds nothing, so the key is absent. */
        if (!dictIsRehashing(d)) return NULL;
    }
    return NULL;
}

字典的删除源码就不看了，基本上过程和查找差不多：1.定位槽，找到entry 2.链表删除节点，释放内存 3.更新字典entry数

从源码中我们知道新增数据dictAddRaw()、查找数据dictFind()、删除数据dictGenericDelete() 都会触发_dictRehashStep()，判断依据就是rehashidx变量!=-1，非-1时它表示旧表中下一个待迁移的槽位下标。那么是什么时候、什么地方将rehashidx标志位开启呢？

在增加数据时，_dictKeyIndex()负责算出key在表中的位置，计算前会先调用_dictExpandIfNeeded()看是否需要扩容。扩容的条件有点意思

/* Decides whether the dictionary's main table must be created or grown, and
 * calls dictExpand() when it must.
 *
 * Returns DICT_OK when nothing had to be done, otherwise whatever
 * dictExpand() returns. */
static int _dictExpandIfNeeded(dict *d)
{
    const dictht *main_table = &d->ht[0];

    /* A rehash is already under way: nothing to decide here. */
    if (dictIsRehashing(d)) return DICT_OK;

    /* Empty table: create it at the initial size. */
    if (main_table->size == 0) return dictExpand(d, DICT_HT_INITIAL_SIZE);

    /* Fewer entries than buckets: load factor below 1, no growth needed. */
    if (main_table->used < main_table->size) return DICT_OK;

    /* Load factor is at least 1. Grow when resizing is allowed
     * (dict_can_resize); when it is not, grow anyway only once the
     * entries-per-bucket ratio exceeds dict_force_resize_ratio. */
    if (!dict_can_resize &&
        main_table->used / main_table->size <= dict_force_resize_ratio)
    {
        return DICT_OK;
    }

    return dictExpand(d, main_table->used * 2);
}

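需要扩容的情况有两种：哈希表尚未初始化；或者entry数已达到槽数（used >= size），并且当前允许resize，或虽不允许resize但entry数与槽数之比已超过5。dictExpand()会新建一张长度为第一个大于等于所请求size（扩容时传入的是used*2）的2^n的哈希表，并初始化新表的size、sizemask、used。如果是首次初始化，新表直接作为ht[0]；否则新表放入ht[1]，并把字典的rehashidx置为0，开启渐进式rehash标志位。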

/* Creates the hash table, or starts growing/resizing it.
 *
 * d    - dictionary to operate on.
 * size - requested minimum number of buckets; it is rounded by
 *        _dictNextPower() before use. Callers in this file pass either
 *        DICT_HT_INITIAL_SIZE or twice the current entry count.
 *
 * Returns DICT_ERR if a rehash is already in progress, if `size` is smaller
 * than the number of stored entries, or if the rounded size equals the
 * current size. Returns DICT_OK otherwise.
 *
 * On first initialization the new table becomes ht[0] directly. Otherwise it
 * is installed as ht[1] and rehashidx is set to 0, so that subsequent
 * operations migrate entries incrementally. */
int dictExpand(dict *d, unsigned long size)
{
    /* the size is invalid if it is smaller than the number of
     * elements already inside the hash table */
    if (dictIsRehashing(d) || d->ht[0].used > size)
        return DICT_ERR;

    dictht n; /* the new hash table */
    /* Bucket count actually used, as rounded by _dictNextPower()
     * (presumably the first power of two >= size). */
    unsigned long realsize = _dictNextPower(size);

    /* Rehashing to the same table size is not useful. */
    if (realsize == d->ht[0].size) return DICT_ERR;

    /* Allocate the new, zeroed bucket array and initialize the bookkeeping. */
    n.size = realsize;
    n.sizemask = realsize-1;
    n.table = zcalloc(realsize*sizeof(dictEntry*));
    n.used = 0;

    /* No table yet: this is the first initialization, not a rehash. */
    if (d->ht[0].table == NULL) {
        d->ht[0] = n;
        return DICT_OK;
    }
    /* Install the new table as the rehash target and arm rehashidx; the next
     * lookup/add/delete operations will start migrating buckets. */
    d->ht[1] = n;
    d->rehashidx = 0;
    return DICT_OK;
}

好了，上面说了在新增数据时判断是否需要扩容，开启标志位。现在回来看看是如何做的扩容？

渐进式rehash的工作逻辑在 _dictRehashStep(dict *d) 中。可看到源码中是委托给dictRehash()来做，redis将dict的rehash逻辑封装成通用的方法，不同的调用方只需要传递字典和需要迁移的哈希槽个数。渐进式rehash的工作特点就是每次只迁移一个哈希槽的节点数据。

/* Performs a single step of incremental rehashing, but only when no
 * iterators are currently registered on the dictionary (d->iterators == 0).
 * Moving entries between the two tables while an iterator is running could
 * make it miss or revisit elements.
 *
 * Called from the common lookup and update paths so that the old table is
 * migrated to the new one gradually while the dictionary is in use. */
static void _dictRehashStep(dict *d) {
    if (d->iterators != 0) return;
    dictRehash(d, 1);
}

通用的rehash工作模板。首先本次要迁移的哈希槽是来自dict本身的rehashidx变量，从它开始迁移非空的槽位，如果遇到空槽则跳过，最多跳过10个空槽（因为这里n=1）。本次迁移完毕后根据旧表的used变量判断还有没有节点待迁移的，没有则返回0，有返回1。

/* Runs up to `n` steps of incremental rehashing.
 *
 * One step moves one non-empty bucket (its whole chain) from ht[0] to ht[1].
 * Empty buckets encountered on the way are skipped, but at most n*10 of them
 * are visited per call so that the amount of work stays bounded; when that
 * budget is exhausted the function returns early.
 *
 * Returns 1 if there are still keys left to move from the old table,
 * 0 if no rehash is in progress or the rehash has just completed. */
int dictRehash(dict *d, int n) {
    int empty_budget = n * 10; /* max number of empty buckets to visit */

    if (!dictIsRehashing(d)) return 0;

    /* `n` bounds the number of buckets migrated in this call. */
    for (; n != 0 && d->ht[0].used != 0; n--) {
        /* rehashidx cannot run past the table: ht[0].used != 0 guarantees
         * that a non-empty bucket still lies ahead. */
        assert(d->ht[0].size > (unsigned long)d->rehashidx);

        /* Advance to the next non-empty bucket, giving up for now once the
         * empty-bucket budget is spent. */
        while (d->ht[0].table[d->rehashidx] == NULL) {
            d->rehashidx++;
            if (--empty_budget == 0) return 1;
        }

        /* Move every entry of this bucket to the head of its destination
         * chain in the new table, keeping both `used` counters in sync. */
        dictEntry *node = d->ht[0].table[d->rehashidx];
        while (node != NULL) {
            dictEntry *following = node->next;
            uint64_t dest = dictHashKey(d, node->key) & d->ht[1].sizemask;

            node->next = d->ht[1].table[dest];
            d->ht[1].table[dest] = node;
            d->ht[0].used--;
            d->ht[1].used++;
            node = following;
        }

        d->ht[0].table[d->rehashidx] = NULL;
        d->rehashidx++;
    }

    /* Entries remain in the old table: more work for a later call. */
    if (d->ht[0].used != 0) return 1;

    /* Old table fully drained: free it, promote ht[1] to ht[0], reset ht[1]
     * and mark the rehash as finished. */
    zfree(d->ht[0].table);
    d->ht[0] = d->ht[1];
    _dictReset(&d->ht[1]);
    d->rehashidx = -1;
    return 0;
}