浅析Redis基础数据结构

最新推荐文章于 2024-06-02 14:03:03 发布

「已注销」

最新推荐文章于 2024-06-02 14:03:03 发布

阅读量490

点赞数 1

文章标签： redis 数据结构哈希算法

本文链接：https://blog.youkuaiyun.com/ma_xiao_qi/article/details/124769889

版权

本文深入剖析Redis的基础数据结构，重点讨论全局哈希表的实现以及value类型的List、Hash、Set和ZSet。Redis使用哈希表作为key-value的基础，通过SipHash算法和链表处理哈希冲突。在扩容时，Redis采用渐进式rehash策略以保持高性能。value类型的List由压缩列表（ziplist）和双向链表实现，优化内存使用和CPU缓存。Hash类型在元素数量超过一定阈值时，从ziplist转换为散列表。Set和ZSet则根据元素数量和大小选择数组、散列表或跳表实现，以平衡空间和时间效率。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

Redis是一种内存数据库，所以可以很方便地直接基于内存中的数据结构对外提供众多的接口，而这些接口实际上就是对不同的数据结构进行操作的算法。首先，Redis本身是一种key-value的数据库，对于value，常见的类型有：

字符串(string)、散列(hash)、列表(list)、集合(set)、排序集合(sorted set)、位图(bitmaps)、地理空间索引(Geospatial indexes)、流(streams)

1.全局哈希表实现

key-value是redis中最基础的结构，key-value是采用哈希表（hash table）这种基础的数据结构来实现的，其中key是字符串类型，而value则会有上面说的各种数据类型。

哈希表是由基础的哈希函数和数组构成的，哈希函数采用的是SipHash算法。数组本身无法存储多种类型的数据，所以数组元素本身是一个指针，指向具体的元素（entry），这个entry又存储了key和value的地址，具体的value也是一个比较复杂的数据结构。整个key-value结构我们可以称为全局哈希表，如下图：

通常情况下，哈希表查找的平均时间复杂度是O(1)，所以在Redis中按照key来查找元素的复杂度也是O(1)，Redis面对大量的key也能保持较高的性能。但是，保持高性能的前提是哈希冲突比较少：随着数组不断被填满，哈希冲突的概率会不断提高，所以需要和普通的哈希表一样进行扩容，这个过程叫做rehash。rehash过程需要大量的数据搬迁工作，而Redis采用的是单线程模型，假如要搬迁的元素过多，就会占用很多CPU时间，从而长时间阻塞其他请求的执行。所以普通哈希表存在的问题在Redis中都会遇到，有两种情况会导致Redis性能降低：

哈希冲突
扩容搬迁

Redis解决哈希冲突采用的办法也是链表法，这时候数组元素指针指向的是链表的头指针，当链表中元素个数过多时就会执行扩容，参考：

// 来源：
// https://github.com/redis/redis/blob/5.0/src/dict.h
// https://github.com/redis/redis/blob/5.0/src/dict.c

/* One key/value node of the hash table. Entries that fall into the same
 * bucket are linked into a singly linked chain through `next`. */
typedef struct dictEntry {
    void *key;                  /* Pointer to the key. */
    /* The value. The members share the same storage, so an entry holds
     * exactly one of: a generic pointer, an unsigned or signed 64-bit
     * integer, or a double. */
    union {
        void *val;
        uint64_t u64;
        int64_t s64;
        double d;
    } v;
    struct dictEntry *next;     /* Next entry in the same bucket chain, or NULL. */
} dictEntry;

/* Forward declaration: the callbacks below receive a pointer to the
 * dictionary, but the dictionary structure itself is only defined after
 * dictType. Without this, the callback prototypes would refer to an
 * undeclared type. */
struct dict;

/* Dictionary type definition: a table of callbacks that customizes how a
 * dictionary treats its keys and values.
 *
 * NOTE(review): the role of each callback below is inferred from its name and
 * signature; the code that invokes them is not part of this excerpt. */
typedef struct dictType {
    /* Maps a key to its 64-bit hash value. */
    uint64_t (*hashFunction)(const void *key);
    /* Presumably produce the copy of a key / value that gets stored. */
    void *(*keyDup)(struct dict *d, const void *key);
    void *(*valDup)(struct dict *d, const void *obj);
    /* Presumably returns non-zero when the two keys are equal. */
    int (*keyCompare)(struct dict *d, const void *key1, const void *key2);
    /* Presumably release a stored key / value. */
    void (*keyDestructor)(struct dict *d, void *key);
    void (*valDestructor)(struct dict *d, void *obj);
    /* Presumably consulted before growing the table; verify against caller. */
    int (*expandAllowed)(size_t moreMem, double usedRatio);
    /* Allow a dictEntry to carry extra caller-defined metadata.  The
     * extra memory is initialized to 0 when a dictEntry is allocated. */
    size_t (*dictEntryMetadataBytes)(struct dict *d);
} dictType;

/* This is our hash table structure. Every dictionary has two of this as we
 * implement incremental rehashing, for the old to the new table. */
typedef struct dictht {
    dictEntry **table;          /* Bucket array; each slot is the head of an entry chain or NULL. */
    unsigned long size;         /* Number of slots in `table` (bucket indexes stay below it). */
    unsigned long sizemask;     /* ANDed with a key's hash to obtain its bucket index. */
    unsigned long used;         /* Number of entries currently stored in this table. */
} dictht;

/* The dictionary itself: a pair of hash tables plus the bookkeeping needed
 * to migrate entries from one to the other incrementally. */
typedef struct dict {
    dictType *type;     /* Callback table for this dictionary. */
    void *privdata;     /* Opaque pointer; not referenced by the code in this excerpt. */
    /* ht[0] is the table entries are migrated out of during a rehash and
     * ht[1] is the table they are migrated into; once the migration is done
     * ht[1] is copied over ht[0] and reset. */
    dictht ht[2];
    /* Index of the next ht[0] bucket to migrate; -1 means no rehash is running. */
    long rehashidx; /* rehashing not in progress if rehashidx == -1 */
    /* A single rehash step is skipped while this is non-zero. */
    unsigned long iterators; /* number of iterators currently running */
} dict;

/* Perform up to n steps of incremental rehashing.
 *
 * A step migrates every entry chained in one non-empty bucket of ht[0] into
 * ht[1]. Empty buckets are stepped over, but only n*10 of them per call:
 * when that budget runs out the function returns early.
 *
 * Returns 1 when entries are still left in ht[0] (or the empty-bucket budget
 * was exhausted), and 0 when no rehash is in progress or the migration has
 * just been completed. On completion ht[1] replaces ht[0], ht[1] is reset and
 * rehashidx goes back to -1. */
int dictRehash(dict *d, int n) {
    dictht *src = &d->ht[0];    /* Table being drained. */
    dictht *dst = &d->ht[1];    /* Table being filled. */
    int skips_left = n*10;      /* Max number of empty buckets to visit. */

    if (!dictIsRehashing(d)) return 0;

    while (n-- && src->used != 0) {
        dictEntry *cur, *following;

        /* rehashidx cannot run past the end of the table: used != 0
         * guarantees that at least one non-empty bucket is still ahead. */
        assert(src->size > (unsigned long)d->rehashidx);
        while (src->table[d->rehashidx] == NULL) {
            d->rehashidx++;
            if (--skips_left == 0) return 1;
        }

        /* Relink the whole chain of the current bucket into the new table. */
        for (cur = src->table[d->rehashidx]; cur != NULL; cur = following) {
            uint64_t slot;

            /* Remember the successor before the link is overwritten. */
            following = cur->next;
            /* Bucket index in the new table. */
            slot = dictHashKey(d, cur->key) & dst->sizemask;
            /* Push the entry at the head of its new chain. */
            cur->next = dst->table[slot];
            dst->table[slot] = cur;
            src->used--;
            dst->used++;
        }
        src->table[d->rehashidx] = NULL;
        d->rehashidx++;
    }

    /* Entries remain in the old table: more to rehash. */
    if (src->used != 0) return 1;

    /* The old table is empty: drop it and promote the new one. */
    zfree(src->table);
    *src = *dst;
    _dictReset(dst);
    d->rehashidx = -1;
    return 0;
}

/* Run exactly one rehash step on d, but only when no iterators are
 * currently registered on the dictionary (d->iterators == 0). The result of
 * dictRehash() is ignored. */
static void _dictRehashStep(dict *d) {
    if (d->iterators != 0) return;
    dictRehash(d, 1);
}

/* Evaluates to non-zero while an incremental rehash is in progress on d,
 * i.e. whenever rehashidx holds anything other than the -1 sentinel. */
#define dictIsRehashing(d) (!((d)->rehashidx == -1))

/* Add or overwrite.
 *
 * Stores val under key. When the key is not present yet a new entry is
 * created and 1 is returned. When the key already exists its value is
 * replaced, the previous value is released and 0 is returned. */
int dictReplace(dict *d, void *key, void *val)
{
    dictEntry *fresh, *current, previous;

    /* dictAddRaw() yields NULL when the key is already present and, in that
     * case, reports the existing entry through `current`. */
    fresh = dictAddRaw(d,key,&current);
    if (fresh == NULL) {
        /* Install the new value first and release the old one afterwards.
         * The order matters because both may be the very same object: with
         * reference counting the increment (set) has to come before the
         * decrement (free), never the other way around. A copy of the entry
         * keeps the old value reachable after it has been overwritten. */
        previous = *current;
        dictSetVal(d, current, val);
        dictFreeVal(d, &previous);
        return 0;
    }

    /* Brand new key: only the value is left to fill in. */
    dictSetVal(d, fresh, val);
    return 1;
}

/* Insert a new key/value pair into the dictionary.
 *
 * Returns DICT_OK when the pair was added, or DICT_ERR when dictAddRaw()
 * could not create an entry for the key (the value is not stored then). */
int dictAdd(dict *d, void *key, void *val)
{
    dictEntry *slot = dictAddRaw(d,key,NULL);

    if (slot != NULL) {
        dictSetVal(d, slot, val);
        return DICT_OK;
    }
    return DICT_ERR;
}

/* Low level insertion: create an entry for key and return it with only the
 * key set, leaving the value for the caller to fill in.
 *
 * Returns NULL when no entry was created because _dictKeyIndex() reported -1
 * (the key already exists); `existing`, if not NULL, is handed to
 * _dictKeyIndex() so it can report the entry that is already there. */
dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing)
{
    long bucket;
    dictEntry *node;
    dictht *target;

    /* While a rehash is in progress, every insertion also advances the
     * incremental migration by one step. */
    if (dictIsRehashing(d)) _dictRehashStep(d);

    /* Look up the bucket index for the new element; -1 means the key is
     * already present. */
    bucket = _dictKeyIndex(d, key, dictHashKey(d,key), existing);
    if (bucket == -1) return NULL;

    /* During a rehash new entries go straight into the new table. The entry
     * is pushed at the head of the chain, on the assumption that in a
     * database system recently added entries are the ones most likely to be
     * accessed again soon. */
    target = &d->ht[dictIsRehashing(d) ? 1 : 0];
    node = zmalloc(sizeof(*node));
    node->next = target->table[bucket];
    target->table[bucket] = node;
    target->used++;

    /* Set the hash entry fields. */
    dictSetKey(d, node, key);
    return node;
}

static long _dictKeyIndex(dict *d, const void *key, uint64_t hash, dictEntry **existing)
{
    unsigned long idx, table;
    dictEntry *he;
    if (existing) *existing = NULL;

    /* Expand the hash table if needed */
    if (_dictExpandIfNeeded(d) == DICT_ERR)
        return -1;
    // 同时查询两个哈希表
    for (table = 0; table <= 1; table++) {
        idx = hash & d->ht[table].sizemask;
        /* Search if this slot does not already contain the given key */
        he = d->ht[table].table[idx];
        while(he) {
            if (key==he->key || dictCompareKeys(d, key, he->key)) {
                if (existing) *existing = he;

最低0.47元/天解锁文章