Redis是一种内存数据库,所以可以很方便的直接基于内存中的数据结构,对外提供众多的接口,而这些接口实际上就是对不同的数据结构进行操作的算法,首先redis本身是一种key-value的数据库,对于value常见的类型有:
字符串(string)、散列(hash)、列表(list)、集合(set)、排序集合(sorted set)、位图(bitmaps)、地理空间索引(Geospatial indexes)、流(streams)
1.全局哈希表实现
key-value是redis中最基础的结构,key-value是采用哈希表(hash table)这种基础的数据结构来实现的,其中key是字符串类型,而value则会有上面说的各种数据类型。
哈希表是由基础的哈希函数和数组来构成了,哈希函数采用的SipHash算法,数组本身无法存储多种类型的数据,所以数组元素本身是一个指针,指向具体的元素(entry),这个entry又存储了key和value的地址,具体value也是也是一个比较复杂的数据结构,整个key-value我们可以称为全局哈希表,如下图:
通常情况下哈希表查找的平均时间复杂度是O(1),所以在Redis中按照key来查找元素的复杂度也是O(1),所以Redis对于大量的key也能保持较高的性能,但是保持高性能的前提是哈希冲突的情况比较少,随着数组不断被填满,哈希冲突的概率会不断提高,所以需要和普通的哈希表一样进行扩容,这个过程叫做rehash,rehash过程需要大量的数据搬迁工作,由于Redis是采用单线程的模型,假如要搬迁的元素过多会占用很多的CPU时间,从而导致长时间阻塞其他请求的执行,所以普通哈希表存在的问题在Redis中都会遇到,有两种情况会导致Redis性能的降低:
- 哈希冲突
- 扩容搬迁
Redis解决哈希冲突采用的办法也是链表法,这时候数组元素指针指向的是链表的头指针,当链表中元素个数过多时就会执行扩容,参考:
// 来源:
// https://github.com/redis/redis/blob/5.0/src/dict.h
// https://github.com/redis/redis/blob/5.0/src/dict.c
typedef struct dictEntry {
void *key;
union {
void *val;
uint64_t u64;
int64_t s64;
double d;
} v;
struct dictEntry *next;
} dictEntry;
// 字典类型定义
typedef struct dictType {
uint64_t (*hashFunction)(const void *key);
void *(*keyDup)(dict *d, const void *key);
void *(*valDup)(dict *d, const void *obj);
int (*keyCompare)(dict *d, const void *key1, const void *key2);
void (*keyDestructor)(dict *d, void *key);
void (*valDestructor)(dict *d, void *obj);
int (*expandAllowed)(size_t moreMem, double usedRatio);
/* Allow a dictEntry to carry extra caller-defined metadata. The
* extra memory is initialized to 0 when a dictEntry is allocated. */
size_t (*dictEntryMetadataBytes)(dict *d);
} dictType;
/* This is our hash table structure. Every dictionary has two of this as we
* implement incremental rehashing, for the old to the new table. */
typedef struct dictht {
dictEntry **table;
unsigned long size;
unsigned long sizemask;
unsigned long used;
} dictht;
// hash类型定义
typedef struct dict {
dictType *type;
void *privdata;
dictht ht[2];
// -1表示没有运行rehash
long rehashidx; /* rehashing not in progress if rehashidx == -1 */
unsigned long iterators; /* number of iterators currently running */
} dict;
int dictRehash(dict *d, int n) {
// 空桶间隔
int empty_visits = n*10; /* Max number of empty buckets to visit. */
if (!dictIsRehashing(d)) return 0;
while(n-- && d->ht[0].used != 0) {
dictEntry *de, *nextde;
/* Note that rehashidx can't overflow as we are sure there are more
* elements because ht[0].used != 0 */
assert(d->ht[0].size > (unsigned long)d->rehashidx);
while(d->ht[0].table[d->rehashidx] == NULL) {
d->rehashidx++;
if (--empty_visits == 0) return 1;
}
de = d->ht[0].table[d->rehashidx];
/* Move all the keys in this bucket from the old to the new hash HT */
// 搬当前嘈的整个链表
while(de) {
uint64_t h;
nextde = de->next;
/* Get the index in the new hash table */
h = dictHashKey(d, de->key) & d->ht[1].sizemask;
de->next = d->ht[1].table[h];
d->ht[1].table[h] = de;
d->ht[0].used--;
d->ht[1].used++;
de = nextde;
}
d->ht[0].table[d->rehashidx] = NULL;
d->rehashidx++;
}
/* Check if we already rehashed the whole table... */
if (d->ht[0].used == 0) {
zfree(d->ht[0].table);
d->ht[0] = d->ht[1];
_dictReset(&d->ht[1]);
d->rehashidx = -1;
return 0;
}
/* More to rehash... */
return 1;
}
static void _dictRehashStep(dict *d) {
if (d->iterators == 0) dictRehash(d,1);
}
#define dictIsRehashing(d) ((d)->rehashidx != -1)
/* Add or Overwrite:
* Add an element, discarding the old value if the key already exists.
* Return 1 if the key was added from scratch, 0 if there was already an
* element with such key and dictReplace() just performed a value update
* operation. */
int dictReplace(dict *d, void *key, void *val)
{
dictEntry *entry, *existing, auxentry;
/* Try to add the element. If the key
* does not exists dictAdd will succeed. */
entry = dictAddRaw(d,key,&existing);
if (entry) {
dictSetVal(d, entry, val);
return 1;
}
/* Set the new value and free the old one. Note that it is important
* to do that in this order, as the value may just be exactly the same
* as the previous one. In this context, think to reference counting,
* you want to increment (set), and then decrement (free), and not the
* reverse. */
auxentry = *existing;
dictSetVal(d, existing, val);
dictFreeVal(d, &auxentry);
return 0;
}
/* Add an element to the target hash table */
int dictAdd(dict *d, void *key, void *val)
{
dictEntry *entry = dictAddRaw(d,key,NULL);
if (!entry) return DICT_ERR;
dictSetVal(d, entry, val);
return DICT_OK;
}
dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing)
{
long index;
dictEntry *entry;
dictht *ht;
// 如果正在执行rehash 则执行渐进式扩容
if (dictIsRehashing(d)) _dictRehashStep(d);
/* Get the index of the new element, or -1 if
* the element already exists. */
// 查询下标索引
if ((index = _dictKeyIndex(d, key, dictHashKey(d,key), existing)) == -1)
return NULL;
/* Allocate the memory and store the new entry.
* Insert the element in top, with the assumption that in a database
* system it is more likely that recently added entries are accessed
* more frequently. */
ht = dictIsRehashing(d) ? &d->ht[1] : &d->ht[0];
entry = zmalloc(sizeof(*entry));
entry->next = ht->table[index];
ht->table[index] = entry;
ht->used++;
/* Set the hash entry fields. */
dictSetKey(d, entry, key);
return entry;
}
static long _dictKeyIndex(dict *d, const void *key, uint64_t hash, dictEntry **existing)
{
unsigned long idx, table;
dictEntry *he;
if (existing) *existing = NULL;
/* Expand the hash table if needed */
if (_dictExpandIfNeeded(d) == DICT_ERR)
return -1;
// 同时查询两个哈希表
for (table = 0; table <= 1; table++) {
idx = hash & d->ht[table].sizemask;
/* Search if this slot does not already contain the given key */
he = d->ht[table].table[idx];
while(he) {
if (key==he->key || dictCompareKeys(d, key, he->key)) {
if (existing) *existing = he;