redis5.0.7源码阅读——字典dict

database

redis中字典相关的文件为:dict.h与dict.c

与其说是一个字典,道不如说是一个哈希表。

一、数据结构

dictEntry

 1 typedef struct dictEntry {

2void *key;

3 union {

4void *val;

5 uint64_t u64;

6 int64_t s64;

7double d;

8 } v;

9struct dictEntry *next;

10 } dictEntry;

dictEntry是一个kv对的单向链表,其中v是一个联合体,支持数字,或者是指向一块内存的指针。

 1/*

2+---------------+

3|void *key |

4+---------------+

5|union{...} v |

6+---------------+

7|dictEntry *next|---+

8+---------------+ |

9 |

10+---------------+ <-+

11|void *key |

12+---------------+

13|union{...} v |

14+---------------+

15|dictEntry *next|

16+---------------+

17*/

为了节约篇幅,后续用以下结构表示:

1/*

2+---+ +---+

3|K|V|->|K|V|->NULL

4+---+ +---+

5*/

 

dictht

 1 typedef struct dictht {

2 dictEntry **table;

3 unsigned long size;

4/*

5 这样写可能更容易理解

6 const unsigned long size = 4;

7 dictEntry *table[size];

8*/

9

10

11 unsigned long sizemask;

12//sizemask,始终为size-1

13

14 unsigned long used;

15//当前总dictEntry数量

16 } dictht;

dictht是一个hash table,整体结构大致为:

 1/*

2+----------------------+ +---> +-----------------+ +---+

3|dictEntry **table |---+ |dictEntry *bucket|->|K|V|->NULL

4+----------------------+ +-----------------+ +---+

5|unsigned long size = 4| |dictEntry *bucket|->NULL

6+----------------------+ +-----------------+

7|unsigned long sizemask| |dictEntry *bucket|->NULL

8+----------------------+ +-----------------+

9|unsigned long used | |dictEntry *bucket|->NULL

10+----------------------+ +-----------------+

11*/

其中,table指向大小为sizeof(dictEntry*) * size的一片内存空间,每个dictEntry*可以视为一个bucket,每个bucket下挂着一个dictEntry单向链表。

size的值始终为2的位数,而sizemask的值始终为size-1,其作用是决定kv对要挂在哪个bucket上。

举个例子,size=4时,sizemask=3,其二进制为 0011,若通过hash函数计算出来key对应的hash值hash_value为5,二进制为0101,则通过位运算 sizemask & hash_value = 0011 & 0101 = 0001,十进制为1,则将会挂在idx = 1的bucket上。

 

dictType

1 typedef struct dictType {

2 uint64_t (*hashFunction)(constvoid *key);

3void *(*keyDup)(void *privdata, constvoid *key);

4void *(*valDup)(void *privdata, constvoid *obj);

5int (*keyCompare)(void *privdata, constvoid *key1, constvoid *key2);

6void (*keyDestructor)(void *privdata, void *key);

7void (*valDestructor)(void *privdata, void *obj);

8 } dictType;

dictType用于自定义一些操作的方法,如拷贝key、拷贝value、销毁key、销毁value,比较key,以及hash函数。

 

dict

1 typedef struct dict {

2 dictType *type;

3void *privdata;

4 dictht ht[2];

5long rehashidx; /* rehashing not in progress if rehashidx == -1 */

6 unsigned long iterators; /* number of iterators currently running */

7 } dict;

之前提到的dictType与dictht都是dict的成员变量。除此之外,还有privdata,是在创建dict的时候调用者传入,用于特定操作时回传给函数的。如:

 1#define dictFreeVal(d, entry) 

2if ((d)->type->valDestructor)

3 (d)->type->valDestructor((d)->privdata, (entry)->v.val)

4

5#define dictSetVal(d, entry, _val_) do {

6if ((d)->type->valDup)

7 (entry)->v.val = (d)->type->valDup((d)->privdata, _val_);

8else

9 (entry)->v.val = (_val_);

10 } while(0)

11

12#define dictSetSignedIntegerVal(entry, _val_)

13do { (entry)->v.s64 = _val_; } while(0)

14

15#define dictSetUnsignedIntegerVal(entry, _val_)

16do { (entry)->v.u64 = _val_; } while(0)

17

18#define dictSetDoubleVal(entry, _val_)

19do { (entry)->v.d = _val_; } while(0)

20

21#define dictFreeKey(d, entry)

22if ((d)->type->keyDestructor)

23 (d)->type->keyDestructor((d)->privdata, (entry)->key)

24

25#define dictSetKey(d, entry, _key_) do {

26if ((d)->type->keyDup)

27 (entry)->key = (d)->type->keyDup((d)->privdata, _key_);

28else

29 (entry)->key = (_key_);

30 } while(0)

31

32#define dictCompareKeys(d, key1, key2)

33 (((d)->type->keyCompare) ?

34 (d)->type->keyCompare((d)->privdata, key1, key2) :

35 (key1) == (key2))

rehashidx,是与ht[2]配合实现渐进式rehash操作的。若使用一步到位的方式,当key的数量非常大的时候,rehashing期间,是会卡死所有操作的。

iterators,是用于记录当前使用的安全迭代器数量,与rehashing操作有关。

整体结构如下:

 1/*

2+---------+ /+-----------+ +-->+----------+ +---+

3|dictType*| / |dictEntry**|---+ |dictEntry*|->|K|V|->NULL

4+---------+ / +-----------+ +----------+ +---+

5|privdata | / |size | |dictEntry*|->NULL

6+---------+/ +-----------+ +----------+

7|ht[2] | |sizemask | |dictEntry*|->NULL

8+---------+ +-----------+ +----------+

9|rehashidx| |used | |dictEntry*|->NULL

10+---------+ +-----------+ +----------+

11|iterators|

12+---------+ +-----------+

13 |dictEntry**|-->NULL

14 +-----------+

15 |size |

16 +-----------+

17 |sizemask |

18 +-----------+

19 |used |

20 +-----------+

21*/

 

二、创建

 1staticvoid _dictReset(dictht *ht)

2{

3 ht->table = NULL;

4 ht->size = 0;

5 ht->sizemask = 0;

6 ht->used = 0;

7}

8

9int _dictInit(dict *d, dictType *type,

10void *privDataPtr)

11{

12 _dictReset(&d->ht[0]);

13 _dictReset(&d->ht[1]);

14 d->type = type;

15 d->privdata = privDataPtr;

16 d->rehashidx = -1;

17 d->iterators = 0;

18return DICT_OK;

19}

20

21 dict *dictCreate(dictType *type,

22void *privDataPtr)

23{

24 dict *d = zmalloc(sizeof(*d));

25

26 _dictInit(d,type,privDataPtr);

27return d;

28 }

可以调用dictCreate创建一个空的dict,它会分配好dict的空间,并初始化所有成员变量。在这里把privdata传入并保存。搜了一下整个redis源码的dictCreate调用,看到传入的值全为NULL。目前的理解暂时不清楚这个变量是什么时候赋值的。初始化后的dict结构如下:

 1/*

2+------------+ /+-----------+

3|dictType* | / |dictEntry**|-->NULL

4+------------+ / +-----------+

5|privdata | / |size=0 |

6+------------+/ +-----------+

7|ht[2] | |sizemask=0 |

8+------------+ +-----------+

9|rehashidx=-1| |used=0 |

10+------------+ +-----------+

11|iterators=0 |

12+------------+ +-----------+

13 |dictEntry**|-->NULL

14 +-----------+

15 |size=0 |

16 +-----------+

17 |sizemask=0 |

18 +-----------+

19 |used=0 |

20 +-----------+

21*/

刚创建好的dict是存不了任何数据的,其两个hash table的size都为0。这里先说明一下resize操作:

 1#define DICT_HT_INITIAL_SIZE     4

2

3static unsigned long _dictNextPower(unsigned long size)

4{

5 unsigned long i = DICT_HT_INITIAL_SIZE;

6

7if (size >= LONG_MAX) return LONG_MAX + 1LU;

8while(1) {

9if (i >= size)

10return i;

11 i *= 2;

12 }

13}

14

15/* Expand or create the hash table */

16int dictExpand(dict *d, unsigned long size)

17{

18/* the size is invalid if it is smaller than the number of

19 * elements already inside the hash table */

20if (dictIsRehashing(d) || d->ht[0].used > size)

21return DICT_ERR;

22

23 dictht n; /* the new hash table */

24 unsigned long realsize = _dictNextPower(size);

25

26/* Rehashing to the same table size is not useful. */

27if (realsize == d->ht[0].size) return DICT_ERR;

28

29/* Allocate the new hash table and initialize all pointers to NULL */

30 n.size = realsize;

31 n.sizemask = realsize-1;

32 n.table = zcalloc(realsize*sizeof(dictEntry*));

33 n.used = 0;

34

35/* Is this the first initialization? If so it"s not really a rehashing

36 * we just set the first hash table so that it can accept keys. */

37if (d->ht[0].table == NULL) {

38 d->ht[0] = n;

39return DICT_OK;

40 }

41

42/* Prepare a second hash table for incremental rehashing */

43 d->ht[1] = n;

44 d->rehashidx = 0;

45return DICT_OK;

46}

47

48int dictResize(dict *d)

49{

50int minimal;

51

52if (!dict_can_resize || dictIsRehashing(d)) return DICT_ERR;

53 minimal = d->ht[0].used;

54if (minimal < DICT_HT_INITIAL_SIZE)

55 minimal = DICT_HT_INITIAL_SIZE;

56return dictExpand(d, minimal);

57 }

_dictNextPower用于获取当前要分配给hash table的size,得到的值一定是2的倍数,初始值为4。

dictExpand,从源码注释上看,它是为了扩容hash table,或者创建一个。它不允许与rehashing操作同时进行,也不能强制缩容。在使用_dictNextPower得到需要的size之后,它先是使用一个临时变量n去分配空间,然后进行判断,若ht[0].table的值为NULL,则认为是刚create出来的dict,直接把n赋值给ht[0],否则给ht[1],并开始rehashing操作。

dictResize操作就不用多说了。

 

三、rehashing操作

若有这样一个dict,假设K1、K2、K3、K4计算出来的hash值分别为0、5、2、7,使用sizemask计算出来的idx分别为0、1、2、3

 1/*

2 +----+

3 +->|K1|V|->NULL

4+------------+ /+-----------+ +->+----------+ / +----+

5|dictType* | / |dictEntry**|--+ |dictEntry*|/ +----+

6+------------+ / +-----------+ +----------+ +-->|K2|V|->NULL

7|privdata | / |size=4 | |dictEntry*|/ +----+

8+------------+/ +-----------+ +----------+

9|ht[2] | |sizemask=3 | |dictEntry*| +----+

10+------------+ +-----------+ +----------+ +-->|K3|V|->NULL

11|rehashidx=-1| |used=4 | |dictEntry*| +----+

12+------------+ +-----------+ +----------+ +----+

13|iterators=0 | +->|K4|V|->NULL

14+------------+ +-----------+ +----+

15 |dictEntry**|-->NULL

16 +-----------+

17 |size=0 |

18 +-----------+

19 |sizemask=0 |

20 +-----------+

21 |used=0 |

22 +-----------+

23*/

 1staticint _dictExpandIfNeeded(dict *d)

2{

3/* Incremental rehashing already in progress. Return. */

4if (dictIsRehashing(d)) return DICT_OK;

5

6/* If the hash table is empty expand it to the initial size. */

7if (d->ht[0].size == 0) return dictExpand(d, DICT_HT_INITIAL_SIZE);

8

9/* If we reached the 1:1 ratio, and we are allowed to resize the hash

10 * table (global setting) or we should avoid it but the ratio between

11 * elements/buckets is over the "safe" threshold, we resize doubling

12 * the number of buckets. */

13if (d->ht[0].used >= d->ht[0].size &&

14 (dict_can_resize ||

15 d->ht[0].used/d->ht[0].size > dict_force_resize_ratio))

16 {

17return dictExpand(d, d->ht[0].used*2);

18 }

19return DICT_OK;

20 }

通过函数_dictExpandIfNeeded,可知当used >= size且dict_can_resize == TRUE的时候,需要调用dictExpand进入rehashing状态。dict_can_resize默认为1

1staticint dict_can_resize = 1;

2static unsigned int dict_force_resize_ratio = 5;

需要的size为当前used * 2,即为8。调用dictExpand之后的结构:

 1/*

2 +----+

3 +->|K1|V|->NULL

4 +->+----------+ / +----+

5 | |dictEntry*|/ +----+

6 | +----------+ +-->|K2|V|->NULL

7 | |dictEntry*|/ +----+

8 +------------+ /+-----------+ | +----------+

9 |dictType* | / |dictEntry**|--+ |dictEntry*| +----+

10 +------------+ / +-----------+ +----------+ +-->|K3|V|->NULL

11 |privdata | / |size=4 | |dictEntry*| +----+

12 +------------+/ +-----------+ +----------+ +----+

13 |ht[2] | |sizemask=3 | +->|K4|V|->NULL

14 +------------+ +-----------+ +----+

15 |rehashidx=0 | |used=4 |

16 +------------+ +-----------+

17 |iterators=0 |

18 +------------+ +-----------+ +->+----------+

19 |dictEntry**|--+ |dictEntry*|->NULL

20 +-----------+ +----------+

21 |size=8 | |dictEntry*|->NULL

22 +-----------+ +----------+

23 |sizemask=7 | |dictEntry*|->NULL

24 +-----------+ +----------+

25 |used=0 | |dictEntry*|->NULL

26 +-----------+ +----------+

27 |dictEntry*|->NULL

28 +----------+

29 |dictEntry*|->NULL

30 +----------+

31 |dictEntry*|->NULL

32 +----------+

33 |dictEntry*|->NULL

34 +----------+

35*/

根据rehashing操作

 1int dictRehash(dict *d, int n) {

2int empty_visits = n*10; /* Max number of empty buckets to visit. */

3if (!dictIsRehashing(d)) return0;

4

5while(n-- && d->ht[0].used != 0) {

6 dictEntry *de, *nextde;

7

8/* Note that rehashidx can"t overflow as we are sure there are more

9 * elements because ht[0].used != 0 */

10 assert(d->ht[0].size > (unsigned long)d->rehashidx);

11while(d->ht[0].table[d->rehashidx] == NULL) {

12 d->rehashidx++;

13if (--empty_visits == 0) return1;

14 }

15 de = d->ht[0].table[d->rehashidx];

16/* Move all the keys in this bucket from the old to the new hash HT */

17while(de) {

18 uint64_t h;

19

20 nextde = de->next;

21/* Get the index in the new hash table */

22 h = dictHashKey(d, de->key) & d->ht[1].sizemask;

23 de->next = d->ht[1].table[h];

24 d->ht[1].table[h] = de;

25 d->ht[0].used--;

26 d->ht[1].used++;

27 de = nextde;

28 }

29 d->ht[0].table[d->rehashidx] = NULL;

30 d->rehashidx++;

31 }

32

33/* Check if we already rehashed the whole table... */

34if (d->ht[0].used == 0) {

35 zfree(d->ht[0].table);

36 d->ht[0] = d->ht[1];

37 _dictReset(&d->ht[1]);

38 d->rehashidx = -1;

39return0;

40 }

41

42/* More to rehash... */

43return1;

44 }

rehashing操作将会把ht[0]里,rehashidx的值对应的bucket下的所有dictEntry,移至ht[1],之后对rehashidx进行自增处理。当ht[0]->used为0时,认为ht[0]的所有dictEntry已经移至ht[1],此时return 0,否则 return 1,告诉调用者,还需要继续进行rehashing操作。同时,rehashing时允许最多跳过10n的空bucket,就要退出流程。假设传入的n=1,即只进行一次rehashing操作,转换至完成之后的结构:

 1/*

2

3 +->NULL

4 +->+----------+ /

5 | |dictEntry*|/ +----+

6 | +----------+ +-->|K2|V|->NULL

7 | |dictEntry*|/ +----+

8 +------------+ /+-----------+ | +----------+

9 |dictType* | / |dictEntry**|--+ |dictEntry*| +----+

10 +------------+ / +-----------+ +----------+ +-->|K3|V|->NULL

11 |privdata | / |size=4 | |dictEntry*| +----+

12 +------------+/ +-----------+ +----------+ +----+

13 |ht[2] | |sizemask=3 | +->|K4|V|->NULL

14 +------------+ +-----------+ +----+

15 |rehashidx=1 | |used=3 |

16 +------------+ +-----------+

17 |iterators=0 |

18 +------------+ +-----------+ +->+----------+ +----+

19 |dictEntry**|--+ |dictEntry*|-->|K1|V|->NULL

20 +-----------+ +----------+ +----+

21 |size=8 | |dictEntry*|->NULL

22 +-----------+ +----------+

23 |sizemask=7 | |dictEntry*|->NULL

24 +-----------+ +----------+

25 |used=1 | |dictEntry*|->NULL

26 +-----------+ +----------+

27 |dictEntry*|->NULL

28 +----------+

29 |dictEntry*|->NULL

30 +----------+

31 |dictEntry*|->NULL

32 +----------+

33 |dictEntry*|->NULL

34 +----------+

35*/

所有节点移完时:

 1/*

2

3

4 +->+----------+

5 | |dictEntry*|->NULL

6 | +----------+

7 | |dictEntry*|->NULL

8 +------------+ /+-----------+ | +----------+

9 |dictType* | / |dictEntry**|--+ |dictEntry*|->NULL

10 +------------+ / +-----------+ +----------+

11 |privdata | / |size=4 | |dictEntry*|->NULL

12 +------------+/ +-----------+ +----------+

13 |ht[2] | |sizemask=3 |

14 +------------+ +-----------+

15 |rehashidx=4 | |used=0 |

16 +------------+ +-----------+

17 |iterators=0 |

18 +------------+ +-----------+ +->+----------+ +----+

19 |dictEntry**|--+ |dictEntry*|-->|K1|V|->NULL

20 +-----------+ +----------+ +----+

21 |size=8 | |dictEntry*|->NULL

22 +-----------+ +----------+ +----+

23 |sizemask=7 | |dictEntry*|-->|K3|V|->NULL

24 +-----------+ +----------+ +----+

25 |used=4 | |dictEntry*|->NULL

26 +-----------+ +----------+

27 |dictEntry*|->NULL

28 +----------+ +----+

29 |dictEntry*|-->|K2|V|->NULL

30 +----------+ +----+

31 |dictEntry*|->NULL

32 +----------+ +----+

33 |dictEntry*|-->|K4|V|->NULL

34 +----------+ +----+

35*/

此时ht[0]->used为0,释放原ht[0]的hash table,把ht[1]赋值给ht[0],并设置ht[1] = NULL,最后重置rehashidx=-1,rehashing操作结束

 1/*

2 +------------+ /+-----------+ +-->+----------+ +----+

3 |dictType* | / |dictEntry**|---+ |dictEntry*|-->|K1|V|->NULL

4 +------------+ / +-----------+ +----------+ +----+

5 |privdata | / |size=8 | |dictEntry*|->NULL

6 +------------+/ +-----------+ +----------+ +----+

7 |ht[2] | |sizemask=7 | |dictEntry*|-->|K3|V|->NULL

8 +------------+ +-----------+ +----------+ +----+

9 |rehashidx=-1| |used=4 | |dictEntry*|->NULL

10 +------------+ +-----------+ +----------+

11 |iterators=0 | |dictEntry*|->NULL

12 +------------+ +-----------+ +----------+ +----+

13 |dictEntry**|->NULL |dictEntry*|-->|K2|V|->NULL

14 +-----------+ +----------+ +----+

15 |size=0 | |dictEntry*|->NULL

16 +-----------+ +----------+ +----+

17 |sizemask=0 | |dictEntry*|-->|K4|V|->NULL

18 +-----------+ +----------+ +----+

19 |used=0 |

20 +-----------+

21*/

rehashing操作的触发共有两种方式

1、定时操作

 1longlong timeInMilliseconds(void) {

2struct timeval tv;

3

4 gettimeofday(&tv,NULL);

5return (((longlong)tv.tv_sec)*1000)+(tv.tv_usec/1000);

6}

7

8/* Rehash for an amount of time between ms milliseconds and ms+1 milliseconds */

9int dictRehashMilliseconds(dict *d, int ms) {

10longlong start = timeInMilliseconds();

11int rehashes = 0;

12

13while(dictRehash(d,100)) {

14 rehashes += 100;

15if (timeInMilliseconds()-start > ms) break;

16 }

17return rehashes;

18 }

 

外部传入一个毫秒时间,在这时间内循环执行rehashing,每次执行100次。

2、操作时触发

1staticvoid _dictRehashStep(dict *d) {

2if (d->iterators == 0) dictRehash(d,1);

3 }

在插入、删除、查找等操作时,顺带执行一次rehashing操作。值得注意的是,如果存在安全的迭代器,即d->iterators != 0,则不会进行rehashing操作

 

三、插入

获取可插入新节点的bucket idx的方法:

 1staticlong _dictKeyIndex(dict *d, constvoid *key, uint64_t hash, dictEntry **existing)

2{

3 unsigned long idx, table;

4 dictEntry *he;

5if (existing) *existing = NULL;

6

7/* Expand the hash table if needed */

8if (_dictExpandIfNeeded(d) == DICT_ERR)

9return -1;

10for (table = 0; table <= 1; table++) {

11 idx = hash & d->ht[table].sizemask;

12/* Search if this slot does not already contain the given key */

13 he = d->ht[table].table[idx];

14while(he) {

15if (key==he->key || dictCompareKeys(d, key, he->key)) {

16if (existing) *existing = he;

17return -1;

18 }

19 he = he->next;

20 }

21if (!dictIsRehashing(d)) break;

22 }

23return idx;

24 }

此方法在进行查找idx之前,先进行一次判断,是否需要rehashing操作。而后进行查找。idx的值就是通过hash函数计算出来的hash_value与sizemask做位运算的结果,然后遍历此idx对应的bucket,若已存在相同的key,则认为不可插入,并把对应的dictEntry用传入的二级指针的方式传出,供调用者使用。若不存在,则需要判断是否正在进行rehashing操作。若在,则会对ht[1]做一次相同的操作。最终可以得到一个idx值,或传出一个dictEntry。

由于rehashing期间,将会把ht[0]的所有dictEntry依次转移至ht[1],为了防止新插入的dictEntry落到ht[0]已完成rehashing操作的bucket上,在rehashing期间,返回的可插入的idx一定是属于ht[1]的。

插入方法:

 1 dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing)

2{

3long index;

4 dictEntry *entry;

5 dictht *ht;

6

7if (dictIsRehashing(d)) _dictRehashStep(d);

8

9/* Get the index of the new element, or -1 if

10 * the element already exists. */

11if ((index = _dictKeyIndex(d, key, dictHashKey(d,key), existing)) == -1)

12return NULL;

13

14/* Allocate the memory and store the new entry.

15 * Insert the element in top, with the assumption that in a database

16 * system it is more likely that recently added entries are accessed

17 * more frequently. */

18 ht = dictIsRehashing(d) ? &d->ht[1] : &d->ht[0];

19 entry = zmalloc(sizeof(*entry));

20 entry->next = ht->table[index];

21 ht->table[index] = entry;

22 ht->used++;

23

24/* Set the hash entry fields. */

25 dictSetKey(d, entry, key);

26return entry;

27 }

若不存在相同key,则插入,否则,传出dictEntry的指针。插入时,由于没有记录每个dictEntry链表的尾指针,所以使用头插法,可以节约插入时的时间消耗。

dictAddRaw做为最终插入的方法,被多个方法所调用:

 1//若不存在,则插入,否则报错

2int dictAdd(dict *d, void *key, void *val)

3{

4 dictEntry *entry = dictAddRaw(d,key,NULL);

5

6if (!entry) return DICT_ERR;

7 dictSetVal(d, entry, val);

8return DICT_OK;

9}

10

11//若存在,则替换value,否则插入

12int dictReplace(dict *d, void *key, void *val)

13{

14 dictEntry *entry, *existing, auxentry;

15 entry = dictAddRaw(d,key,&existing);

16if (entry) {

17 dictSetVal(d, entry, val);

18return1;

19 }

20 auxentry = *existing;

21 dictSetVal(d, existing, val);

22 dictFreeVal(d, &auxentry);

23return0;

24}

25

26//若存在,则返回对应dictEntry,否则插入后返回新的dictEntry

27 dictEntry *dictAddOrFind(dict *d, void *key) {

28 dictEntry *entry, *existing;

29 entry = dictAddRaw(d,key,&existing);

30return entry ? entry : existing;

31 }

对于一个刚刚create的dict:

 1/*

2

3+------------+ /+-----------+

4|dictType* | / |dictEntry**|-->NULL

5+------------+ / +-----------+

6|privdata | / |size=0 |

7+------------+/ +-----------+

8|ht[2] | |sizemask=0 |

9+------------+ +-----------+

10|rehashidx=-1| |used=0 |

11+------------+ +-----------+

12|iterators=0 |

13+------------+ +-----------+

14 |dictEntry**|-->NULL

15 +-----------+

16 |size=0 |

17 +-----------+

18 |sizemask=0 |

19 +-----------+

20 |used=0 |

21 +-----------+

22*/

假设K1、K2、K3、K4计算出来的hash值分别为0、5、2、7,使用sizemask计算出来的idx分别为0、1、2、3

现调用dictAdd方法进行插入

1、插入K1

执行完dictAddRaw中的_dictKeyIndex里的_dictExpandIfNeeded:

 1/*

2

3 +-->NULL

4+------------+ /+-----------+ +->+----------+ /

5|dictType* | / |dictEntry**|--+ |dictEntry*|/

6+------------+ / +-----------+ +----------+ +--->NULL

7|privdata | / |size=4 | |dictEntry*|/

8+------------+/ +-----------+ +----------+

9|ht[2] | |sizemask=3 | |dictEntry*|

10+------------+ +-----------+ +----------+ +--->NULL

11|rehashidx=-1| |used=0 | |dictEntry*|

12+------------+ +-----------+ +----------+

13|iterators=0 | +-->NULL

14+------------+ +-----------+

15 |dictEntry**|-->NULL

16 +-----------+

17 |size=0 |

18 +-----------+

19 |sizemask=0 |

20 +-----------+

21 |used=0 |

22 +-----------+

23*/

同时得到其在ht[0]的idx = 0,且不在rehashing操作中,于是直接插入

 1/*

2 +----+

3 +->|K1|V|->NULL

4+------------+ /+-----------+ +->+----------+ / +----+

5|dictType* | / |dictEntry**|--+ |dictEntry*|/

6+------------+ / +-----------+ +----------+ +--->NULL

7|privdata | / |size=4 | |dictEntry*|/

8+------------+/ +-----------+ +----------+

9|ht[2] | |sizemask=3 | |dictEntry*|

10+------------+ +-----------+ +----------+ +--->NULL

11|rehashidx=-1| |used=1 | |dictEntry*|

12+------------+ +-----------+ +----------+

13|iterators=0 | +-->NULL

14+------------+ +-----------+

15 |dictEntry**|-->NULL

16 +-----------+

17 |size=0 |

18 +-----------+

19 |sizemask=0 |

20 +-----------+

21 |used=0 |

22 +-----------+

23*/

 2、插入K2、K3、K4后:

 1/*

2 +----+

3 +->|K1|V|->NULL

4+------------+ /+-----------+ +->+----------+ / +----+

5|dictType* | / |dictEntry**|--+ |dictEntry*|/ +-----

6+------------+ / +-----------+ +----------+ +-->|K2|V|->NULL

7|privdata | / |size=4 | |dictEntry*|/ +----+

8+------------+/ +-----------+ +----------+

9|ht[2] | |sizemask=3 | |dictEntry*| +----+

10+------------+ +-----------+ +----------+ +-->|K3|V|->NULL

11|rehashidx=-1| |used=4 | |dictEntry*| +----+

12+------------+ +-----------+ +----------+ +----+

13|iterators=0 | +->|K4|V|->NULL

14+------------+ +-----------+ +----+

15 |dictEntry**|-->NULL

16 +-----------+

17 |size=0 |

18 +-----------+

19 |sizemask=0 |

20 +-----------+

21 |used=0 |

22 +-----------+

23*/

3、此时若有一个K5,计算出来的hash值为8,则:

i.因此刻不在rehashing操作,所以不用做处理

ii.执行完dictAddRaw中的_dictKeyIndex里的_dictExpandIfNeeded:

 1/*

2 +----+

3 +->|K1|V|->NULL

4 +->+----------+ / +----+

5 | |dictEntry*|/ +----+

6 | +----------+ +-->|K2|V|->NULL

7 | |dictEntry*|/ +----+

8 +------------+ /+-----------+ | +----------+

9 |dictType* | / |dictEntry**|--+ |dictEntry*| +----+

10 +------------+ / +-----------+ +----------+ +-->|K3|V|->NULL

11 |privdata | / |size=4 | |dictEntry*| +----+

12 +------------+/ +-----------+ +----------+ +----+

13 |ht[2] | |sizemask=3 | +->|K4|V|->NULL

14 +------------+ +-----------+ +----+

15 |rehashidx=0 | |used=4 |

16 +------------+ +-----------+

17 |iterators=0 |

18 +------------+ +-----------+ +->+----------+

19 |dictEntry**|--+ |dictEntry*|->NULL

20 +-----------+ +----------+

21 |size=8 | |dictEntry*|->NULL

22 +-----------+ +----------+

23 |sizemask=7 | |dictEntry*|->NULL

24 +-----------+ +----------+

25 |used=0 | |dictEntry*|->NULL

26 +-----------+ +----------+

27 |dictEntry*|->NULL

28 +----------+

29 |dictEntry*|->NULL

30 +----------+

31 |dictEntry*|->NULL

32 +----------+

33 |dictEntry*|->NULL

34 +----------+

35*/

同时得到其在ht[1]的idx=0

iii.插入

 1/*

2 +----+

3 +->|K1|V|->NULL

4 +->+----------+ / +----+

5 | |dictEntry*|/ +----+

6 | +----------+ +-->|K2|V|->NULL

7 | |dictEntry*|/ +----+

8 +------------+ /+-----------+ | +----------+

9 |dictType* | / |dictEntry**|--+ |dictEntry*| +----+

10 +------------+ / +-----------+ +----------+ +-->|K3|V|->NULL

11 |privdata | / |size=4 | |dictEntry*| +----+

12 +------------+/ +-----------+ +----------+ +----+

13 |ht[2] | |sizemask=3 | +->|K4|V|->NULL

14 +------------+ +-----------+ +----+

15 |rehashidx=0 | |used=4 |

16 +------------+ +-----------+

17 |iterators=0 |

18 +------------+ +-----------+ +->+----------+ +----+

19 |dictEntry**|--+ |dictEntry*|-->|K5|V|->NULL

20 +-----------+ +----------+ +----+

21 |size=8 | |dictEntry*|->NULL

22 +-----------+ +----------+

23 |sizemask=7 | |dictEntry*|->NULL

24 +-----------+ +----------+

25 |used=1 | |dictEntry*|->NULL

26 +-----------+ +----------+

27 |dictEntry*|->NULL

28 +----------+

29 |dictEntry*|->NULL

30 +----------+

31 |dictEntry*|->NULL

32 +----------+

33 |dictEntry*|->NULL

34 +----------+

35*/

4、此时若有一个K6,计算出来的hash值为16,则:

i.此时已处理rehashing操作,执行一步:

 1/*

2

3 +-->NULL

4 +->+----------+ /

5 | |dictEntry*|/ +----+

6 | +----------+ +-->|K2|V|->NULL

7 | |dictEntry*|/ +----+

8 +------------+ /+-----------+ | +----------+

9 |dictType* | / |dictEntry**|--+ |dictEntry*| +----+

10 +------------+ / +-----------+ +----------+ +-->|K3|V|->NULL

11 |privdata | / |size=4 | |dictEntry*| +----+

12 +------------+/ +-----------+ +----------+ +----+

13 |ht[2] | |sizemask=3 | +->|K4|V|->NULL

14 +------------+ +-----------+ +----+

15 |rehashidx=1 | |used=3 |

16 +------------+ +-----------+

17 |iterators=0 |

18 +------------+ +-----------+ +->+----------+ +----+ +----+

19 |dictEntry**|--+ |dictEntry*|-->|K1|V|->|K5|V|->NULL

20 +-----------+ +----------+ +----+ +----+

21 |size=8 | |dictEntry*|->NULL

22 +-----------+ +----------+

23 |sizemask=7 | |dictEntry*|->NULL

24 +-----------+ +----------+

25 |used=2 | |dictEntry*|->NULL

26 +-----------+ +----------+

27 |dictEntry*|->NULL

28 +----------+

29 |dictEntry*|->NULL

30 +----------+

31 |dictEntry*|->NULL

32 +----------+

33 |dictEntry*|->NULL

34 +----------+

35*/

ii.执行完dictAddRaw中的_dictKeyIndex里的_dictExpandIfNeeded,因已在进行rehashing,所以不做任何处理,只返回其在ht[1]的idx 0

iii.头插法将K6插入

 1/*

2

3 +-->NULL

4 +->+----------+ /

5 | |dictEntry*|/ +----+

6 | +----------+ +-->|K2|V|->NULL

7 | |dictEntry*|/ +----+

8 +------------+ /+-----------+ | +----------+

9 |dictType* | / |dictEntry**|--+ |dictEntry*| +----+

10 +------------+ / +-----------+ +----------+ +-->|K3|V|->NULL

11 |privdata | / |size=4 | |dictEntry*| +----+

12 +------------+/ +-----------+ +----------+ +----+

13 |ht[2] | |sizemask=3 | +->|K4|V|->NULL

14 +------------+ +-----------+ +----+

15 |rehashidx=1 | |used=3 |

16 +------------+ +-----------+

17 |iterators=0 |

18 +------------+ +-----------+ +->+----------+ +----+ +----+ +----+

19 |dictEntry**|--+ |dictEntry*|-->|K6|V|->|K1|V|->|K5|V|->NULL

20 +-----------+ +----------+ +----+ +----+ +----+

21 |size=8 | |dictEntry*|->NULL

22 +-----------+ +----------+

23 |sizemask=7 | |dictEntry*|->NULL

24 +-----------+ +----------+

25 |used=3 | |dictEntry*|->NULL

26 +-----------+ +----------+

27 |dictEntry*|->NULL

28 +----------+

29 |dictEntry*|->NULL

30 +----------+

31 |dictEntry*|->NULL

32 +----------+

33 |dictEntry*|->NULL

34 +----------+

35*/

 以上为正常插入时的情况,key已存在,或是调用另外两个方法的情况与之大同小异,不再做过多叙述。

 

四、查找 

 1 dictEntry *dictFind(dict *d, constvoid *key)

2{

3 dictEntry *he;

4 uint64_t h, idx, table;

5

6if (d->ht[0].used + d->ht[1].used == 0) return NULL; /* dict is empty */

7if (dictIsRehashing(d)) _dictRehashStep(d);

8 h = dictHashKey(d, key);

9for (table = 0; table <= 1; table++) {

10 idx = h & d->ht[table].sizemask;

11 he = d->ht[table].table[idx];

12while(he) {

13if (key==he->key || dictCompareKeys(d, key, he->key))

14return he;

15 he = he->next;

16 }

17if (!dictIsRehashing(d)) return NULL;

18 }

19return NULL;

20 }

同样,若在rehashing期间,则执行一次。首先在ht[0]里查找,计算出hash值对应ht[0]的idx,取得其bucket,然后遍历之,找到与指定key相同的dictEntry。若ht[0]中找不到指定的key,且正在进行rehashing操作,则去ht[1]以相同方式也查找一次。

redis额外提供一个,根据key只获取其value的方法:

1void *dictFetchValue(dict *d, constvoid *key) {

2 dictEntry *he;

3

4 he = dictFind(d,key);

5return he ? dictGetVal(he) : NULL;

6 }

key不存在时返回NULL

 

五、删除

删除方法:

 1static dictEntry *dictGenericDelete(dict *d, constvoid *key, int nofree) {

2 uint64_t h, idx;

3 dictEntry *he, *prevHe;

4int table;

5

6if (d->ht[0].used == 0 && d->ht[1].used == 0) return NULL;

7

8if (dictIsRehashing(d)) _dictRehashStep(d);

9 h = dictHashKey(d, key);

10

11for (table = 0; table <= 1; table++) {

12 idx = h & d->ht[table].sizemask;

13 he = d->ht[table].table[idx];

14 prevHe = NULL;

15while(he) {

16if (key==he->key || dictCompareKeys(d, key, he->key)) {

17/* Unlink the element from the list */

18if (prevHe)

19 prevHe->next = he->next;

20else

21 d->ht[table].table[idx] = he->next;

22if (!nofree) {

23 dictFreeKey(d, he);

24 dictFreeVal(d, he);

25 zfree(he);

26 }

27 d->ht[table].used--;

28return he;

29 }

30 prevHe = he;

31 he = he->next;

32 }

33if (!dictIsRehashing(d)) break;

34 }

35return NULL; /* not found */

36 }

查找方式与dictFind相同。找到之后,由调用者指定是否要销毁此dictEntry,若不销毁,则要把对应指针传出来,给外部使用。

此方法被两个接口所调用:

1int dictDelete(dict *ht, constvoid *key) {

2return dictGenericDelete(ht,key,0) ? DICT_OK : DICT_ERR;

3}

4

5 dictEntry *dictUnlink(dict *ht, constvoid *key) {

6return dictGenericDelete(ht,key,1);

7 }

dictDelete就不用多说了,直接删除对应dictEntry。关于为什么需要dictUnlink,源码的注释上写道,如果有某种操作,需要先查找指定key对应的dictEntry,然后对其做点操作,接着就直接删除,在没有dictUnlink的时候,需要这样:

1 entry = dictFind(...);

2// Do something with entry

3 dictDelete(dictionary,entry);

实际需要查找两次。而在有dictUnlink的情况下:

1 entry = dictUnlink(dictionary,entry);

2// Do something with entry

3 dictFreeUnlinkedEntry(entry);

只需要一次查找,配合专门的删除操作,即可。

1void dictFreeUnlinkedEntry(dict *d, dictEntry *he) {

2if (he == NULL) return;

3 dictFreeKey(d, he);

4 dictFreeVal(d, he);

5 zfree(he);

6 }

 

六、销毁

清空一个hash table的方法

 1int _dictClear(dict *d, dictht *ht, void(callback)(void *)) {

2 unsigned long i;

3

4/* Free all the elements */

5for (i = 0; i < ht->size && ht->used > 0; i++) {

6 dictEntry *he, *nextHe;

7

8if (callback && (i & 65535) == 0) callback(d->privdata);

9

10if ((he = ht->table[i]) == NULL) continue;

11while(he) {

12 nextHe = he->next;

13 dictFreeKey(d, he);

14 dictFreeVal(d, he);

15 zfree(he);

16 ht->used--;

17 he = nextHe;

18 }

19 }

20/* Free the table and the allocated cache structure */

21 zfree(ht->table);

22/* Re-initialize the table */

23 _dictReset(ht);

24return DICT_OK; /* never fails */

25 }

两层循环,分别遍历所有bucket与单bucket里所有dictEntry进行释放。关于这里的 (i&65535) == 0的判断,_dictClear方法仅在相同文件的方法dictEmpty与dictRelease调用

 1void dictRelease(dict *d)

2{

3 _dictClear(d,&d->ht[0],NULL);

4 _dictClear(d,&d->ht[1],NULL);

5 zfree(d);

6}

7

8void dictEmpty(dict *d, void(callback)(void*)) {

9 _dictClear(d,&d->ht[0],callback);

10 _dictClear(d,&d->ht[1],callback);

11 d->rehashidx = -1;

12 d->iterators = 0;

13 }

dictRelease不用多说,传入的callback为NULL。而dictEmpty,搜索redis源码所有文件的调用,

ccx@ccx:~/Desktop/redis-5.0.7/redis-5.0.7/src$ grep dictEmpty * -r          

blocked.c: dictEmpty(c->bpop.keys,NULL);

db.c: dictEmpty(server.db[j].dict,callback);

db.c: dictEmpty(server.db[j].expires,callback);

dict.c:void dictEmpty(dict *d, void(callback)(void*)) {

dict.h:void dictEmpty(dict *d, void(callback)(void*));

replication.c: dictEmpty(server.repl_scriptcache_dict,NULL);

sentinel.c: dictEmpty(server.commands,NULL);

仅db.c里传了callback进来,对应的方法为

1longlong emptyDb(int dbnum, int flags, void(callback)(void*));

继续搜索:

ccx@ccx:~/Desktop/redis-5.0.7/redis-5.0.7/src$ grep emptyDb * -r

cluster.c: emptyDb(-1,EMPTYDB_NO_FLAGS,NULL);

db.c:longlong emptyDb(int dbnum, int flags, void(callback)(void*)) {

db.c: emptyDbAsync(&server.db[j]);

db.c:/* Return the set of flags to use for the emptyDb() call for FLUSHALL*/

db.c: server.dirty += emptyDb(c->db->id,flags,NULL);

db.c: server.dirty += emptyDb(-1,flags,NULL);

debug.c: emptyDb(-1,EMPTYDB_NO_FLAGS,NULL);

debug.c: emptyDb(-1,EMPTYDB_NO_FLAGS,NULL);

lazyfree.c:void emptyDbAsync(redisDb *db) {

replication.c: * data with emptyDb(), and while we load the new data received as an

replication.c:/* Callback used by emptyDb() while flushing away old data to load*/

replication.c: emptyDb(

server.h:longlong emptyDb(int dbnum, int flags, void(callback)(void*));

server.h:void emptyDbAsync(redisDb *db);

 真正调用的地方传入的也是NULL,并不知道是拿来做什么用的...

ps:用grep查找只是方便cv过来....

 

七、迭代器操作

 数据结构:

1 typedef struct dictIterator {

2 dict *d;

3long index;

4int table, safe;

5 dictEntry *entry, *nextEntry;

6/* unsafe iterator fingerprint for misuse detection. */

7longlong fingerprint;

8 } dictIterator;

根据源码注释可知,如果是个安全的迭代器,即safe == 1,则在迭代中可以调用dictAdd、dictFind等方法,否则只能调用dictNext。

index表示,ht[table]对应的bucket的idx。

获取迭代器:

 1 dictIterator *dictGetIterator(dict *d)

2{

3 dictIterator *iter = zmalloc(sizeof(*iter));

4

5 iter->d = d;

6 iter->table = 0;

7 iter->index = -1;

8 iter->safe = 0;

9 iter->entry = NULL;

10 iter->nextEntry = NULL;

11return iter;

12}

13

14 dictIterator *dictGetSafeIterator(dict *d) {

15 dictIterator *i = dictGetIterator(d);

16

17 i->safe = 1;

18return i;

19 }

刚获取的迭代器并不指向具体哪个dictEntry

next操作:

 1 dictEntry *dictNext(dictIterator *iter)

2{

3while (1) {

4if (iter->entry == NULL) {

5 dictht *ht = &iter->d->ht[iter->table];

6if (iter->index == -1 && iter->table == 0) {

7if (iter->safe)

8 iter->d->iterators++;

9else

10 iter->fingerprint = dictFingerprint(iter->d);

11 }

12 iter->index++;

13if (iter->index >= (long) ht->size) {

14if (dictIsRehashing(iter->d) && iter->table == 0) {

15 iter->table++;

16 iter->index = 0;

17 ht = &iter->d->ht[1];

18 } else {

19break;

20 }

21 }

22 iter->entry = ht->table[iter->index];

23 } else {

24 iter->entry = iter->nextEntry;

25 }

26if (iter->entry) {

27/* We need to save the "next" here, the iterator user

28 * may delete the entry we are returning. */

29 iter->nextEntry = iter->entry->next;

30return iter->entry;

31 }

32 }

33return NULL;

34 }

对于一个新的迭代器,首次调用时,会根据是否安全,做不同操作。安全的迭代器会给dict里的计数器+1,不安全的将会记录本字典的指纹。之后会遍历ht[0],取到第一个非NULL的dictEntry。当ht[0]遍历完且取不到非NULL的dictEntry时,如果正在进行rehashing操作,则会去ht[1]里找。

如:

 1/*

2

3 +-------------------------+

4+----|dict * |

5| +-------------------------+

6| |long index |

7| +-------------------------+

8| |int table |

9| +-------------------------+

10| |int safe |

11| +-------------------------+

12| |dictEntry *entry |->NULL

13| +-------------------------+

14| |dictEntry *entrynextEntry|->NULL

15| +-------------------------+

16| |long long fingerprint |

17| +-------------------------+

18|

19|

20|

21| +-->NULL

22| +->+----------+ /

23| | |dictEntry*|/ +----+

24| | +----------+ +-->|K2|V|->NULL

25| | |dictEntry*|/ +----+

26+--->+------------+ /+-----------+ | +----------+

27 |dictType* | / |dictEntry**|--+ |dictEntry*| +----+

28 +------------+ / +-----------+ +----------+ +-->|K3|V|->NULL

29 |privdata | / |size=4 | |dictEntry*| +----+

30 +------------+/ +-----------+ +----------+ +----+

31 |ht[2] | |sizemask=3 | +->|K4|V|->NULL

32 +------------+ +-----------+ +----+

33 |rehashidx=1 | |used=3 |

34 +------------+ +-----------+

35 |iterators=0 |

36 +------------+ +-----------+ +->+----------+ +----+ +----+

37 |dictEntry**|--+ |dictEntry*|-->|K1|V|->|K5|V|->NULL

38 +-----------+ +----------+ +----+ +----+

39 |size=8 | |dictEntry*|->NULL

40 +-----------+ +----------+

41 |sizemask=7 | |dictEntry*|->NULL

42 +-----------+ +----------+

43 |used=3 | |dictEntry*|->NULL

44 +-----------+ +----------+

45 |dictEntry*|->NULL

46 +----------+

47 |dictEntry*|->NULL

48 +----------+

49 |dictEntry*|->NULL

50 +----------+

51 |dictEntry*|->NULL

52 +----------+

53*/

遍历顺序为,K2,K3,K4,K1,K5。

迭代器销毁:

 1void dictReleaseIterator(dictIterator *iter)

2{

3if (!(iter->index == -1 && iter->table == 0)) {

4if (iter->safe)

5 iter->d->iterators--;

6else

7 assert(iter->fingerprint == dictFingerprint(iter->d));

8 }

9 zfree(iter);

10 }

与首次执行next操作相对应,若为safe的迭代器,要给dict的计算减1,否则要校验期间dict的指纹是否发生了变化 。

指纹的计算:

 1longlong dictFingerprint(dict *d) {

2longlong integers[6], hash = 0;

3int j;

4

5 integers[0] = (long) d->ht[0].table;

6 integers[1] = d->ht[0].size;

7 integers[2] = d->ht[0].used;

8 integers[3] = (long) d->ht[1].table;

9 integers[4] = d->ht[1].size;

10 integers[5] = d->ht[1].used;

11

12/* We hash N integers by summing every successive integer with the integer

13 * hashing of the previous sum. Basically:

14 *

15 * Result = hash(hash(hash(int1)+int2)+int3) ...

16 *

17 * This way the same set of integers in a different order will (likely) hash

18 * to a different number. */

19for (j = 0; j < 6; j++) {

20 hash += integers[j];

21/* For the hashing step we use Tomas Wang"s 64 bit integer hash. */

22 hash = (~hash) + (hash << 21); // hash = (hash << 21) - hash - 1;

23 hash = hash ^ (hash >> 24);

24 hash = (hash + (hash << 3)) + (hash << 8); // hash * 265

25 hash = hash ^ (hash >> 14);

26 hash = (hash + (hash << 2)) + (hash << 4); // hash * 21

27 hash = hash ^ (hash >> 28);

28 hash = hash + (hash << 31);

29 }

30return hash;

31 }

对于不安全的迭代器,在迭代过程中,不允许操作任何修改dict的操作,是只读的,不会发生迭代器失效的问题。对于安全的迭代器,在进行操作本节点的时候,redis中记录了当前迭代的bucket idx,以及当前dictEntry的next节点。如果只是add操作,即使是用了头插法把新dictEntry插在本节点之前,对迭代器本身并没有影响。如果是delete了本节点,迭代器中还记录了next节点的位置,调用next时直接取就好。如果next为空,则可以认为当前bucket遍历完了,取下一个bucket就行了。当然,如果在add/delete等操作的时候,进行了rehashing操作,那么当前迭代器里记录的next,在rehashing之后,可能就不是当前节点新位置的next了。所以在使用安全迭代器的时候,禁止了rehashing操作。

 

八、其它操作

dict还支持其它的一些操作。

1、随机获取一个key,可以用于一些随机操作的dictGetRandomKey

2、随机获取n个key:dictGetSomeKeys

3、scan操作

关于scan操作,redis采用了一个很巧妙的方法,保证了在开始scan时未删除的元素一定能遍历到,又能保证尽量少地重复遍历。

这里直接给个传送门,这里讲得很好:

https://blog.csdn.net/gqtcgq/article/details/50533336

 

redis 5.0.7 下载链接

http://download.redis.io/releases/redis-5.0.7.tar.gz

源码阅读顺序参考:

https://github.com/huangz1990/blog/blob/master/diary/2014/how-to-read-redis-source-code.rst

其它参考:

https://zhuanlan.zhihu.com/p/42156903

 

以上是 redis5.0.7源码阅读——字典dict 的全部内容, 来源链接: utcz.com/z/532094.html

回到顶部