redis5.0.7源码阅读——整数集合intset

database

redis中整数集合intset相关的文件为:intset.h与intset.c

intset的所有操作与操作一个排序整形数组 int a[N]类似,只是根据类型做了内存上的优化。

一、数据结构

1 typedef struct intset {

2 uint32_t encoding;

3 uint32_t length;

4 int8_t contents[];

5 } intset;

intset的数据结构比较简单,使用了一个变长结构体,成员length记录当前成员数量,成员encoding记录当前的int类型,共有以下三种:

1#define INTSET_ENC_INT16 (sizeof(int16_t))

2#define INTSET_ENC_INT32 (sizeof(int32_t))

3#define INTSET_ENC_INT64 (sizeof(int64_t))

并使用以下方法进行判断类型:

1static uint8_t _intsetValueEncoding(int64_t v) {

2if (v < INT32_MIN || v > INT32_MAX)

3return INTSET_ENC_INT64;

4elseif (v < INT16_MIN || v > INT16_MAX)

5return INTSET_ENC_INT32;

6else

7return INTSET_ENC_INT16;

8 }

intset是已排序好的整数集合,其大致结构如下:

1/*

2+--------+--------+--------...--------------+

3|encoding|length |contents(encoding*length)|

4+--------+--------+--------...--------------+

5*/

intset严格按照小端字节序进行存储,不论机器的字节序类型。如果是大端机器,需要进行转换,才进行存储。endianconv.h中有如下定义:

 1#if (BYTE_ORDER == LITTLE_ENDIAN)

2#define memrev16ifbe(p) ((void)(0))

3#define memrev32ifbe(p) ((void)(0))

4#define memrev64ifbe(p) ((void)(0))

5#define intrev16ifbe(v) (v)

6#define intrev32ifbe(v) (v)

7#define intrev64ifbe(v) (v)

8#else

9#define memrev16ifbe(p) memrev16(p)

10#define memrev32ifbe(p) memrev32(p)

11#define memrev64ifbe(p) memrev64(p)

12#define intrev16ifbe(v) intrev16(v)

13#define intrev32ifbe(v) intrev32(v)

14#define intrev64ifbe(v) intrev64(v)

15#endif

具体实现在endianconv.c中,此处略过。

 

二、创建

1 intset *intsetNew(void) {

2 intset *is = zmalloc(sizeof(intset));

3is->encoding = intrev32ifbe(INTSET_ENC_INT16);

4is->length = 0;

5returnis;

6 }

刚创建好的intset是空的,默认使用最小的类型。其结构为:

1/*此处用一根“-”表示一字节,后同

2+----+----+

3| 16| 0|

4+----+----+

5*/

 

三、 操作

若有以下intset:

1/*

2+----+----+--+--+--+--+--+--+--+

3| 16| 7| 1| 2| 3| 4| 5| 7| 8|

4+----+----+--+--+--+--+--+--+--+

5 |contents

6

7*/

现在插入一个数字6,需要调用以下方法:

 1/* Insert an integer in the intset */

2 intset *intsetAdd(intset *is, int64_t value, uint8_t *success) {

3 uint8_t valenc = _intsetValueEncoding(value);

4 uint32_t pos;

5if (success) *success = 1;

6

7/* Upgrade encoding if necessary. If we need to upgrade, we know that

8 * this value should be either appended (if > 0) or prepended (if < 0),

9 * because it lies outside the range of existing values. */

10if (valenc > intrev32ifbe(is->encoding)) {

11/* This always succeeds, so we don"t need to curry *success. */

12return intsetUpgradeAndAdd(is,value);

13 } else {

14/* Abort if the value is already present in the set.

15 * This call will populate "pos" with the right position to insert

16 * the value when it cannot be found. */

17if (intsetSearch(is,value,&pos)) {

18if (success) *success = 0;

19returnis;

20 }

21

22is = intsetResize(is,intrev32ifbe(is->length)+1);

23if (pos < intrev32ifbe(is->length)) intsetMoveTail(is,pos,pos+1);

24 }

25

26 _intsetSet(is,pos,value);

27is->length = intrev32ifbe(intrev32ifbe(is->length)+1);

28returnis;

29 }

因int16_t足以存储数字“6”,所以新插入数字的int类型与intset一致,然后需要查找插入的pos:

 1static uint8_t intsetSearch(intset *is, int64_t value, uint32_t *pos) {

2int min = 0, max = intrev32ifbe(is->length)-1, mid = -1;

3 int64_t cur = -1;

4

5/* The value can never be found when the set is empty */

6if (intrev32ifbe(is->length) == 0) {

7if (pos) *pos = 0;

8return0;

9 } else {

10/* Check for the case where we know we cannot find the value,

11 * but do know the insert position. */

12if (value > _intsetGet(is,max)) {

13if (pos) *pos = intrev32ifbe(is->length);

14return0;

15 } elseif (value < _intsetGet(is,0)) {

16if (pos) *pos = 0;

17return0;

18 }

19 }

20

21while(max >= min) {

22 mid = ((unsigned int)min + (unsigned int)max) >> 1;

23 cur = _intsetGet(is,mid);

24if (value > cur) {

25 min = mid+1;

26 } elseif (value < cur) {

27 max = mid-1;

28 } else {

29break;

30 }

31 }

32

33if (value == cur) {

34if (pos) *pos = mid;

35return1;

36 } else {

37if (pos) *pos = min;

38return0;

39 }

40 }

因intset是已排序好的,所以使用了二分查找。过程如下

 1/*

2find 6

3 +----+----+--+--+--+--+--+--+--+

4 | 16| 7| 1| 2| 3| 4| 5| 7| 8|

5 +----+----+--+--+--+--+--+--+--+

6pos | 0| 1| 2| 3| 4| 5| 6|

7step1 |min=0

8 |max=6

9 |mid=(0+6)>>1=3

10 |mid_val=4

11

12pos | 0| 1| 2| 3| 4| 5| 6|

13step2 |min=4

14 |max=6

15 |mid=(4+6)>>1=5

16 |mid_val=7

17

18pos | 0| 1| 2| 3| 4| 5| 6|

19step3 |min=4

20 |max=4

21 |mid=(4+4)>>1=5

22 |mid_val=5

23

24pos | 0| 1| 2| 3| 4| 5| 6|

25step4 |min=5

26 |max=4

27min>max break

28*/

6在intset中不存在,查找到需要插入到pos=5的位置,此时首先要扩展intset的content:

1static intset *intsetResize(intset *is, uint32_t len) {

2 uint32_t size = len*intrev32ifbe(is->encoding);

3is = zrealloc(is,sizeof(intset)+size);

4returnis;

5 }

扩展后:

1/*

2+----+----+--+--+--+--+--+--+--+--+

3| 16| 7| 1| 2| 3| 4| 5| 7| 8| |

4+----+----+--+--+--+--+--+--+--+--+

5pos | 0| 1| 2| 3| 4| 5| 6| 7|

6*/

然后把原来在pos=5及之后的所有的元素向后移一格:

 1staticvoid intsetMoveTail(intset *is, uint32_t from, uint32_t to) {

2void *src, *dst;

3 uint32_t bytes = intrev32ifbe(is->length)-from;

4 uint32_t encoding = intrev32ifbe(is->encoding);

5

6if (encoding == INTSET_ENC_INT64) {

7 src = (int64_t*)is->contents+from;

8 dst = (int64_t*)is->contents+to;

9 bytes *= sizeof(int64_t);

10 } elseif (encoding == INTSET_ENC_INT32) {

11 src = (int32_t*)is->contents+from;

12 dst = (int32_t*)is->contents+to;

13 bytes *= sizeof(int32_t);

14 } else {

15 src = (int16_t*)is->contents+from;

16 dst = (int16_t*)is->contents+to;

17 bytes *= sizeof(int16_t);

18 }

19 memmove(dst,src,bytes);

20 }

移动后:

1/*

2+----+----+--+--+--+--+--+--+--+--+

3| 16| 7| 1| 2| 3| 4| 5| 7| 7| 8|

4+----+----+--+--+--+--+--+--+--+--+

5pos | 0| 1| 2| 3| 4| 5| 6| 7|

6*/

其使用memmove,并不全修改未覆盖到的内存,所以此时pos=5的值 还是7

最后修改pos=5的值:

 1staticvoid _intsetSet(intset *is, int pos, int64_t value) {

2 uint32_t encoding = intrev32ifbe(is->encoding);

3

4if (encoding == INTSET_ENC_INT64) {

5 ((int64_t*)is->contents)[pos] = value;

6 memrev64ifbe(((int64_t*)is->contents)+pos);

7 } elseif (encoding == INTSET_ENC_INT32) {

8 ((int32_t*)is->contents)[pos] = value;

9 memrev32ifbe(((int32_t*)is->contents)+pos);

10 } else {

11 ((int16_t*)is->contents)[pos] = value;

12 memrev16ifbe(((int16_t*)is->contents)+pos);

13 }

14 }

修改后并增加了length:

1/*

2+----+----+--+--+--+--+--+--+--+--+

3| 16| 8| 1| 2| 3| 4| 5| 6| 7| 8|

4+----+----+--+--+--+--+--+--+--+--+

5pos | 0| 1| 2| 3| 4| 5| 6| 7|

6*/

 

如果此时要插入的数字是65536,超出了int16_t所能表示的范围,要先进行扩展int类型操作:

 1static intset *intsetUpgradeAndAdd(intset *is, int64_t value) {

2 uint8_t curenc = intrev32ifbe(is->encoding);

3 uint8_t newenc = _intsetValueEncoding(value);

4int length = intrev32ifbe(is->length);

5int prepend = value < 0 ? 1 : 0;

6

7/* First set new encoding and resize */

8is->encoding = intrev32ifbe(newenc);

9is = intsetResize(is,intrev32ifbe(is->length)+1);

10

11/* Upgrade back-to-front so we don"t overwrite values.

12 * Note that the "prepend" variable is used to make sure we have an empty

13 * space at either the beginning or the end of the intset. */

14while(length--)

15 _intsetSet(is,length+prepend,_intsetGetEncoded(is,length,curenc));

16

17/* Set the value at the beginning or the end. */

18if (prepend)

19 _intsetSet(is,0,value);

20else

21 _intsetSet(is,intrev32ifbe(is->length),value);

22is->length = intrev32ifbe(intrev32ifbe(is->length)+1);

23returnis;

24 }

因其超出原来的int类型所能表示的范围,若为正数,一定是最大的,则应该插入在intset最后,否则应该在最前面。扩展完之后,从后往前将原来的数字,以新的int类型,放置在新的位置上,保证不会有未处理的数字被覆盖,处理完整。

 

删除操作:

 1 intset *intsetRemove(intset *is, int64_t value, int *success) {

2 uint8_t valenc = _intsetValueEncoding(value);

3 uint32_t pos;

4if (success) *success = 0;

5

6if (valenc <= intrev32ifbe(is->encoding) && intsetSearch(is,value,&pos)) {

7 uint32_t len = intrev32ifbe(is->length);

8

9/* We know we can delete */

10if (success) *success = 1;

11

12/* Overwrite value with tail and update length */

13if (pos < (len-1)) intsetMoveTail(is,pos+1,pos);

14is = intsetResize(is,len-1);

15is->length = intrev32ifbe(len-1);

16 }

17returnis;

18 }

找到指定元素之后,直接把后面的内存移至前面,然后resize。

 

redis 5.0.7 下载链接

http://download.redis.io/releases/redis-5.0.7.tar.gz

源码阅读顺序参考:

https://github.com/huangz1990/blog/blob/master/diary/2014/how-to-read-redis-source-code.rst

 

以上是 redis5.0.7源码阅读——整数集合intset 的全部内容, 来源链接: utcz.com/z/532164.html

回到顶部