redis5.0.7源码阅读——整数集合intset
redis中整数集合intset相关的文件为:intset.h与intset.c
intset的所有操作与操作一个排序整形数组 int a[N]类似,只是根据类型做了内存上的优化。
一、数据结构
1 typedef struct intset {2 uint32_t encoding;3 uint32_t length;4 int8_t contents[];5 } intset;
intset的数据结构比较简单,使用了一个变长结构体,成员length记录当前成员数量,成员encoding记录当前的int类型,共有以下三种:
1#define INTSET_ENC_INT16 (sizeof(int16_t))2#define INTSET_ENC_INT32 (sizeof(int32_t))
3#define INTSET_ENC_INT64 (sizeof(int64_t))
并使用以下方法进行判断类型:
1static uint8_t _intsetValueEncoding(int64_t v) {2if (v < INT32_MIN || v > INT32_MAX)3return INTSET_ENC_INT64;4elseif (v < INT16_MIN || v > INT16_MAX)5return INTSET_ENC_INT32;6else7return INTSET_ENC_INT16;
8 }
intset是已排序好的整数集合,其大致结构如下:
1/*2+--------+--------+--------...--------------+
3|encoding|length |contents(encoding*length)|
4+--------+--------+--------...--------------+
5*/
intset严格按照小端字节序进行存储,不论机器的字节序类型。如果是大端机器,需要进行转换,才进行存储。endianconv.h中有如下定义:
1#if (BYTE_ORDER == LITTLE_ENDIAN)2#define memrev16ifbe(p) ((void)(0))
3#define memrev32ifbe(p) ((void)(0))
4#define memrev64ifbe(p) ((void)(0))
5#define intrev16ifbe(v) (v)
6#define intrev32ifbe(v) (v)
7#define intrev64ifbe(v) (v)
8#else
9#define memrev16ifbe(p) memrev16(p)
10#define memrev32ifbe(p) memrev32(p)
11#define memrev64ifbe(p) memrev64(p)
12#define intrev16ifbe(v) intrev16(v)
13#define intrev32ifbe(v) intrev32(v)
14#define intrev64ifbe(v) intrev64(v)
15#endif
具体实现在endianconv.c中,此处略过。
二、创建
1 intset *intsetNew(void) {2 intset *is = zmalloc(sizeof(intset));3is->encoding = intrev32ifbe(INTSET_ENC_INT16);4is->length = 0;5returnis;6 }
刚创建好的intset是空的,默认使用最小的类型。其结构为:
1/*此处用一根“-”表示一字节,后同2+----+----+
3| 16| 0|
4+----+----+
5*/
三、 操作
若有以下intset:
1/*2+----+----+--+--+--+--+--+--+--+
3| 16| 7| 1| 2| 3| 4| 5| 7| 8|
4+----+----+--+--+--+--+--+--+--+
5 |contents
6
7*/
现在插入一个数字6,需要调用以下方法:
1/* Insert an integer in the intset */2 intset *intsetAdd(intset *is, int64_t value, uint8_t *success) {
3 uint8_t valenc = _intsetValueEncoding(value);
4 uint32_t pos;
5if (success) *success = 1;
6
7/* Upgrade encoding if necessary. If we need to upgrade, we know that
8 * this value should be either appended (if > 0) or prepended (if < 0),
9 * because it lies outside the range of existing values. */
10if (valenc > intrev32ifbe(is->encoding)) {
11/* This always succeeds, so we don"t need to curry *success. */
12return intsetUpgradeAndAdd(is,value);
13 } else {
14/* Abort if the value is already present in the set.
15 * This call will populate "pos" with the right position to insert
16 * the value when it cannot be found. */
17if (intsetSearch(is,value,&pos)) {
18if (success) *success = 0;
19returnis;
20 }
21
22is = intsetResize(is,intrev32ifbe(is->length)+1);
23if (pos < intrev32ifbe(is->length)) intsetMoveTail(is,pos,pos+1);
24 }
25
26 _intsetSet(is,pos,value);
27is->length = intrev32ifbe(intrev32ifbe(is->length)+1);
28returnis;
29 }
因int16_t足以存储数字“6”,所以新插入数字的int类型与intset一致,然后需要查找插入的pos:
1static uint8_t intsetSearch(intset *is, int64_t value, uint32_t *pos) { 2int min = 0, max = intrev32ifbe(is->length)-1, mid = -1; 3 int64_t cur = -1; 45/* The value can never be found when the set is empty */
6if (intrev32ifbe(is->length) == 0) {
7if (pos) *pos = 0;
8return0;
9 } else {
10/* Check for the case where we know we cannot find the value,
11 * but do know the insert position. */
12if (value > _intsetGet(is,max)) {
13if (pos) *pos = intrev32ifbe(is->length);
14return0;
15 } elseif (value < _intsetGet(is,0)) {
16if (pos) *pos = 0;
17return0;
18 }
19 }
20
21while(max >= min) {
22 mid = ((unsigned int)min + (unsigned int)max) >> 1;
23 cur = _intsetGet(is,mid);
24if (value > cur) {
25 min = mid+1;
26 } elseif (value < cur) {
27 max = mid-1;
28 } else {
29break;
30 }
31 }
32
33if (value == cur) {
34if (pos) *pos = mid;
35return1;
36 } else {
37if (pos) *pos = min;
38return0;
39 }
40 }
因intset是已排序好的,所以使用了二分查找。过程如下
1/*2find 6
3 +----+----+--+--+--+--+--+--+--+
4 | 16| 7| 1| 2| 3| 4| 5| 7| 8|
5 +----+----+--+--+--+--+--+--+--+
6pos | 0| 1| 2| 3| 4| 5| 6|
7step1 |min=0
8 |max=6
9 |mid=(0+6)>>1=3
10 |mid_val=4
11
12pos | 0| 1| 2| 3| 4| 5| 6|
13step2 |min=4
14 |max=6
15 |mid=(4+6)>>1=5
16 |mid_val=7
17
18pos | 0| 1| 2| 3| 4| 5| 6|
19step3 |min=4
20 |max=4
21 |mid=(4+4)>>1=5
22 |mid_val=5
23
24pos | 0| 1| 2| 3| 4| 5| 6|
25step4 |min=5
26 |max=4
27min>max break
28*/
6在intset中不存在,查找到需要插入到pos=5的位置,此时首先要扩展intset的content:
1static intset *intsetResize(intset *is, uint32_t len) {2 uint32_t size = len*intrev32ifbe(is->encoding);3is = zrealloc(is,sizeof(intset)+size);4returnis;5 }
扩展后:
1/*2+----+----+--+--+--+--+--+--+--+--+
3| 16| 7| 1| 2| 3| 4| 5| 7| 8| |
4+----+----+--+--+--+--+--+--+--+--+
5pos | 0| 1| 2| 3| 4| 5| 6| 7|
6*/
然后把原来在pos=5及之后的所有的元素向后移一格:
1staticvoid intsetMoveTail(intset *is, uint32_t from, uint32_t to) { 2void *src, *dst; 3 uint32_t bytes = intrev32ifbe(is->length)-from; 4 uint32_t encoding = intrev32ifbe(is->encoding); 56if (encoding == INTSET_ENC_INT64) {
7 src = (int64_t*)is->contents+from;
8 dst = (int64_t*)is->contents+to;
9 bytes *= sizeof(int64_t);
10 } elseif (encoding == INTSET_ENC_INT32) {
11 src = (int32_t*)is->contents+from;
12 dst = (int32_t*)is->contents+to;
13 bytes *= sizeof(int32_t);
14 } else {
15 src = (int16_t*)is->contents+from;
16 dst = (int16_t*)is->contents+to;
17 bytes *= sizeof(int16_t);
18 }
19 memmove(dst,src,bytes);
20 }
移动后:
1/*2+----+----+--+--+--+--+--+--+--+--+
3| 16| 7| 1| 2| 3| 4| 5| 7| 7| 8|
4+----+----+--+--+--+--+--+--+--+--+
5pos | 0| 1| 2| 3| 4| 5| 6| 7|
6*/
其使用memmove,并不全修改未覆盖到的内存,所以此时pos=5的值 还是7
最后修改pos=5的值:
1staticvoid _intsetSet(intset *is, int pos, int64_t value) { 2 uint32_t encoding = intrev32ifbe(is->encoding); 34if (encoding == INTSET_ENC_INT64) {
5 ((int64_t*)is->contents)[pos] = value;
6 memrev64ifbe(((int64_t*)is->contents)+pos);
7 } elseif (encoding == INTSET_ENC_INT32) {
8 ((int32_t*)is->contents)[pos] = value;
9 memrev32ifbe(((int32_t*)is->contents)+pos);
10 } else {
11 ((int16_t*)is->contents)[pos] = value;
12 memrev16ifbe(((int16_t*)is->contents)+pos);
13 }
14 }
修改后并增加了length:
1/*2+----+----+--+--+--+--+--+--+--+--+
3| 16| 8| 1| 2| 3| 4| 5| 6| 7| 8|
4+----+----+--+--+--+--+--+--+--+--+
5pos | 0| 1| 2| 3| 4| 5| 6| 7|
6*/
如果此时要插入的数字是65536,超出了int16_t所能表示的范围,要先进行扩展int类型操作:
1static intset *intsetUpgradeAndAdd(intset *is, int64_t value) { 2 uint8_t curenc = intrev32ifbe(is->encoding); 3 uint8_t newenc = _intsetValueEncoding(value); 4int length = intrev32ifbe(is->length); 5int prepend = value < 0 ? 1 : 0; 67/* First set new encoding and resize */
8is->encoding = intrev32ifbe(newenc);
9is = intsetResize(is,intrev32ifbe(is->length)+1);
10
11/* Upgrade back-to-front so we don"t overwrite values.
12 * Note that the "prepend" variable is used to make sure we have an empty
13 * space at either the beginning or the end of the intset. */
14while(length--)
15 _intsetSet(is,length+prepend,_intsetGetEncoded(is,length,curenc));
16
17/* Set the value at the beginning or the end. */
18if (prepend)
19 _intsetSet(is,0,value);
20else
21 _intsetSet(is,intrev32ifbe(is->length),value);
22is->length = intrev32ifbe(intrev32ifbe(is->length)+1);
23returnis;
24 }
因其超出原来的int类型所能表示的范围,若为正数,一定是最大的,则应该插入在intset最后,否则应该在最前面。扩展完之后,从后往前将原来的数字,以新的int类型,放置在新的位置上,保证不会有未处理的数字被覆盖,处理完整。
删除操作:
1 intset *intsetRemove(intset *is, int64_t value, int *success) { 2 uint8_t valenc = _intsetValueEncoding(value); 3 uint32_t pos; 4if (success) *success = 0; 56if (valenc <= intrev32ifbe(is->encoding) && intsetSearch(is,value,&pos)) {
7 uint32_t len = intrev32ifbe(is->length);
8
9/* We know we can delete */
10if (success) *success = 1;
11
12/* Overwrite value with tail and update length */
13if (pos < (len-1)) intsetMoveTail(is,pos+1,pos);
14is = intsetResize(is,len-1);
15is->length = intrev32ifbe(len-1);
16 }
17returnis;
18 }
找到指定元素之后,直接把后面的内存移至前面,然后resize。
redis 5.0.7 下载链接
http://download.redis.io/releases/redis-5.0.7.tar.gz
源码阅读顺序参考:
https://github.com/huangz1990/blog/blob/master/diary/2014/how-to-read-redis-source-code.rst
以上是 redis5.0.7源码阅读——整数集合intset 的全部内容, 来源链接: utcz.com/z/532164.html