redis源码阅读——动态字符串sds

database

redis中动态字符串sds相关的文件为:sds.h与sds.c

一、数据结构

redis中定义了自己的数据类型"sds",用于描述 char*,与一些数据结构

 1 typedef char *sds;

2

3/* Note: sdshdr5 is never used, we just access the flags byte directly.

4 * However is here to document the layout of type 5 SDS strings. */

5struct __attribute__ ((__packed__)) sdshdr5 {

6 unsigned char flags; /* 3 lsb of type, and 5 msb of string length */

7char buf[];

8};

9struct __attribute__ ((__packed__)) sdshdr8 {

10 uint8_t len; /* used */

11 uint8_t alloc; /* excluding the header and null terminator */

12 unsigned char flags; /* 3 lsb of type, 5 unused bits */

13char buf[];

14};

15struct __attribute__ ((__packed__)) sdshdr16 {

16 uint16_t len; /* used */

17 uint16_t alloc; /* excluding the header and null terminator */

18 unsigned char flags; /* 3 lsb of type, 5 unused bits */

19char buf[];

20};

21struct __attribute__ ((__packed__)) sdshdr32 {

22 uint32_t len; /* used */

23 uint32_t alloc; /* excluding the header and null terminator */

24 unsigned char flags; /* 3 lsb of type, 5 unused bits */

25char buf[];

26};

27struct __attribute__ ((__packed__)) sdshdr64 {

28 uint64_t len; /* used */

29 uint64_t alloc; /* excluding the header and null terminator */

30 unsigned char flags; /* 3 lsb of type, 5 unused bits */

31char buf[];

32 };

定义结构体时,加上了 __attribute__ ((__packed__)) 关键字,用于取消字节对齐,使其按照紧凑排列的方式,占用内存。这样做的目的并不仅仅只是为了节约内存的使用。结构体最后有一个 char buf[],查了资料之后了解到,其只是定义一个数组符号,并没有任何成员,不占用结构体的内存空间,其真实地址紧随结构体之后,可实现变长结构体。由此可以只根据sds字符串的真实地址,取到sds结构体的任意成员变量数据。如flags:

1void func(const sds s)

2{

3 unsigned char flags = s[-1];

4 }

这个flags,从源码的注释上看,其低三位用于表示sds类型,高五位是当sds类型为sdshdr5时,表明字符串长度的。对于非sdshdr5的类型,有专门的成员变量描述当前已使用的长度,及总buffer长度。

sds类型:

1#define SDS_TYPE_5  0

2#define SDS_TYPE_8 1

3#define SDS_TYPE_16 2

4#define SDS_TYPE_32 3

5#define SDS_TYPE_64 4

sds定义了两个宏,用于获取sds结构体首地址:

1#define SDS_HDR_VAR(T,s) struct sdshdr##T *sh = (void*)((s)-(sizeof(struct sdshdr##T)));

2#define SDS_HDR(T,s) ((struct sdshdr##T *)((s)-(sizeof(struct sdshdr##T))))

由此可见sds结构体的大致结构为:

 1/*

2sdshdr5

3+--------+----...---+

4|00011000|abc |

5+--------+----...---+

6|flags |buf

7

8sdshdr8

9+--------+--------+--------+----...---+

10|00000011|00000011|00000001|abc |

11+--------+--------+--------+----...---+

12|len |alloc |flags |buf

13*/

sds的一些常规操作,如获取字符串长度、获取剩余buf长度等,都是其于以上操作,首先根据sds字符串地址获取其flags的值,根据flags低三位判断sds类型,接着使用宏SDS_HDR_VAR或SDS_HDR进行操作。如:

 1#define SDS_TYPE_MASK 7   //0000,0111

2

3static inline size_t sdslen(const sds s) {

4//获取flags

5 unsigned char flags = s[-1];

6//根据flags低三位取类型,根据类型做不同处理

7switch(flags&SDS_TYPE_MASK) {

8case SDS_TYPE_5:

9return SDS_TYPE_5_LEN(flags);

10case SDS_TYPE_8:

11return SDS_HDR(8,s)->len;

12case SDS_TYPE_16:

13return SDS_HDR(16,s)->len;

14case SDS_TYPE_32:

15return SDS_HDR(32,s)->len;

16case SDS_TYPE_64:

17return SDS_HDR(64,s)->len;

18 }

19return0;

20 }

关于sds结构体中的len与alloc,len表示的是sds字符串的当前长度,alloc表示的是buf的总长度。

二、一些操作。

首先是一个根据字符串长度来决定sds类型的方法

 1static inline char sdsReqType(size_t string_size) {

2if (string_size < 1<<5) //flags高五位最大数字为 1<<5 - 1

3return SDS_TYPE_5;

4if (string_size < 1<<8) //uint8_t 最大数字为 1<<8 - 1

5return SDS_TYPE_8;

6if (string_size < 1<<16) //uint16_t 最大数字为 1<<16 - 1

7return SDS_TYPE_16;

8#if (LONG_MAX == LLONG_MAX) //区分32位/64位系统

9if (string_size < 1ll<<32)

10return SDS_TYPE_32;

11return SDS_TYPE_64;

12#else

13return SDS_TYPE_32;

14#endif

15 }

创建一个新的sds结构体:

 1 sds sdsnewlen(constvoid *init, size_t initlen) {

2void *sh;

3 sds s;

4char type = sdsReqType(initlen);

5if (type == SDS_TYPE_5 && initlen == 0) type = SDS_TYPE_8;

6int hdrlen = sdsHdrSize(type);

7 unsigned char *fp; /* flags pointer. */

8

9 sh = s_malloc(hdrlen+initlen+1);

10if (init==SDS_NOINIT)

11 init = NULL;

12elseif (!init)

13 memset(sh, 0, hdrlen+initlen+1);

14if (sh == NULL) return NULL;

15 s = (char*)sh+hdrlen;

16 fp = ((unsigned char*)s)-1;

17switch(type) {

18case SDS_TYPE_5: {

19 *fp = type | (initlen << SDS_TYPE_BITS);

20break;

21 }

22case SDS_TYPE_8: {

23 SDS_HDR_VAR(8,s);

24 sh->len = initlen;

25 sh->alloc = initlen;

26 *fp = type;

27break;

28 }

29case SDS_TYPE_16: {

30//同SDS_TYPE_8,略

31 }

32case SDS_TYPE_32: {

33//同SDS_TYPE_8,略

34 }

35case SDS_TYPE_64: {

36//同SDS_TYPE_8,略

37 }

38 }

39if (initlen && init)

40 memcpy(s, init, initlen);

41 s[initlen] = "";

42return s;

43 }

由外部指定初始字符串与初始长度。先根据长度获取sds类型,然后根据不同类型,可以获得实际需要的总内存空间大小(包括sds结构体长度)。值得注意的是,如果初始长度为0的情况下,若为SDS_TYPE_5,则会被强制转为SDS_TYPE_8。根据源码的注释,空串的定义,通常是为了向后追加内容。SDS_TYPE_5并不适合这种场景。分配完内存空间之后,设置好sds结构体的值,再把初始字符串拷至sds字符串的实际初始位置上(如果有),就可以了。

本方法做为最底层的sds字符串初始化接口,被其它接口所调用,如:

 1//空string

2 sds sdsempty(void) {

3return sdsnewlen("",0);

4}

5

6//指定string

7 sds sdsnew(constchar *init) {

8 size_t initlen = (init == NULL) ? 0 : strlen(init);

9return sdsnewlen(init, initlen);

10}

11

12//从现有sds string拷贝

13 sds sdsdup(const sds s) {

14return sdsnewlen(s, sdslen(s));

15 }

sds的释放也不是简单地free sds字符串,同样,它要先找到sds结构体的首地址,再进行free:

1void sdsfree(sds s) {

2if (s == NULL) return;

3 s_free((char*)s-sdsHdrSize(s[-1]));

4 }

做为一个变长字符串,与传统c字符串,最大的区别,是可以动态扩展,就像c++ stl里的变长数组 vector一样。sds的扩容有自己的机制:

 1sds sdsMakeRoomFor(sds s, size_t addlen) {

2void *sh, *newsh;

3 size_t avail = sdsavail(s);

4 size_t len, newlen;

5char type, oldtype = s[-1] & SDS_TYPE_MASK;

6int hdrlen;

7

8/* Return ASAP if there is enough space left. */

9if (avail >= addlen) return s;

10

11 len = sdslen(s);

12 sh = (char*)s-sdsHdrSize(oldtype);

13 newlen = (len+addlen);

14if (newlen < SDS_MAX_PREALLOC)

15 newlen *= 2;

16else

17 newlen += SDS_MAX_PREALLOC;

18

19 type = sdsReqType(newlen);

20

21/* Don"t use type 5: the user is appending to the string and type 5 is

22 * not able to remember empty space, so sdsMakeRoomFor() must be called

23 * at every appending operation. */

24if (type == SDS_TYPE_5) type = SDS_TYPE_8;

25

26 hdrlen = sdsHdrSize(type);

27if (oldtype==type) {

28 newsh = s_realloc(sh, hdrlen+newlen+1);

29if (newsh == NULL) return NULL;

30 s = (char*)newsh+hdrlen;

31 } else {

32/* Since the header size changes, need to move the string forward,

33 * and can"t use realloc */

34 newsh = s_malloc(hdrlen+newlen+1);

35if (newsh == NULL) return NULL;

36 memcpy((char*)newsh+hdrlen, s, len+1);

37 s_free(sh);

38 s = (char*)newsh+hdrlen;

39 s[-1] = type;

40 sdssetlen(s, len);

41 }

42 sdssetalloc(s, newlen);

43return s;

44 }

本方法用于扩容sds,并可以指定长度。首先其先取出了当前还空闲的buf长度,方法如下:

 1static inline size_t sdsavail(const sds s) {

2 unsigned char flags = s[-1];

3switch(flags&SDS_TYPE_MASK) {

4case SDS_TYPE_5: {

5return0;

6 }

7case SDS_TYPE_8: {

8 SDS_HDR_VAR(8,s);

9return sh->alloc - sh->len;

10 }

11case SDS_TYPE_16: {

12 SDS_HDR_VAR(16,s);

13return sh->alloc - sh->len;

14 }

15case SDS_TYPE_32: {

16 SDS_HDR_VAR(32,s);

17return sh->alloc - sh->len;

18 }

19case SDS_TYPE_64: {

20 SDS_HDR_VAR(64,s);

21return sh->alloc - sh->len;

22 }

23 }

24return0;

25 }

若当前空闲的长度,比需要的长度大,则认为不用再额外分配空间,直接return。否则就启用扩容操作。

扩容时,先根据当前已使用的长度len与需要增加的长度addlen,算出一个初始新长度newlen,然后对其进行判断,若newlen大于1M,则在newlen的基础上,继续增加1M,否则直接翻倍。然后再根据newlen的最终大小,获取sds的新类型。此时,若类型依然为SDS_TYPE_5,也要强行修正为SDS_TYPE_8。因为SDS_TYPE_5类型并不知道当前空闲空间的大小。此时,若sds的新类型与原来相同,则只需要调用realloc重新分配一下空间即可。此方法会分配出一块新空间的同时,把原来空间的内容拷过去,并释放原有空间。而sds类型发生改变的时候,就需要手动新造一个新的sds了。扩容完成之后,需要修正一下当前已使用的空间len,与总buf大小 alloc。

扩容完成之后,或者是其它什么操作,如人工修改了sds字符串,并更新的len的情况下,会存在空闲空间太大的情况。此时如果想释放这部分空间,sds也提供了相应的操作:

 1sds sdsRemoveFreeSpace(sds s) {

2void *sh, *newsh;

3char type, oldtype = s[-1] & SDS_TYPE_MASK;

4int hdrlen, oldhdrlen = sdsHdrSize(oldtype);

5 size_t len = sdslen(s);

6 size_t avail = sdsavail(s);

7 sh = (char*)s-oldhdrlen;

8

9/* Return ASAP if there is no space left. */

10if (avail == 0) return s;

11

12/* Check what would be the minimum SDS header that is just good enough to

13 * fit this string. */

14 type = sdsReqType(len);

15 hdrlen = sdsHdrSize(type);

16

17/* If the type is the same, or at least a large enough type is still

18 * required, we just realloc(), letting the allocator to do the copy

19 * only if really needed. Otherwise if the change is huge, we manually

20 * reallocate the string to use the different header type. */

21if (oldtype==type || type > SDS_TYPE_8) {

22 newsh = s_realloc(sh, oldhdrlen+len+1);

23if (newsh == NULL) return NULL;

24 s = (char*)newsh+oldhdrlen;

25 } else {

26 newsh = s_malloc(hdrlen+len+1);

27if (newsh == NULL) return NULL;

28 memcpy((char*)newsh+hdrlen, s, len+1);

29 s_free(sh);

30 s = (char*)newsh+hdrlen;

31 s[-1] = type;

32 sdssetlen(s, len);

33 }

34 sdssetalloc(s, len);

35return s;

36 }

操作与扩容类似,同样是会根据sds类型是否发生变化 ,来决定是使用realloc还是重新造一个sds。

除此之外,sds还实现了一些转义、数据类型转换、一些类似c风格的字符串操作等。如:strcpy、strcat、strlen、strcmp等。只是其更加多样化,如sds的strcat实现,就可以支持类似printf的方式。如:

 1/* Like sdscatprintf() but gets va_list instead of being variadic. */

2 sds sdscatvprintf(sds s, constchar *fmt, va_list ap) {

3 va_list cpy;

4char staticbuf[1024], *buf = staticbuf, *t;

5 size_t buflen = strlen(fmt)*2;

6

7/* We try to start using a static buffer for speed.

8 * If not possible we revert to heap allocation. */

9if (buflen > sizeof(staticbuf)) {

10 buf = s_malloc(buflen);

11if (buf == NULL) return NULL;

12 } else {

13 buflen = sizeof(staticbuf);

14 }

15

16/* Try with buffers two times bigger every time we fail to

17 * fit the string in the current buffer size. */

18while(1) {

19 buf[buflen-2] = "";

20 va_copy(cpy,ap);

21 vsnprintf(buf, buflen, fmt, cpy);

22 va_end(cpy);

23if (buf[buflen-2] != "") {

24if (buf != staticbuf) s_free(buf);

25 buflen *= 2;

26 buf = s_malloc(buflen);

27if (buf == NULL) return NULL;

28continue;

29 }

30break;

31 }

32

33/* Finally concat the obtained string to the SDS string and return it. */

34 t = sdscat(s, buf);

35if (buf != staticbuf) s_free(buf);

36return t;

37}

38

39/* Append to the sds string "s" a string obtained using printf-alike format

40 * specifier.

41 *

42 * After the call, the modified sds string is no longer valid and all the

43 * references must be substituted with the new pointer returned by the call.

44 *

45 * Example:

46 *

47 * s = sdsnew("Sum is: ");

48 * s = sdscatprintf(s,"%d+%d = %d",a,b,a+b).

49 *

50 * Often you need to create a string from scratch with the printf-alike

51 * format. When this is the need, just use sdsempty() as the target string:

52 *

53 * s = sdscatprintf(sdsempty(), "... your format ...", args);

54*/

55 sds sdscatprintf(sds s, constchar *fmt, ...) {

56 va_list ap;

57char *t;

58 va_start(ap, fmt);

59 t = sdscatvprintf(s,fmt,ap);

60 va_end(ap);

61return t;

62 }

其它方法在此不再过多描述

三、sds相比c的标准库优势:

1、相比于c标准库,获取字符串的len复杂读从O(N)降低到O(1),sds结构中存储了字符串的长度,所以类似strlen(str)的操作不会成为redis的性能瓶颈。
2、在内存分配策略上,redis总是会尝试多分配一些空间,比如小于1MB的字符串,总是分配2倍内存空间,对于大于1MB的空间追加1MB冗余空间,这对于字符串操作(如strcat等)能减少重新内存分配的几率,提升运行性能。
3、SDS总是安全的,sds总是会自动追加字符串结尾符号’’,有效防止溢出发生。
4、惰性释放内存,改变原字符串时,标准库需要重新分配内存的复杂度为O(N),SDS最大为O(N),最优情况下无需重新分配内存空间。

 

源码阅读顺序参考:

https://github.com/huangz1990/blog/blob/master/diary/2014/how-to-read-redis-source-code.rst

其它参考资料:

https://blog.csdn.net/zzuchengming/article/details/51840067

https://blog.csdn.net/junlon2006/article/details/101369299

以上是 redis源码阅读——动态字符串sds 的全部内容, 来源链接: utcz.com/z/532068.html

回到顶部