本文源码来自redis 6.2
link:https://github.com/redis/redis/tree/6.2
https://github.com/redis/redis/blob/6.2/src/sds.h
https://github.com/redis/redis/blob/6.2/src/sds.c
sds简介
sds(simple dynamic string)全称简单动态字符串,它使用C语言封装了一个二进制安全的字符串函数库,区别于C语言的字符数组使用’\0’作为结束符,sds维护了一个header,用于保存当前字符数组的使用长度等,因此sds可以保证存入其中的数据完整性而不发生截断,redis使用sds作为其中的字符串表示
+--------+-------------------------------+-----------+
| Header | Binary safe C alike string... | Null term |
+--------+-------------------------------+-----------+
|
`-> Pointer returned to the user.
sds内存结构
redis 4.0之前
struct sdshdr {
unsigned int len;
unsigned int free;
char buf[];
};
static inline size_t sdslen(const sds s) {
struct sdshdr *sh = (void*)(s-(sizeof(struct sdshdr)));
return sh->len;
}
static inline size_t sdsavail(const sds s) {
struct sdshdr *sh = (void*)(s-(sizeof(struct sdshdr)));
return sh->free;
}
redis 4.0及之后
/* Note: sdshdr5 is never used, we just access the flags byte directly.
* However is here to document the layout of type 5 SDS strings. */
struct __attribute__ ((__packed__)) sdshdr5 {
unsigned char flags; /* 3 lsb of type, and 5 msb of string length */
char buf[];
};
struct __attribute__ ((__packed__)) sdshdr8 {
uint8_t len; /* used */
uint8_t alloc; /* excluding the header and null terminator */
unsigned char flags; /* 3 lsb of type, 5 unused bits */
char buf[];
};
struct __attribute__ ((__packed__)) sdshdr16 {
uint16_t len; /* used */
uint16_t alloc; /* excluding the header and null terminator */
unsigned char flags; /* 3 lsb of type, 5 unused bits */
char buf[];
};
struct __attribute__ ((__packed__)) sdshdr32 {
uint32_t len; /* used */
uint32_t alloc; /* excluding the header and null terminator */
unsigned char flags; /* 3 lsb of type, 5 unused bits */
char buf[];
};
struct __attribute__ ((__packed__)) sdshdr64 {
uint64_t len; /* used */
uint64_t alloc; /* excluding the header and null terminator */
unsigned char flags; /* 3 lsb of type, 5 unused bits */
char buf[];
};
#define SDS_TYPE_5 0
#define SDS_TYPE_8 1
#define SDS_TYPE_16 2
#define SDS_TYPE_32 3
#define SDS_TYPE_64 4
#define SDS_TYPE_MASK 7
#define SDS_TYPE_BITS 3
#define SDS_HDR_VAR(T,s) struct sdshdr##T *sh = (void*)((s)-(sizeof(struct sdshdr##T)));
#define SDS_HDR(T,s) ((struct sdshdr##T *)((s)-(sizeof(struct sdshdr##T))))
#define SDS_TYPE_5_LEN(f) ((f)>>SDS_TYPE_BITS)
static inline size_t sdslen(const sds s) {
unsigned char flags = s[-1];
switch(flags&SDS_TYPE_MASK) {
case SDS_TYPE_5:
return SDS_TYPE_5_LEN(flags);
case SDS_TYPE_8:
return SDS_HDR(8,s)->len;
case SDS_TYPE_16:
return SDS_HDR(16,s)->len;
case SDS_TYPE_32:
return SDS_HDR(32,s)->len;
case SDS_TYPE_64:
return SDS_HDR(64,s)->len;
}
return 0;
}
static inline void sdsinclen(sds s, size_t inc) {
unsigned char flags = s[-1];
switch(flags&SDS_TYPE_MASK) {
case SDS_TYPE_5:
{
unsigned char *fp = ((unsigned char*)s)-1;
unsigned char newlen = SDS_TYPE_5_LEN(flags)+inc;
*fp = SDS_TYPE_5 | (newlen << SDS_TYPE_BITS);
}
break;
case SDS_TYPE_8:
SDS_HDR(8,s)->len += inc;
break;
case SDS_TYPE_16:
SDS_HDR(16,s)->len += inc;
break;
case SDS_TYPE_32:
SDS_HDR(32,s)->len += inc;
break;
case SDS_TYPE_64:
SDS_HDR(64,s)->len += inc;
break;
}
}

+-------------+-------+------------------------+-----------+-------------\
| Len | Alloc | Flags | H E L L O W O R L D \n | Null term | Free space \
+---------------------+------------------------+-----------+-------------\
| | |
`-> sds hdr `-> sds `-> '\0'
- C语言struct默认按照字节对齐,使用struct __attribute__ ((__packed__)) sdshdr8声明的sdshdr8结构取消字节对齐,紧凑排列
- 为了节省空间,redis为不同大小的sds使用了不同的结构。在给sds扩容时,会动态调整头部结构,并且分配min(1M, sds.len)大小的额外空间,以减少内存分配次数
- redis的sds结构使用的内存空间为hdrlen + initlen + 1,其中hdrlen是sds的头部大小,initlen是sds的字符数组大小,1是sds字符数组后\0标志位的大小。之所以保存以空字符 '\0’结尾就是为了让那些保存文本数据的SDS可以重用一部分<string.h>库定义的函数。
- sds的存储包括两部分。第一部分是sds头,包含len, alloc, flags, buf,其中len是sds的buf使用的长度,alloc是sds的buf总长度,flags是标志位,用于记录当前sds的类型,char buf[]是一个长度为0的字符数组,在C语言中称为柔性数组,主要是为了满足可变长度的字符数组。第二部分是buf数组的实际存储
- 在第39行的代码中,sdshdr##T的宏定义会在预编译时展开,将T的值拼接在sdshdr上。void*将被隐式转换为struct sdshdr##T*,SDS_HDR_VAR(5, s)将会得到变量sh,指向s的头部地址
sds源码
_sdsnewlen
使用init字符数组和指定长度initlen创建sds
/* Create a new sds string with the content specified by the 'init' pointer
* and 'initlen'.
* If NULL is used for 'init' the string is initialized with zero bytes.
* If SDS_NOINIT is used, the buffer is left uninitialized;
*
* The string is always null-termined (all the sds strings are, always) so
* even if you create an sds string with:
*
* mystring = sdsnewlen("abc",3);
*
* You can print the string with printf() as there is an implicit \0 at the
* end of the string. However the string is binary safe and can contain
* \0 characters in the middle, as the length is stored in the sds header. */
sds _sdsnewlen(const void *init, size_t initlen, int trymalloc) {
void *sh;
sds s;
char type = sdsReqType(initlen);
/* Empty strings are usually created in order to append. Use type 8
* since type 5 is not good at this. */
if (type == SDS_TYPE_5 && initlen == 0) type = SDS_TYPE_8;
int hdrlen = sdsHdrSize(type);
unsigned char *fp; /* flags pointer. */
size_t usable;
assert(initlen + hdrlen + 1 > initlen); /* Catch size_t overflow */
sh = trymalloc?
s_trymalloc_usable(hdrlen+initlen+1, &usable) :
s_malloc_usable(hdrlen+initlen+1, &usable);
if (sh == NULL) return NULL;
if (init==SDS_NOINIT)
init = NULL;
else if (!init)
memset(sh, 0, hdrlen+initlen+1);
s = (char*)sh+hdrlen;
fp = ((unsigned char*)s)-1;
usable = usable-hdrlen-1;
if (usable > sdsTypeMaxSize(type))
usable = sdsTypeMaxSize(type);
switch(type) {
case SDS_TYPE_5: {
*fp = type | (initlen << SDS_TYPE_BITS);
break;
}
case SDS_TYPE_8: {
SDS_HDR_VAR(8,s);
sh->len = initlen;
sh->alloc = usable;
*fp = type;
break;
}
case SDS_TYPE_16: {
SDS_HDR_VAR(16,s);
sh->len = initlen;
sh->alloc = usable;
*fp = type;
break;
}
case SDS_TYPE_32: {
SDS_HDR_VAR(32,s);
sh->len = initlen;
sh->alloc = usable;
*fp = type;
break;
}
case SDS_TYPE_64: {
SDS_HDR_VAR(64,s);
sh->len = initlen;
sh->alloc = usable;
*fp = type;
break;
}
}
if (initlen && init)
memcpy(s, init, initlen);
s[initlen] = '\0';
return s;
}

sdsMakeRoomFor
将sds的空间在原来的基础上扩大addlen
/* Enlarge the free space at the end of the sds string so that the caller
* is sure that after calling this function can overwrite up to addlen
* bytes after the end of the string, plus one more byte for nul term.
*
* Note: this does not change the *length* of the sds string as returned
* by sdslen(), but only the free buffer space we have. */
sds sdsMakeRoomFor(sds s, size_t addlen) {
void *sh, *newsh;
size_t avail = sdsavail(s);
size_t len, newlen, reqlen;
char type, oldtype = s[-1] & SDS_TYPE_MASK;
int hdrlen;
size_t usable;
/* Return ASAP if there is enough space left. */
if (avail >= addlen) return s;
len = sdslen(s);
sh = (char*)s-sdsHdrSize(oldtype);
reqlen = newlen = (len+addlen);
assert(newlen > len); /* Catch size_t overflow */
if (newlen < SDS_MAX_PREALLOC)
newlen *= 2;
else
newlen += SDS_MAX_PREALLOC;
type = sdsReqType(newlen);
/* Don't use type 5: the user is appending to the string and type 5 is
* not able to remember empty space, so sdsMakeRoomFor() must be called
* at every appending operation. */
if (type == SDS_TYPE_5) type = SDS_TYPE_8;
hdrlen = sdsHdrSize(type);
assert(hdrlen + newlen + 1 > reqlen); /* Catch size_t overflow */
if (oldtype==type) {
newsh = s_realloc_usable(sh, hdrlen+newlen+1, &usable);
if (newsh == NULL) return NULL;
s = (char*)newsh+hdrlen;
} else {
/* Since the header size changes, need to move the string forward,
* and can't use realloc */
newsh = s_malloc_usable(hdrlen+newlen+1, &usable);
if (newsh == NULL) return NULL;
memcpy((char*)newsh+hdrlen, s, len+1);
s_free(sh);
s = (char*)newsh+hdrlen;
s[-1] = type;
sdssetlen(s, len);
}
usable = usable-hdrlen-1;
if (usable > sdsTypeMaxSize(type))
usable = sdsTypeMaxSize(type);
sdssetalloc(s, usable);
return s;
}

sdscatlen
将字符数组t的前len个字符增加到sds的末尾,并动态扩容
/* Append the specified binary-safe string pointed by 't' of 'len' bytes to the
* end of the specified sds string 's'.
*
* After the call, the passed sds string is no longer valid and all the
* references must be substituted with the new pointer returned by the call. */
sds sdscatlen(sds s, const void *t, size_t len) {
size_t curlen = sdslen(s);
s = sdsMakeRoomFor(s,len);
if (s == NULL) return NULL;
memcpy(s+curlen, t, len);
sdssetlen(s, curlen+len);
s[curlen+len] = '\0';
return s;
}
sdsfree
释放sds的空间,需要回到sds头部再释放
/* Free an sds string. No operation is performed if 's' is NULL. */
void sdsfree(sds s) {
if (s == NULL) return;
s_free((char*)s-sdsHdrSize(s[-1]));
}
sdsdup
深拷贝,使用当前sds创建一个新的sds
/* Duplicate an sds string. */
sds sdsdup(const sds s) {
return sdsnewlen(s, sdslen(s));
}
调试sdsTest
gcc -std=c99 sds.c -g -nostartfiles -esdsTest -o sds zmalloc.c -DREDIS_TEST
- 需要使用gcc编译,而不是g++,因为C语言会将void*隐式转换为其他类型,而C++不行
- -g 参数会在目标文件中添加调试信息
- -nostratfiles -esdsTest参数将函数入口变成sdsTest函数
- -o sds指定生成文件名
- zmalloc.c是sds特有的内存管理文件,需要指定在编译参数中
- -DRESID_TEST用来定义参数REDIS_TEST
gdb sds
#ifdef REDIS_TEST
#include <stdio.h>
#include <limits.h>
#include "testhelp.h"
#define UNUSED(x) (void)(x) //编译器对于未使用的变量会发出警告,这个函数是为了让编译器闭嘴:),类似go语言的 _ = x
int sdsTest(int argc, char **argv, int accurate) {
UNUSED(argc);
UNUSED(argv);
UNUSED(accurate);
// 选了一些有代表性的test
{
sds x = sdsnew("foo");
test_cond("Create a string and obtain the length",
sdslen(x) == 3 && memcmp(x,"foo\0",4) == 0);
sdsfree(x);
x = sdsnewlen("\0\n\r",3);
test_cond("Create a string with specified length",
sdslen(x) == 3 && memcmp(x,"\0\n\r\0",4) == 0);
sdsfree(x);
x = sdsnewlen("foo",2);
test_cond("Create a string with specified length",
sdslen(x) == 2 && memcmp(x,"fo\0",3) == 0);
x = sdscat(x,"bar");
test_cond("Strings concatenation",
sdslen(x) == 5 && memcmp(x,"fobar\0",6) == 0);
x = sdscpy(x,"xyzxxxxxxxxxxyyyyyyyyyykkkkkkkkkk");
test_cond("sdscpy() against an originally shorter string",
sdslen(x) == 33 &&
memcmp(x,"xyzxxxxxxxxxxyyyyyyyyyykkkkkkkkkk\0",33) == 0);
{
sdsfree(x);
char etalon[1024*1024];
for (size_t i = 0; i < sizeof(etalon); i++) {
etalon[i] = '0';
}
x = sdscatprintf(sdsempty(),"%0*d",(int)sizeof(etalon),0);
test_cond("sdscatprintf() can print 1MB",
sdslen(x) == sizeof(etalon) && memcmp(x,etalon,sizeof(etalon)) == 0);
}
}
test_report();
return 0;
}
#endif
调试环境:公司开发机







一些问题
- 在 sdsMakeRoomFor 中发现注释 “不要使用sdshdr5”,实际调试时仍然会使用 sdshdr5?
为了节省内存,允许初始化时使用sdshdr5,在对sds进行扩容时会将sdshdr5扩展成sdshdr8 - _sdsnewlen会使用zmalloc.c下的zmalloc_usable函数,zmalloc.c在分配内存时做了些什么?
在编译时执行编译参数可以分别使用glibc的ptmalloc、google的tcmalloc,facebook的jemalloc作为内存管理工具,zmalloc将这几种内存管理工具封装成了一个整体,为上层提供统一接口 - malloc时需要传入申请的内存大小,但是在free时只需要传入一个指针,free怎么知道它的大小?
glibc的malloc相关源码没太看懂,参考CSAPP,在malloc时会申请更大的内存空间,用于存储当前内存的长度等信息,类似sds
reference
- redis6.2 README
- SDS README
- GDB入门指南
本人才疏学浅,理解水平有限,如果有错误及不当之处,请批评指正。
