你的位置:首页 > 信息动态 > 新闻中心
信息动态
联系我们

redis源码阅读笔记 - sds

2021/12/25 2:33:55

本文源码来自redis 6.2
link:https://github.com/redis/redis/tree/6.2
https://github.com/redis/redis/blob/6.2/src/sds.h
https://github.com/redis/redis/blob/6.2/src/sds.c

sds简介

sds(simple dynamic string)全称简单动态字符串,它使用C语言封装了一个二进制安全的字符串函数库,区别于C语言的字符数组使用’\0’作为结束符,sds维护了一个header,用于保存当前字符数组的使用长度等,因此sds可以保证存入其中的数据完整性而不发生截断,redis使用sds作为其中的字符串表示

+--------+-------------------------------+-----------+
| Header | Binary safe C alike string... | Null term |
+--------+-------------------------------+-----------+
         |
         `-> Pointer returned to the user.

sds内存结构

redis 4.0之前

struct sdshdr {
    unsigned int len; 
    unsigned int free;
    char buf[];
};

static inline size_t sdslen(const sds s) {
    struct sdshdr *sh = (void*)(s-(sizeof(struct sdshdr)));
    return sh->len;
}

static inline size_t sdsavail(const sds s) {
    struct sdshdr *sh = (void*)(s-(sizeof(struct sdshdr)));
    return sh->free;
}

redis 4.0及之后

/* Note: sdshdr5 is never used, we just access the flags byte directly.
 * However is here to document the layout of type 5 SDS strings. */
struct __attribute__ ((__packed__)) sdshdr5 {
    unsigned char flags; /* 3 lsb of type, and 5 msb of string length */
    char buf[];
};
struct __attribute__ ((__packed__)) sdshdr8 {
    uint8_t len; /* used */
    uint8_t alloc; /* excluding the header and null terminator */
    unsigned char flags; /* 3 lsb of type, 5 unused bits */
    char buf[];
};
struct __attribute__ ((__packed__)) sdshdr16 {
    uint16_t len; /* used */
    uint16_t alloc; /* excluding the header and null terminator */
    unsigned char flags; /* 3 lsb of type, 5 unused bits */
    char buf[];
};
struct __attribute__ ((__packed__)) sdshdr32 {
    uint32_t len; /* used */
    uint32_t alloc; /* excluding the header and null terminator */
    unsigned char flags; /* 3 lsb of type, 5 unused bits */
    char buf[];
};
struct __attribute__ ((__packed__)) sdshdr64 {
    uint64_t len; /* used */
    uint64_t alloc; /* excluding the header and null terminator */
    unsigned char flags; /* 3 lsb of type, 5 unused bits */
    char buf[];
};

#define SDS_TYPE_5  0
#define SDS_TYPE_8  1
#define SDS_TYPE_16 2
#define SDS_TYPE_32 3
#define SDS_TYPE_64 4
#define SDS_TYPE_MASK 7
#define SDS_TYPE_BITS 3
#define SDS_HDR_VAR(T,s) struct sdshdr##T *sh = (void*)((s)-(sizeof(struct sdshdr##T)));
#define SDS_HDR(T,s) ((struct sdshdr##T *)((s)-(sizeof(struct sdshdr##T))))
#define SDS_TYPE_5_LEN(f) ((f)>>SDS_TYPE_BITS)

static inline size_t sdslen(const sds s) {
    unsigned char flags = s[-1];
    switch(flags&SDS_TYPE_MASK) {
        case SDS_TYPE_5:
            return SDS_TYPE_5_LEN(flags);
        case SDS_TYPE_8:
            return SDS_HDR(8,s)->len;
        case SDS_TYPE_16:
            return SDS_HDR(16,s)->len;
        case SDS_TYPE_32:
            return SDS_HDR(32,s)->len;
        case SDS_TYPE_64:
            return SDS_HDR(64,s)->len;
    }
    return 0;
}

static inline void sdsinclen(sds s, size_t inc) {
    unsigned char flags = s[-1];
    switch(flags&SDS_TYPE_MASK) {
        case SDS_TYPE_5:
            {
                unsigned char *fp = ((unsigned char*)s)-1;
                unsigned char newlen = SDS_TYPE_5_LEN(flags)+inc;
                *fp = SDS_TYPE_5 | (newlen << SDS_TYPE_BITS);
            }
            break;
        case SDS_TYPE_8:
            SDS_HDR(8,s)->len += inc;
            break;
        case SDS_TYPE_16:
            SDS_HDR(16,s)->len += inc;
            break;
        case SDS_TYPE_32:
            SDS_HDR(32,s)->len += inc;
            break;
        case SDS_TYPE_64:
            SDS_HDR(64,s)->len += inc;
            break;
    }
}

在这里插入图片描述

     +-------------+-------+------------------------+-----------+-------------\
     | Len | Alloc | Flags | H E L L O W O R L D \n | Null term |  Free space \
     +---------------------+------------------------+-----------+-------------\
     |                     |                        |
     `->  sds hdr          `-> sds                  `-> '\0'
  1. C语言struct默认按照字节对齐,使用struct __attribute__ ((__packed__)) sdshdr8声明的sdshdr8结构取消字节对齐,紧凑排列
  2. 为了节省空间,redis为不同大小的sds使用了不同的结构。在给sds扩容时,会动态调整头部结构,并且分配min(1M, sds.len)大小的额外空间,以减少内存分配次数
  3. redis的sds结构使用的内存空间为hdrlen + initlen + 1,其中hdrlen是sds的头部大小,initlen是sds的字符数组大小,1是sds字符数组后\0标志位的大小。之所以保存以空字符 '\0’结尾就是为了让那些保存文本数据的SDS可以重用一部分<string.h>库定义的函数。
  4. sds的存储包括两部分。第一部分是sds头,包含len, alloc, flags, buf,其中len是sds的buf使用的长度,alloc是sds的buf总长度,flags是标志位,用于记录当前sds的类型,char buf[]是一个长度为0的字符数组,在C语言中称为柔性数组,主要是为了满足可变长度的字符数组。第二部分是buf数组的实际存储
  5. 在第39行的代码中,sdshdr##T的宏定义会在预编译时展开,将T的值拼接在sdshdr上。void*将被隐式转换为struct sdshdr##T*,SDS_HDR_VAR(5, s)将会得到变量sh,指向s的头部地址

sds源码

_sdsnewlen

使用init字符数组和指定长度initlen创建sds

/* Create a new sds string with the content specified by the 'init' pointer
 * and 'initlen'.
 * If NULL is used for 'init' the string is initialized with zero bytes.
 * If SDS_NOINIT is used, the buffer is left uninitialized;
 *
 * The string is always null-termined (all the sds strings are, always) so
 * even if you create an sds string with:
 *
 * mystring = sdsnewlen("abc",3);
 *
 * You can print the string with printf() as there is an implicit \0 at the
 * end of the string. However the string is binary safe and can contain
 * \0 characters in the middle, as the length is stored in the sds header. */
sds _sdsnewlen(const void *init, size_t initlen, int trymalloc) {
    void *sh;
    sds s;
    char type = sdsReqType(initlen);
    /* Empty strings are usually created in order to append. Use type 8
     * since type 5 is not good at this. */
    if (type == SDS_TYPE_5 && initlen == 0) type = SDS_TYPE_8;
    int hdrlen = sdsHdrSize(type);
    unsigned char *fp; /* flags pointer. */
    size_t usable;

    assert(initlen + hdrlen + 1 > initlen); /* Catch size_t overflow */
    sh = trymalloc?
        s_trymalloc_usable(hdrlen+initlen+1, &usable) :
        s_malloc_usable(hdrlen+initlen+1, &usable);
    if (sh == NULL) return NULL;
    if (init==SDS_NOINIT)
        init = NULL;
    else if (!init)
        memset(sh, 0, hdrlen+initlen+1);
    s = (char*)sh+hdrlen;
    fp = ((unsigned char*)s)-1;
    usable = usable-hdrlen-1;
    if (usable > sdsTypeMaxSize(type))
        usable = sdsTypeMaxSize(type);
    switch(type) {
        case SDS_TYPE_5: {
            *fp = type | (initlen << SDS_TYPE_BITS);
            break;
        }
        case SDS_TYPE_8: {
            SDS_HDR_VAR(8,s);
            sh->len = initlen;
            sh->alloc = usable;
            *fp = type;
            break;
        }
        case SDS_TYPE_16: {
            SDS_HDR_VAR(16,s);
            sh->len = initlen;
            sh->alloc = usable;
            *fp = type;
            break;
        }
        case SDS_TYPE_32: {
            SDS_HDR_VAR(32,s);
            sh->len = initlen;
            sh->alloc = usable;
            *fp = type;
            break;
        }
        case SDS_TYPE_64: {
            SDS_HDR_VAR(64,s);
            sh->len = initlen;
            sh->alloc = usable;
            *fp = type;
            break;
        }
    }
    if (initlen && init)
        memcpy(s, init, initlen);
    s[initlen] = '\0';
    return s;
}

在这里插入图片描述

sdsMakeRoomFor

将sds的空间在原来的基础上扩大addlen

/* Enlarge the free space at the end of the sds string so that the caller
 * is sure that after calling this function can overwrite up to addlen
 * bytes after the end of the string, plus one more byte for nul term.
 *
 * Note: this does not change the *length* of the sds string as returned
 * by sdslen(), but only the free buffer space we have. */
sds sdsMakeRoomFor(sds s, size_t addlen) {
    void *sh, *newsh;
    size_t avail = sdsavail(s);
    size_t len, newlen, reqlen;
    char type, oldtype = s[-1] & SDS_TYPE_MASK;
    int hdrlen;
    size_t usable;

    /* Return ASAP if there is enough space left. */
    if (avail >= addlen) return s;

    len = sdslen(s);
    sh = (char*)s-sdsHdrSize(oldtype);
    reqlen = newlen = (len+addlen);
    assert(newlen > len);   /* Catch size_t overflow */
    if (newlen < SDS_MAX_PREALLOC)
        newlen *= 2;
    else
        newlen += SDS_MAX_PREALLOC;

    type = sdsReqType(newlen);

    /* Don't use type 5: the user is appending to the string and type 5 is
     * not able to remember empty space, so sdsMakeRoomFor() must be called
     * at every appending operation. */
    if (type == SDS_TYPE_5) type = SDS_TYPE_8;

    hdrlen = sdsHdrSize(type);
    assert(hdrlen + newlen + 1 > reqlen);  /* Catch size_t overflow */
    if (oldtype==type) {
        newsh = s_realloc_usable(sh, hdrlen+newlen+1, &usable);
        if (newsh == NULL) return NULL;
        s = (char*)newsh+hdrlen;
    } else {
        /* Since the header size changes, need to move the string forward,
         * and can't use realloc */
        newsh = s_malloc_usable(hdrlen+newlen+1, &usable);
        if (newsh == NULL) return NULL;
        memcpy((char*)newsh+hdrlen, s, len+1);
        s_free(sh);
        s = (char*)newsh+hdrlen;
        s[-1] = type;
        sdssetlen(s, len);
    }
    usable = usable-hdrlen-1;
    if (usable > sdsTypeMaxSize(type))
        usable = sdsTypeMaxSize(type);
    sdssetalloc(s, usable);
    return s;
}

在这里插入图片描述

sdscatlen

将字符数组t的前len个字符增加到sds的末尾,并动态扩容

/* Append the specified binary-safe string pointed by 't' of 'len' bytes to the
 * end of the specified sds string 's'.
 *
 * After the call, the passed sds string is no longer valid and all the
 * references must be substituted with the new pointer returned by the call. */
sds sdscatlen(sds s, const void *t, size_t len) {
    size_t curlen = sdslen(s);

    s = sdsMakeRoomFor(s,len);
    if (s == NULL) return NULL;
    memcpy(s+curlen, t, len);
    sdssetlen(s, curlen+len);
    s[curlen+len] = '\0';
    return s;
}

sdsfree

释放sds的空间,需要回到sds头部再释放

/* Free an sds string. No operation is performed if 's' is NULL. */
void sdsfree(sds s) {
    if (s == NULL) return;
    s_free((char*)s-sdsHdrSize(s[-1]));
}

sdsdup

深拷贝,使用当前sds创建一个新的sds

/* Duplicate an sds string. */
sds sdsdup(const sds s) {
    return sdsnewlen(s, sdslen(s));
}

调试sdsTest

gcc -std=c99 sds.c -g -nostartfiles -esdsTest -o sds zmalloc.c -DREDIS_TEST

  1. 需要使用gcc编译,而不是g++,因为C语言会将void*隐式转换为其他类型,而C++不行
  2. -g 参数会在目标文件中添加调试信息
  3. -nostratfiles -esdsTest参数将函数入口变成sdsTest函数
  4. -o sds指定生成文件名
  5. zmalloc.c是sds特有的内存管理文件,需要指定在编译参数中
  6. -DRESID_TEST用来定义参数REDIS_TEST
    gdb sds
#ifdef REDIS_TEST
#include <stdio.h>
#include <limits.h>
#include "testhelp.h"

#define UNUSED(x) (void)(x) //编译器对于未使用的变量会发出警告,这个函数是为了让编译器闭嘴:),类似go语言的 _ = x

int sdsTest(int argc, char **argv, int accurate) {
    UNUSED(argc);
    UNUSED(argv);
    UNUSED(accurate);

    // 选了一些有代表性的test
    {
        sds x = sdsnew("foo");

        test_cond("Create a string and obtain the length",
            sdslen(x) == 3 && memcmp(x,"foo\0",4) == 0);

        sdsfree(x);
        x = sdsnewlen("\0\n\r",3);
        test_cond("Create a string with specified length",
            sdslen(x) == 3 && memcmp(x,"\0\n\r\0",4) == 0);
            
        sdsfree(x);
        x = sdsnewlen("foo",2);
        test_cond("Create a string with specified length",
            sdslen(x) == 2 && memcmp(x,"fo\0",3) == 0);

        x = sdscat(x,"bar");
        test_cond("Strings concatenation",
            sdslen(x) == 5 && memcmp(x,"fobar\0",6) == 0);

        x = sdscpy(x,"xyzxxxxxxxxxxyyyyyyyyyykkkkkkkkkk");
        test_cond("sdscpy() against an originally shorter string",
            sdslen(x) == 33 &&
            memcmp(x,"xyzxxxxxxxxxxyyyyyyyyyykkkkkkkkkk\0",33) == 0);

        {
            sdsfree(x);
            char etalon[1024*1024];
            for (size_t i = 0; i < sizeof(etalon); i++) {
                etalon[i] = '0';
            }
            x = sdscatprintf(sdsempty(),"%0*d",(int)sizeof(etalon),0);
            test_cond("sdscatprintf() can print 1MB",
                sdslen(x) == sizeof(etalon) && memcmp(x,etalon,sizeof(etalon)) == 0);
        }
    }
    test_report();
    return 0;
}
#endif

调试环境:公司开发机
在这里插入图片描述
在这里插入图片描述
在这里插入图片描述
在这里插入图片描述
在这里插入图片描述
在这里插入图片描述
在这里插入图片描述

一些问题

  1. 在 sdsMakeRoomFor 中发现注释 “不要使用sdshdr5”,实际调试时仍然会使用 sdshdr5?
    为了节省内存,允许初始化时使用sdshdr5,在对sds进行扩容时会将sdshdr5扩展成sdshdr8
  2. _sdsnewlen会使用zmalloc.c下的zmalloc_usable函数,zmalloc.c在分配内存时做了些什么?
    在编译时执行编译参数可以分别使用glibc的ptmalloc、google的tcmalloc,facebook的jemalloc作为内存管理工具,zmalloc将这几种内存管理工具封装成了一个整体,为上层提供统一接口
  3. malloc时需要传入申请的内存大小,但是在free时只需要传入一个指针,free怎么知道它的大小?
    glibc的malloc相关源码没太看懂,参考CSAPP,在malloc时会申请更大的内存空间,用于存储当前内存的长度等信息,类似sds

reference

  1. redis6.2 README
  2. SDS README
  3. GDB入门指南

本人才疏学浅,理解水平有限,如果有错误及不当之处,请批评指正。