内核数据结构

Linux内核中实现了一些常见的数据结构，而且内核要求代码越高效越好，所以这些数据结构的实现极为优雅！因此，千万不要自己实现山寨的数据结构。

内核的数据结构包括：

链表（双向循环）
队列
IDR & IDA
红黑树

链表

链表头文件

位于include/linux/link.h

链表实现

一般我们写链表的时候，可能会这样定义：

struct list_item {
        void *data;               // 数据
        struct list_item *next;   // 下一个元素
        struct list_item *prev;   // 上一个元素
}

这种链表实现将数据结构塞进链表中（数据结构data被放到了链表节点list_item中）。

而linux的链表实现则是将链表节点塞进数据结构中。

链表节点的定义十分简单：

struct list_head {
        struct list_head *next;
        struct list_head *prev;
}

我们定义一个数据结构，将链表节点插入到数据结构中：

struct student {
        unsigned int age;
        bool is_boy;
        char name[16];
        struct list_head list;
}

那么问题来了，现在的链表中内存中大概是这样的：

student Tom:                       student Jacky:
[                                  [
age                                age
is_boy                             is_boy
name                               name
next             ----------->      next      
prev                               prev
]

通过student Tom的list.next指针访问的是student Jacky所在内存的next地址。
我们知道，c的数据类型是用来告诉编译器，这个数据有多长，仅此而已。
比如通过Jacky.name访问学生Jacky的名字，编译后，是直接通过Jacky内存地址偏移x个字节进行访问的，机器可不认你的变量名。

那么问题来了，获得Jacky中的list的地址是没用的，怎么获得Jacky的地址呢，难道要我们手动使用list地址 - x字节计算？
其实内核都为我们考虑好了，我们可以通过list_entry获得jacky的指针，list_entry定义如下：

#define list_entry(ptr, type, member) \
	container_of(ptr, type, member)
  
#define container_of(ptr, type, member) ({				\
	void *__mptr = (void *)(ptr);					\
	static_assert(__same_type(*(ptr), ((type *)0)->member) ||	\
		      __same_type(*(ptr), void),			\
		      "pointer type mismatch in container_of()");	\
	((type *)(__mptr - offsetof(type, member))); })

获得jacky的地址：

/* 
 * ptr是链表节点指针; struct student是包含链表节点的自定义数据类型; list是链表节点在自定义数据类型里链表节点的变量名
 * 编译时就确定了偏移
 */
struct student jacky = list_entry(ptr, struct student, list);

链表使用

插入节点

给定节点后插入：

1	list_add(struct list_head new, struct list_head head);

插入到尾部(将新节点插入到给定的节点前)：

1	list_add_tail(struct list_head new, struct list_head head);

删除节点

从链表中删除一个节点：

/*
 * 只是简单的修改：
 * next->prev = prev;
 * prev->next = next;
 * 还需要手动释放包含节点的结构体、以及节点的内存。
 */
list_del(struct list_head *entry);

从链表中删除一个节点，并重新初始化：

/*
 * 除了修改前后节点，还会将entry的prev、next指向自身。
 */
list_del_init(struct list_head *entry);

移动与合并

移动节点到另一个链表：

/*
 * 从一个链表中移除list，将list加到head节点后面。
 *
 */
list_move(struct list_head *list, struct list_head *head);

移动节点到另一个链表的尾部：

/*
 * 将list加到另一个链表head节点前面，即尾部。
 *
 */
list_move_tail(struct list_head *list, struct list_head *head);

合并链表：

/*
 * 将list指向的链表插入到head链表后面。
 */
list_splice(struct list_head *list, struct list_head *head);

合并链表并重新初始化原来的链表：

1	list_splice_init(struct list_head list, struct list_head head);

遍历链表

1	list_for_each(pos, head)

pos是临时变量，保存临时节点，head是提供到“链表头”。

直接拿到包含链表节点到数据结构指针：

1	list_for_each_entry(pos, head, member)

pos是包含链表结构的结构类型，list是结构中链表节点的变量名。

反向遍历：

1	list_for_each_entry_reverse(pos, head, member)

保存next节点（用于删除）：

1	list_for_each_entry_safe(pos, next, head, member)

demo

简单使用一下头部插入（栈的好实现）、遍历、删除
student_list.c:

#include <linux/init.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/slab.h>

struct student {
        unsigned int age;
        bool is_boy;
        char name[16];
        struct list_head list;
};

static LIST_HEAD(student_list);

static int student_init(void)
{
        struct student *p, *n;

        p = kmalloc(sizeof(struct student), GFP_KERNEL);
        p->age = 18;
        p->is_boy = false;
        p->name[0] = 'T';
        p->name[1] = 'O';
        p->name[2] = 'M';
        p->name[3] = '\0';
        list_add(&p->list, &student_list);

        p = kmalloc(sizeof(struct student), GFP_KERNEL);
        p->age = 19;
        p->is_boy = true;
        p->name[0] = 'J';
        p->name[1] = 'A';
        p->name[2] = 'C';
        p->name[3] = 'K';
        p->name[4] = 'Y';
        p->name[5] = '\0';
        list_add(&p->list, &student_list);

        list_for_each_entry(p, &student_list, list){
                printk(KERN_ALERT "name: %s", p->name);
        }

        list_for_each_entry_safe(p, n, &student_list, list) {
                list_del(&p->list);
                kfree(p);
        }

        return 0;
}

static void student_exit(void)
{
        printk(KERN_ALERT "student_exit enter\n");
}

module_init(student_init);
module_exit(student_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("lyq1996");
MODULE_DESCRIPTION("A link list example");

Makefile:

ifneq ($(KERNELRELEASE),)
        obj-m := student_list.o
else
        KDIR ?= /lib/modules/`uname -r`/build

all:
        $(MAKE) -C $(KDIR) M=$(PWD) modules
clean:
        $(MAKE) -C $(KDIR) M=$(PWD) clean
endif

结果：

[  340.637294] student_exit enter
[  346.474869] name: JACKY
[  346.474911] name: TOM
[  349.238916] student_exit enter

list_bl.h是一种特殊的链表版本，把链表头的最低位作为锁，实现hashtable的时候很有用！
list_lru.h 有点复杂了，与内存收缩有关，开发者发现收缩时总是把最久最少使用的内存给释放掉，所以抽出了公共的list_lru。不太懂，不敢乱说，也还不会使用。
list_nulls.h也是一种特殊的链表版本，链表尾不是空指针，但却可以用来表示”空s”，可以有2^31不同的值。
list_sort.h用于链表排序，传入对比用的函数指针。

队列

队列头文件

位于include/linux/kfifo.h，内核队列使用环形缓冲区实现，并且借助了内存屏障，实现了一个reader和一个writer的情况下的无锁并发访问。
这个头里面全部都是宏定义，看的头昏。
大概就是in指向队尾，out指向队头，in%size决定元素存放在第几个位置。在多个writer、一个reader时，只需要对writer加锁；在多个reader、一个writer时，只需要对reader加锁。

队列使用

初始化队列

静态声明+初始化（很少使用）：

// 声明队列 size时元素个数 为2的幂
DECLARE_KFIFO(fifo, type, size)
// 初始化队列
INIT_KFIFO(fifo)

或者

1 2	// 声明+初始化 DEFINE_KFIFO(fifo, type, size)

动态初始化：

1
2
3

DECLARE_KFIFO_PTR(fifo, type);
// gfp_mask为内存分配标识
kfifo_alloc(fifo, size, gfp_mask)

也可以自己提供缓冲区：

1 2	// buffer是预分配的内存 kfifo_init(fifo, buffer, size)

基本方法

检查kfifo是否初始化：

1	kfifo_initialized(fifo)

返回kfifo单个元素大小：

1	kfifo_esize(fifo)

返回kfifo记录长度字段的大小：

1	kfifo_recsize(fifo)

返回kfifo元素大小：

1	kfifo_size(fifo)

重置kfifo：

1 2	// 将in和out置为0 kfifo_reset(fifo)

重置out（忽略当前入队的元素）：

1	kfifo_reset_out(fifo)

返回kfifo元素个数：

1	kfifo_len(fifo)

判断kfifo是否为空：

1	kfifo_is_empty(fifo)

判断kfifo是否为空（自旋锁）：

1	kfifo_is_empty_spinlocked(fifo, lock)

判断kfifo是否为空（自旋锁、不禁用中断）：

1	kfifo_is_empty_spinlocked_noirqsave(fifo, lock)

判断kfifo是否满了

1	kfifo_is_full(fifo)

判断kfifo还有多少可用

1	kfifo_avail(fifo)

跳过kfifo当前队头元素

1	kfifo_skip(fifo)

返回kfifo下一条记录的大小

1	kfifo_peek_len(fifo)

插入一个元素到kfifo中：

1	kfifo_put(fifo, val)

弹出一个元素：

1	kfifo_get(fifo, val)

查看队头元素而不弹出：

1	kfifo_peek(fifo, val)

插入多个元素到kfifo中：

1	kfifo_in(fifo, buf, n)

插入多个元素到kfifo中（自旋锁）：

1	kfifo_in_spinlocked(fifo, buf, n, lock)

插入多个元素到kfifo中（自旋锁、不禁用中断）：

1	kfifo_in_spinlocked_noirqsave(fifo, buf, n, lock)

弹出n个元素：

1	kfifo_out(fifo, buf, n)

弹出n个元素（自旋锁）：

1	kfifo_out_spinlocked(fifo, buf, n, lock)

弹出n个元素（自旋锁、不禁用中断）：

1	kfifo_out_spinlocked(fifo, buf, n, lock)

从用户空间入队数据：

1	kfifo_from_user(fifo, from, len, copied)

从队列弹出到用户空间：

1	kfifo_to_user(fifo, to, len, copied)

查看队头数据：

1	kfifo_out_peek(fifo, buf, n)

demo

见samples/kfifo，啥都有

IDR & IDA

关于IDR & IDA

IDR和IDA基于XArray数据结构，4.20以前基于Radix Tree。

IDR可以用于ID到指针的映射；而IDA是特殊的IDR，只能存放ID，没有对应值的映射。

IDR & IDA的使用

只写一点较为混淆的接口。
静态初始化（声明+静态分配）：

1 2	DEFINE_IDR(name) DEFINE_IDA(name)

为下个ida_alloc调用预分配内存：

1
2
3

idr_preload

idr_preload_end

这个函数会设置内存分配flag，然后把抢占关闭掉，之所以这么做是因为有时要拿锁并进行ida_alloc时，内存分配可能会失败，因此提前分配。

分配id：

idr_alloc

循环分配id：

1	idr_alloc_cyclic

id可能大于INT_MAX时：

1	idr_alloc_u32

demo

#include <linux/init.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/idr.h>

#define K_FREE(ptr) \
        do { \
                if (ptr) kfree(ptr); \
        } while (0);

#if 0
#define DYNAMIC_ALLOC
#endif

#ifdef DYNAMIC_ALLOC
static struct idr student_idr;
static struct ida student_ida;
#else
DEFINE_IDR(student_idr);
DEFINE_IDA(student_ida);
#endif

struct student {
        unsigned char age;
        const char *name;
};

static int student_init(void)
{
        struct student *sp;

        unsigned int pos;
        struct student jacky = {
                .age = 12,
                .name = "jacky"
        };
        struct student tom = {
                .age = 13,
                .name = "tom"
        };
        struct student andy = {
                .age = 14,
                .name = "andy"
        };
        struct student kyle = {
                .age = 15,
                .name = "kyle"
        };


#ifdef DYNAMIC_ALLOC
        idr_init(&student_idr);
        ida_init(&student_ida);
#endif

        // get the current position of the cyclic allocator
        pos = idr_get_cursor(&student_idr);
        if (pos == 0) {
                printk(KERN_ALERT "cursor should be 0, right");
        }

        pos = idr_alloc_cyclic(&student_idr, &jacky, 0, 100, GFP_KERNEL);
        if (pos == 0) {
                printk(KERN_ALERT "cursor should be 0, right");
        }

        idr_set_cursor(&student_idr, 2);
        pos = idr_alloc_cyclic(&student_idr, &tom, 0, 100, GFP_KERNEL);
        if (pos == 2) {
                printk(KERN_ALERT "cursor should be 2, right");
        }

        idr_set_cursor(&student_idr, 0);
        pos = idr_alloc_cyclic(&student_idr, &tom, 0, 1, GFP_KERNEL);
        if (pos == -ENOSPC) {
                printk(KERN_ALERT "no id availiable, right");
        }

        pos = idr_alloc(&student_idr, &andy, 1, 2, GFP_KERNEL);
        if (pos == 1) {
                printk(KERN_ALERT "cursor should be 1, right");
        }

        pos = INT_MAX - 1;
        pos = idr_alloc_u32(&student_idr, &kyle, &pos, INT_MAX - 1, GFP_KERNEL);
        printk(KERN_ALERT "idr_alloc_u32 ret: %d", pos);

        sp = idr_find(&student_idr, INT_MAX - 1);
        printk(KERN_ALERT "idr_find: %px", sp);

        printk(KERN_ALERT "idr is %s", idr_is_empty(&student_idr)? "empty" : "not empty");

        idr_for_each_entry(&student_idr, sp, pos) {
                if (sp) {
                        printk(KERN_ALERT "%d : %s", pos, sp->name);
                }
        }

        pos = ida_alloc(&student_ida, GFP_KERNEL);
        printk(KERN_ALERT "ida_alloc ret: %d", pos);

        pos = ida_alloc_range(&student_ida, 0, 1, GFP_KERNEL);
        printk(KERN_ALERT "ida_alloc_range ret: %d", pos);

        pos = ida_alloc_range(&student_ida, 0, 0, GFP_KERNEL);
        printk(KERN_ALERT "ida_alloc_range ret: %d", pos);

        ida_destroy(&student_ida);
        idr_destroy(&student_idr);

        return 0;
}

static void student_exit(void)
{
        printk(KERN_ALERT "student_exit enter\n");
}

module_init(student_init);
module_exit(student_exit);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("A idr & ida example");

/ # insmod student_idr.ko 
[    7.554757] student_idr: loading out-of-tree module taints kernel.
[    7.558928] cursor should be 0, right
[    7.558960] cursor should be 0, right
[    7.559048] cursor should be 2, right
[    7.559141] no id availiable, right
[    7.559231] cursor should be 1, right
[    7.559361] idr_alloc_u32 ret: 0
[    7.559443] idr_find: ffffc900001d7dd0
[    7.559565] idr is not empty
[    7.559682] 0 : jacky
[    7.559751] 1 : andy
[    7.559806] 2 : tom
[    7.559896] 2147483646 : kyle
[    7.559964] ida_alloc ret: 0
[    7.560032] ida_alloc_range ret: 1

红黑树

红黑树头文件

位于include/linux/rbtree.h

红黑树的使用

一般为了最好的性能，需要自己实现节点插入和查找的函数。插入之后，因为已经有序了，所以遍历则不需要提供自己的函数，由内核提供。当然提供节点对比回调函数来插入和查找也是可以的，但是内核不推荐你这么做，因为函数调用会损耗性能。回调函数的使用见我的demo的rb_find_add。

内核红黑树使用起来和链表一样，将节点塞到结构里，而不是把结构塞到节点里。

demo

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/rbtree.h>
#include <linux/slab.h>

struct student_info {
    int id;
    const char *name;
    struct rb_node node;
};

// root of tree
static struct rb_root student_root = RB_ROOT;

struct student_info *student_search(struct rb_root *root, int search_id)
{
    struct rb_node *node = root->rb_node;

    while(node) {
        struct student_info *student = rb_entry(node, struct student_info, node);
        int result;

        result = search_id - student->id;
        if (result < 0) {
            // left sub-tree
            node = node->rb_left;
        }
        else if (result > 0) {
            // right sub-tree
            node = node->rb_right;
        }
        else {
            return student;
        }
    }
    return NULL;
}

int student_insert(struct rb_root *root, struct student_info *student)
{
    struct rb_node **new = &(root->rb_node);
    struct rb_node *parent = NULL;

    while (*new) {
        struct student_info *current_student = rb_entry(*new, struct student_info, node);
        int result = student->id - current_student->id;

        parent = *new;
        if (result < 0) {
            // left sub-tree
            new = &((*new)->rb_left);
        }
        else if (result > 0) {
            // right sub-tree
            new = &((*new)->rb_right);
        }
        else {
            // key already existed
            return -1;
        }
    }
    
    // add node into tree and rebalance
    rb_link_node(&student->node, parent, new);
    rb_insert_color(&student->node, root);
    return 0;
}

int student_cmp(struct rb_node *node, const struct rb_node *parent) {
    struct student_info *current_student = rb_entry(node, struct student_info, node);
    struct student_info *parent_student = rb_entry(parent, struct student_info, node);

    return current_student->id - parent_student->id;
}

static int student_init(void)
{
    struct student_info *student;
    struct rb_node *node;
    struct student_info tom = {.id = 0, .name = "tom", .node = {0}};
    struct student_info andy = {.id = 1, .name = "andy", .node = {0}};
    struct student_info buddy = {.id = 2, .name = "buddy", .node = {0}};
    struct student_info kyle = {.id = 3, .name = "kyle", .node = {0}};
    struct student_info arthur = {.id = 4, .name = "arthur", .node = {0}};
    struct student_info bob = {.id = 5, .name = "bob", .node = {0}};
    struct student_info rudy = {.id = 6, .name = "rudy", .node = {0}};
    struct student_info jacky = {.id = 7, .name = "jacky", .node = {0}};
    struct student_info lisa = {.id = 8, .name = "lisa", .node = {0}};
    struct student_info kevin = {.id = 9, .name = "kevin", .node = {0}};
    student_insert(&student_root, &tom);
    student_insert(&student_root, &andy);
    student_insert(&student_root, &buddy);
    student_insert(&student_root, &kyle);
    student_insert(&student_root, &arthur);
    student_insert(&student_root, &bob);
    student_insert(&student_root, &rudy);
    student_insert(&student_root, &jacky);
    student_insert(&student_root, &lisa);

    for (u32 i = 0; i < 9; ++i) {
        student = student_search(&student_root, i);
        if (student) {
            printk(KERN_ALERT "search id: %d, name: %s\n", student->id, student->name);
        }
    }

    for (node = rb_first(&student_root); node; node = rb_next(node)) {
        student = rb_entry(node, struct student_info, node);
        printk(KERN_ALERT "traversal id: %d, name: %s\n", student->id, student->name);
    }

    if (!rb_find_add(&kevin.node, &student_root, student_cmp)) {
        printk(KERN_ALERT "kevin added\n");
    }
    
    node = rb_last(&student_root);
    student = rb_entry(node, struct student_info, node);
    printk(KERN_ALERT "last id: %d, name: %s\n", student->id, student->name);

    student_root = RB_ROOT;
    return 0;
}

static void student_exit(void)
{
    printk(KERN_ALERT "student_exit enter\n");
}

module_init(student_init);
module_exit(student_exit);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("A kernel rbtree example");

/ # insmod student_rbtree.ko 
[    8.587506] student_rbtree: loading out-of-tree module taints kernel.
[    8.591524] search id: 0, name: tom
[    8.591626] search id: 1, name: andy
[    8.591699] search id: 2, name: buddy
[    8.591769] search id: 3, name: kyle
[    8.591854] search id: 4, name: arthur
[    8.591924] search id: 5, name: bob
[    8.591988] search id: 6, name: rudy
[    8.592071] search id: 7, name: jacky
[    8.592118] search id: 8, name: lisa
[    8.592178] traversal id: 0, name: tom
[    8.592237] traversal id: 1, name: andy
[    8.592329] traversal id: 2, name: buddy
[    8.592396] traversal id: 3, name: kyle
[    8.592466] traversal id: 4, name: arthur
[    8.592528] traversal id: 5, name: bob
[    8.592593] traversal id: 6, name: rudy
[    8.592656] traversal id: 7, name: jacky
[    8.592726] traversal id: 8, name: lisa
[    8.592964] kevin add success
[    8.593098] last id: 9, name: kevin

结束语

基本的数据结构就整理到这里，内核的实现真的是很漂亮，后面在工作中如果遇到需要使用这些数据结构的场景，我选择直接移植这些数据结构到用户应用！
另外截止目前，Linux版本已经到5.19了，而《Linux内核设计与实现》讲的是2.6，这中间可能新增了很多数据结构，改天再开个博客再记录。

内核数据结构

链表

链表头文件

链表实现

链表使用

插入节点

删除节点

移动与合并

遍历链表

demo

更多

队列

队列头文件

队列使用

初始化队列

基本方法

demo

IDR & IDA

关于IDR & IDA

IDR & IDA的使用

demo

红黑树

红黑树头文件

红黑树的使用

demo

结束语