linux cgroup代码学习(2)——数据结构整理

最新推荐文章于 2024-12-18 10:23:14 发布

转载最新推荐文章于 2024-12-18 10:23:14 发布 · 245 阅读

0 ·

CC 4.0 BY-SA版权

原文链接：https://segmentfault.com/a/1190000010543907

文章标签：

#数据结构与算法

本文深入解析了Linux CGroup机制中的核心结构，包括task_struct、css_set、cgroup、cg_cgroup_link等，并详细阐述了它们之间的关联方式及作用。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

相关的结构

task_struct

定义了CONFIG_CGROUPS宏的话，task_struct和cgroup相关的变量有：

    /* Control Group info protected by css_set_lock */
    struct css_set __rcu *cgroups;// the css_set this task is associated with
    /* cg_list protected by css_set_lock and tsk->alloc_lock */
    struct list_head cg_list;// node on css_set->tasks, the list of all tasks that share the same css_set

CSS_SET

/*
 * A css_set is the structure a task points at directly. It groups one
 * cgroup_subsys_state per subsystem and, through cg_cgroup_link objects,
 * is tied to every cgroup it refers to.
 */
struct css_set {

    /* Reference count */
    atomic_t refcount;

    /*
     * List running through all cgroup groups in the same hash
     * slot. Protected by css_set_lock
     */
    struct hlist_node hlist;    /* chains css_sets that share a hash slot */

    /*
     * List running through all tasks using this cgroup
     * group. Protected by css_set_lock
     */
    struct list_head tasks;     /* every task_struct that uses this css_set */

    /*
     * List of cg_cgroup_link objects on link chains from
     * cgroups referenced from this css_set. Protected by
     * css_set_lock
     *
     * Each cg_cgroup_link on this list points at one cgroup that is
     * associated with this css_set.
     */
    struct list_head cg_links;

    /*
     * Set of subsystem states, one for each subsystem. This array
     * is immutable after creation apart from the init_css_set
     * during subsystem registration (at boot time) and modular subsystem
     * loading/unloading.
     *
     * Each subsystem owns the array entry at its own id. The array holds
     * a css for every subsystem: when this css_set does not specify a css
     * for a subsystem, or the subsystem is not mounted, the entry
     * defaults to the root css.
     */
    struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];

    /* For RCU-protected deletion */
    struct rcu_head rcu_head;
};

css_set是直接和task关联的结构，css_set用来关联一组cgroup_subsys_state对象，同时通过cg_cgroup_link和所有相关的cgroup建立关联。

cgroup

struct cgroup {
    unsigned long flags;        /* "unsigned long" so bitops work */

    /*
     * count users of this cgroup. >0 means busy, but doesn't
     * necessarily indicate the number of tasks in the cgroup
     */
    atomic_t count;// reference count of this cgroup

    int id;                /* ida allocated in-hierarchy ID */

    /*
     * We link our 'sibling' struct into our parent's 'children'.
     * Our children link their 'sibling' into our 'children'.
     */
    struct list_head sibling;    /* my parent's children */ // sibling cgroups
    struct list_head children;    /* my children */ // child cgroups
    struct list_head files;        /* my files */

    struct cgroup *parent;        /* my parent */   // parent cgroup
    struct dentry *dentry;        /* cgroup fs entry, RCU protected */ // dentry that corresponds to this cgroup

    /*
     * This is a copy of dentry->d_name, and it's needed because
     * we can't use dentry->d_name in cgroup_path().
     *
     * You must acquire rcu_read_lock() to access cgrp->name, and
     * the only place that can change it is rename(), which is
     * protected by parent dir's i_mutex.
     *
     * Normally you should use cgroup_name() wrapper rather than
     * access it directly.
     */
    struct cgroup_name __rcu *name; // name of the cgroup, same as dentry->d_name

    /* Private pointers for each registered subsystem */
    struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];// css of each subsystem attached to this cgroup; a subsystem's css sits at subsys[subsystem->subsys_id]

    struct cgroupfs_root *root;// cgroupfs_root this cgroup lives in, i.e. its hierarchy
    /*
     * List of cg_cgroup_links pointing at css_sets with
     * tasks in this cgroup. Protected by css_set_lock
     */
    struct list_head css_sets; // reaches the css_sets associated with this cgroup via cg_cgroup_link entries

    struct list_head allcg_node;    /* cgroupfs_root->allcg_list */
    struct list_head cft_q_node;    /* used during cftype add/rm */

    /*
     * Linked list running through all cgroups that can
     * potentially be reaped by the release agent. Protected by
     * release_list_lock
     */
    struct list_head release_list;

    /*
     * list of pidlists, up to two for each namespace (one for procs, one
     * for tasks); created on demand.
     */
    struct list_head pidlists;
    struct mutex pidlist_mutex;

    /* For RCU-protected deletion */
    struct rcu_head rcu_head;
    struct work_struct free_work;

    /* List of events which userspace want to receive */
    struct list_head event_list;
    spinlock_t event_list_lock;

    /* directory xattrs */
    struct simple_xattrs xattrs;
};

cgroup对应一个control group实例，需要和css_set关联，这样每个task_struct就可以通过task_struct->cgroups找到css_set，继而和cgroup关联，达到控制和隔离资源的目的。那么css_set和cgroup具体是怎么关联的呢？
首先需要明确的是cgroup和css_set是多对多的关系，即：一个css_set可以对应多个cgroup，同时一个cgroup也可以被多个css_set所包含。
这种多对多的映射关系，是通过cg_cgroup_link这个中间结构来关联的。

/* Link structure for associating css_set objects with cgroups */
struct cg_cgroup_link {
    /*
     * List running through cg_cgroup_links associated with a
     * cgroup, anchored on cgroup->css_sets
     */
    struct list_head cgrp_link_list;
    struct cgroup *cgrp;    /* the cgroup end of this link */
    /*
     * List running through cg_cgroup_links pointing at a
     * single css_set object, anchored on css_set->cg_links
     */
    struct list_head cg_link_list;
    struct css_set *cg;     /* the css_set end of this link */
};

一个cg_cgroup_link需要包含两类信息，即关联的cgroup和css_set信息，一个cg_cgroup_link可以让一个cgroup和一个css_set相关联。但是正如我们前面所说，css_set和cgroup是多对多的对应关系，所以，一个css_set需要保存多个cg_cgroup_link，一个cgroup也需要保存多个cg_cgroup_link信息。具体来说，css_set中的cg_links维护了一个链表，链表中的元素为cg_cgroup_link中的cg_link_list。cgroup中的css_sets也维护了一个cg_cgroup_link链表，链表中的元素为cg_cgroup_link中的cgrp_link_list。结构如下图所示：

图片描述

cgroupfs_root

cgroupfs_root对应一个层级（hierarchy）。从Linux文件系统角度来讲，cgroupfs_root对应我们mount相应cgroup时创建的super_block，即我们每进行一次mount操作，就对应一个cgroupfs_root的创建。

/*
 * A cgroupfs_root represents the root of a cgroup hierarchy, and may be
 * associated with a superblock to form an active hierarchy.  This is
 * internal to cgroup core.  Don't access directly from controllers.
 */
struct cgroupfs_root {
    // super block that this cgroupfs_root corresponds to
    struct super_block *sb;

    /*
     * The bitmask of subsystems intended to be attached to this
     * hierarchy
     */
    unsigned long subsys_mask;// subsystems included in this hierarchy, expressed as a bitmask

    /* Unique id for this hierarchy. */
    int hierarchy_id;// id

    /* The bitmask of subsystems currently attached to this hierarchy */
    unsigned long actual_subsys_mask;// TODO: annotate

    /* A list running through the attached subsystems */
    struct list_head subsys_list;// list of the associated cgroup_subsys

    /* The root cgroup for this hierarchy */
    struct cgroup top_cgroup;// root cgroup of this hierarchy

    /* Tracks how many cgroups are currently defined in hierarchy.*/
    int number_of_cgroups;// number of cgroups in this hierarchy

    /* A list running through the active hierarchies */
    struct list_head root_list; // list of the hierarchies in the system

    /* All cgroups on this root, cgroup_mutex protected */
    struct list_head allcg_list;// every cgroup contained in this hierarchy

    /* Hierarchy-specific flags */
    unsigned long flags;

    /* IDs for cgroups in this hierarchy */
    struct ida cgroup_ida;

    /* The path to use for release notifications. */
    char release_agent_path[PATH_MAX];

    /* The name for this hierarchy - may be empty */
    char name[MAX_CGROUP_ROOT_NAMELEN];
};

cgroup_subsys

对应特定的子系统。

/*
 * Control Group subsystem type.
 * See Documentation/cgroups/cgroups.txt for details
 */

/* Describes one specific cgroup subsystem. */
struct cgroup_subsys {
    /*
     * Per-subsystem callbacks: each subsystem points these at its own
     * implementations.
     */
    struct cgroup_subsys_state *(*css_alloc)(struct cgroup *cgrp);
    int (*css_online)(struct cgroup *cgrp);
    void (*css_offline)(struct cgroup *cgrp);
    void (*css_free)(struct cgroup *cgrp);

    int (*can_attach)(struct cgroup *cgrp, struct cgroup_taskset *tset);
    void (*cancel_attach)(struct cgroup *cgrp, struct cgroup_taskset *tset);
    void (*attach)(struct cgroup *cgrp, struct cgroup_taskset *tset);
    void (*fork)(struct task_struct *task);
    void (*exit)(struct cgroup *cgrp, struct cgroup *old_cgrp,
             struct task_struct *task);
    void (*bind)(struct cgroup *root);

    int subsys_id;      /* subsystem id */
    int disabled;
    int early_init;     /* whether this subsystem is set up at early init */
    /*
     * True if this subsys uses ID. ID is not available before cgroup_init()
     * (not available in early_init time.)
     */
    bool use_id;

    /*
     * If %false, this subsystem is properly hierarchical -
     * configuration, resource accounting and restriction on a parent
     * cgroup cover those of its children.  If %true, hierarchy support
     * is broken in some ways - some subsystems ignore hierarchy
     * completely while others are only implemented half-way.
     *
     * It's now disallowed to create nested cgroups if the subsystem is
     * broken and cgroup core will emit a warning message on such
     * cases.  Eventually, all subsystems will be made properly
     * hierarchical and this will go away.
     */
    bool broken_hierarchy;
    bool warned_broken_hierarchy;

#define MAX_CGROUP_TYPE_NAMELEN 32
    const char *name;

    /*
     * Link to parent, and list entry in parent's children.
     * Protected by cgroup_lock()
     */
    struct cgroupfs_root *root;     /* the hierarchy this subsystem belongs to */
    struct list_head sibling;
    /* used when use_id == true */
    struct idr idr;
    spinlock_t id_lock;

    /* list of cftype_sets */
    struct list_head cftsets;

    /* base cftypes, automatically [de]registered with subsys itself */
    struct cftype *base_cftypes;
    struct cftype_set base_cftset;

    /* should be defined only by modular subsystems */
    struct module *module;
};

cgroup_subsys_state

/* Per-subsystem/per-cgroup state maintained by the system. */
struct cgroup_subsys_state {
    /*
     * The cgroup that this subsystem is attached to. Useful
     * for subsystems that want to know about the cgroup
     * hierarchy structure
     *
     * One cgroup may correspond to several css objects.
     */
    struct cgroup *cgroup;

    /*
     * State maintained by the cgroup system to allow subsystems
     * to be "busy". Should be accessed via css_get(),
     * css_tryget() and css_put().
     */
    atomic_t refcnt;

    unsigned long flags;

    /* ID for this css, if possible */
    struct css_id __rcu *id;

    /* Used to put @cgroup->dentry on the last css_put() */
    struct work_struct dput_work;
};

此外，还有几个cgroup相关的全局变量



 /*
 * The "rootnode" hierarchy is the "dummy hierarchy", reserved for the
 * subsystems that are otherwise unattached - it never has more than a
 * single cgroup, and all tasks are part of that cgroup.
 * This is the default cgroupfs_root set up at initialization. At system
 * init every subsystem is attached to this hierarchy. cgroups cannot be
 * created on the default hierarchy, so it only has the one default root
 * cgroup.
 */
static struct cgroupfs_root rootnode;

/* The default css_set - used by init and its children prior to any
 * hierarchies being mounted. It contains a pointer to the root state
 * for each subsystem. Also used to anchor the list of css_sets. Not
 * reference-counted, to improve performance when child cgroups
 * haven't been created.
 * This is the default css_set set up at initialization. Before any
 * hierarchy is mounted, init and its children are associated with this
 * css_set. init_css_set->subsys points at the root css of every subsystem.
 */
static struct css_set init_css_set;

/*
 * hash table for cgroup groups. This improves the performance to find
 * an existing css_set. This hash doesn't (currently) take into
 * account cgroups in empty hierarchies.
 * css_set的hash table.将task关联到指定css，就是将task->cgroup指针指向一个
 * css_set. css_set_table以css[]为key,相同的css集合，即为同一个css_set. 当有task需要关联到到一
 * 组css时，以css[]为key在hash table中查找是否存在,存在直接引用此key的css_set，否则创建css_set
 * 并添加到hash table.
 * 
#define CSS_SET_HASH_BITS    7
static DEFINE_HASHTABLE(css_set_table, CSS_SET_HASH_BITS);