slab分配器详解-CSDN博客

slab分配器扮演了通用数据结构缓存层的角色。

slab分配器的概念首先在Sun Microsystems的SunOS 5.4操作系统中得以实现。Linux的数据结构缓存层沿用了它的名字和基本设计思想。

slab分配器试图在几个基本原则之间寻求一种平衡：

频繁使用的数据结构也会频繁地分配和释放，因此应当适当缓存它们。

频繁分配和回收必然会导致内存碎片。为了避免这种现象，空闲链表的缓存会连续地存放。因为已释放的数据结构又会放回空闲链表，因此不会导致碎片。

回收的对象可以立即投入下一次分配，因此对于频繁的分配和释放，空闲链表能够提高其性能。

如果分配器知道对象大小、页大小和总的高速缓存的大小这样的概念，它会做出更明智的决策。

如果让部分缓存专属于单个处理器，那么分配和释放就可以在不加SMP锁的情况下进行。

如果分配器是NUMA相关的，它就可以从相同的内存节点为请求者进行分配。

对存放的对象进行着色，以防止多个对象映射到相同的高速缓存行。

slab层的设计

slab层把不同的对象划分为所谓高速缓存组，其中每个高速缓存都存放不同类型的对象。每种对象类型对应一个高速缓存。kmalloc()接口建立在slab层之上，使用了一组通用高速缓存。即，kmalloc()分配的内存是从通用高速缓存中分配的。

然后，这些高速缓存又被划分为slab。slab由一个或多个物理上连续的页组成。一般情况下，slab就仅仅由一页组成。每个高速缓存可以由多个slab组成。

每个slab都包含一些对象成员，这里的对象指的是被缓存的数据结构。每个slab处于三种状态之一：满、部分满或空。一个满的slab没有空闲对象，一个空的slab没有分配出任何对象，一部分满的slab有一些对象已经分配出去还有些对象空闲中。当内核的某一部分需要一个新的对象时，先从部分满的slab中进行分配。如果没有部分满的slab就从空的slab进行分配。如果没有空的slab，就要创建一个slab了。

高速缓存、slab和对象之间的关系如下所示：

 
 在 mm/slab.c 中：
/*
 * struct kmem_cache
 *
 * manages a cache.
 *
 * Descriptor for one object cache. The members are grouped into six
 * numbered sections according to when they are touched; see the section
 * comments below.
 */
struct kmem_cache {
/* 1) per-cpu data, touched during every alloc/free */
    /* one array_cache pointer per CPU slot (NR_CPUS entries) */
    struct array_cache *array[NR_CPUS];
/* 2) Cache tunables. Protected by cache_chain_mutex */
    /* NOTE(review): batchcount/limit/shared presumably size the per-cpu
     * and shared array caches -- confirm against the tuning code. */
    unsigned int batchcount;
    unsigned int limit;
    unsigned int shared;
    /* total object size, including any debug fields/padding (see the
     * DEBUG section at the end of this struct) */
    unsigned int buffer_size;
    /* NOTE(review): presumably a precomputed reciprocal of buffer_size
     * used to avoid a division -- confirm. */
    u32 reciprocal_buffer_size;
/* 3) touched by every alloc & free from the backend */
    /* one kmem_list3 pointer per node slot (MAX_NUMNODES entries) */
    struct kmem_list3 *nodelists[MAX_NUMNODES];
    unsigned int flags;     /* constant flags */
    unsigned int num;       /* # of objs per slab */
/* 4) cache_grow/shrink */
    /* order of pgs per slab (2^n) */
    unsigned int gfporder;
    /* force GFP flags, e.g. GFP_DMA */
    gfp_t gfpflags;
    size_t colour;          /* cache colouring range */
    unsigned int colour_off;    /* colour offset */
    /* NOTE(review): presumably the cache that supplies slab descriptors
     * when they are kept outside the slab itself -- confirm. */
    struct kmem_cache *slabp_cache;
    /* NOTE(review): presumably the size of the slab descriptor (plus
     * bookkeeping), not of the slab's pages -- confirm. */
    unsigned int slab_size;
    unsigned int dflags;        /* dynamic flags */
    /* constructor func */
    /* NOTE(review): arguments are presumably (object, owning cache,
     * flags) -- confirm against the callers. */
    void (*ctor) (void *, struct kmem_cache *, unsigned long);
    /* de-constructor func */
    void (*dtor) (void *, struct kmem_cache *, unsigned long);
/* 5) cache creation/removal */
    const char *name;       /* name of this cache */
    struct list_head next;  /* list linkage; presumably the global cache chain -- confirm */
/* 6) statistics */
    /* The counters below exist only when STATS is non-zero. */
#if STATS 
    unsigned long num_active;
    unsigned long num_allocations;
    unsigned long high_mark;
    unsigned long grown;
    unsigned long reaped;
    unsigned long errors;
    unsigned long max_freeable;
    unsigned long node_allocs;
    unsigned long node_frees;
    unsigned long node_overflow;
    /* hit/miss counters are atomic_t, unlike the plain counters above */
    atomic_t allochit;
    atomic_t allocmiss;
    atomic_t freehit;
    atomic_t freemiss;
#endif 
#if DEBUG 
    /*
     * If debugging is enabled, then the allocator can add additional
     * fields and/or padding to every object. buffer_size contains the total
     * object size including these internal fields, the following two
     * variables contain the offset to the user object and its size.
     */
    int obj_offset;
    int obj_size;
#endif 
};

每个高速缓存都用一个kmem_cache结构来表示。这个结构关联三个链表：slabs_full、slabs_partial和slabs_free，它们均存放在kmem_list3结构内。这些链表包含高速缓存中的所有slab。

 
 
/*
 * The slab lists for all objects.
 *
 * Holds the three slab lists (partial, full, free) together with related
 * bookkeeping. struct kmem_cache keeps an array of pointers to these,
 * indexed up to MAX_NUMNODES.
 */
struct kmem_list3 {
    struct list_head slabs_partial; /* partial list first, better asm code */
    struct list_head slabs_full;    /* list of slabs with no free objects */
    struct list_head slabs_free;    /* list of slabs with no allocated objects */
    /* NOTE(review): presumably the number of free objects across the
     * lists above -- confirm. */
    unsigned long free_objects;
    /* NOTE(review): presumably an upper bound on free_objects before
     * slabs are released -- confirm. */
    unsigned int free_limit;
    unsigned int colour_next;   /* Per-node cache coloring */
    /* NOTE(review): presumably guards the lists and counters in this
     * struct -- confirm which fields it covers. */
    spinlock_t list_lock;
    struct array_cache *shared; /* shared per node */
    struct array_cache **alien; /* on other nodes */
    unsigned long next_reap;    /* updated without locking */
    int free_touched;       /* updated without locking */
};

以下是slab的定义：

 
  
/*
 * struct slab
 *
 * Manages the objs in a slab. Placed either at the beginning of mem allocated
 * for a slab, or allocated from a general cache.
 * Slabs are chained into three lists: fully used, partial, fully free slabs.
 */
struct slab {
    struct list_head list;  /* linkage into one of the three slab lists */
    /* NOTE(review): presumably the colour offset applied to this slab --
     * confirm units and how it relates to s_mem. */
    unsigned long colouroff;
    void *s_mem;        /* including colour offset */
    unsigned int inuse; /* num of objs active in slab */
    /* NOTE(review): presumably identifies the next free object in this
     * slab -- confirm against the kmem_bufctl_t users. */
    kmem_bufctl_t free;
    /* NOTE(review): presumably the node this slab's memory came from --
     * confirm. */
    unsigned short nodeid;
};

 

slab描述符要么在slab之外另行分配，要么放在slab自身最开始的地方。如果slab描述符很小或者slab内部有足够的空间容纳slab描述符，那么描述符就存放在slab里面。

slab分配器可以创建新的slab，这是通过alloc_pages_node()低级内核页分配器进行的：

 
  
/*
 * kmem_getpages - obtain the backing pages for a new slab.
 * @cachep: cache the pages are for; supplies gfporder, gfpflags and flags
 * @flags:  caller's GFP flags; cachep->gfpflags is OR-ed in before use
 * @nodeid: node id handed to alloc_pages_node()
 *
 * Thin wrapper around the system page allocator; the cache-lock does not
 * need to be held. Allocates 2^gfporder pages, accounts them as reclaimable
 * or unreclaimable slab pages in the zone statistics, and marks every page
 * with __SetPageSlab().
 *
 * DMA-able memory is returned when it was requested. It may occasionally be
 * returned even when it was not requested; that is rare and can be ignored.
 *
 * Returns page_address() of the first allocated page, or NULL when the page
 * allocation fails.
 */
static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
{
    struct page *first;
    int count;
    int idx;

#ifndef CONFIG_MMU
    /*
     * Without an MMU, slab memory also backs process anonymous memory,
     * so higher order allocations need __GFP_COMP to be refcounted
     * properly.
     */
    flags |= __GFP_COMP;
#endif
    /* Flags the cache insists on (e.g. GFP_DMA) always apply. */
    flags |= cachep->gfpflags;

    first = alloc_pages_node(nodeid, flags, cachep->gfporder);
    if (first == NULL)
        return NULL;

    count = 1 << cachep->gfporder;

    /* Charge the pages to the matching per-zone slab counter. */
    if (!(cachep->flags & SLAB_RECLAIM_ACCOUNT))
        add_zone_page_state(page_zone(first),
            NR_SLAB_UNRECLAIMABLE, count);
    else
        add_zone_page_state(page_zone(first),
            NR_SLAB_RECLAIMABLE, count);

    /* Flag each page of the allocation as a slab page. */
    idx = 0;
    while (idx < count) {
        __SetPageSlab(first + idx);
        idx++;
    }

    return page_address(first);
}