A new cache is created with the kmem_cache_create() function.
From mm/slab.c:
/**
 * kmem_cache_create - Create a cache.
 * @name: A string which is used in /proc/slabinfo to identify this cache.
 * @size: The size of objects to be created in this cache.
 * @align: The required alignment for the objects.
 * @flags: SLAB flags
 * @ctor: A constructor for the objects.
 * @dtor: A destructor for the objects.
 *
 * Returns a ptr to the cache on success, NULL on failure.
 * Cannot be called within a int, but can be interrupted.
 * The @ctor is run when new pages are allocated by the cache
 * and the @dtor is run before the pages are handed back.
 *
 * @name must be valid until the cache is destroyed. This implies that
 * the module calling this has to destroy the cache before getting unloaded.
 *
 * The flags are
 *
 * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5)
 * to catch references to uninitialised memory.
 *
 * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check
 * for buffer overruns.
 *
 * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
 * cacheline. This can be beneficial if you're counting cycles as closely
 * as davem.
 */
struct kmem_cache *
kmem_cache_create (const char *name, size_t size, size_t align,
        unsigned long flags,
        void (*ctor)(void*, struct kmem_cache *, unsigned long),
        void (*dtor)(void*, struct kmem_cache *, unsigned long))
{
        size_t left_over, slab_size, ralign;
        struct kmem_cache *cachep = NULL, *pc;

        /*
         * Sanity checks... these are all serious usage bugs.
         */
        if (!name || in_interrupt() || (size < BYTES_PER_WORD) ||
            (size > (1 << MAX_OBJ_ORDER) * PAGE_SIZE) || (dtor && !ctor)) {
                printk(KERN_ERR "%s: Early error in slab %s\n", __FUNCTION__,
                                name);
                BUG();
        }

        /*
         * We use cache_chain_mutex to ensure a consistent view of
         * cpu_online_map as well. Please see cpuup_callback
         */
        mutex_lock(&cache_chain_mutex);

        list_for_each_entry(pc, &cache_chain, next) {
                char tmp;
                int res;

                /*
                 * This happens when the module gets unloaded and doesn't
                 * destroy its slab cache and no-one else reuses the vmalloc
                 * area of the module. Print a warning.
                 */
                res = probe_kernel_address(pc->name, tmp);
                if (res) {
                        printk("SLAB: cache with size %d has lost its name\n",
                               pc->buffer_size);
                        continue;
                }

                if (!strcmp(pc->name, name)) {
                        printk("kmem_cache_create: duplicate cache %s\n", name);
                        dump_stack();
                        goto oops;
                }
        }

#if DEBUG
        WARN_ON(strchr(name, ' '));     /* It confuses parsers */
        if ((flags & SLAB_DEBUG_INITIAL) && !ctor) {
                /* No constructor, but initial state check requested */
                printk(KERN_ERR "%s: No con, but init state check "
                       "requested - %s\n", __FUNCTION__, name);
                flags &= ~SLAB_DEBUG_INITIAL;
        }
#if FORCED_DEBUG
        /*
         * Enable redzoning and last user accounting, except for caches with
         * large objects, if the increased size would increase the object size
         * above the next power of two: caches with object sizes just above a
         * power of two have a significant amount of internal fragmentation.
         */
        if (size < 4096 || fls(size - 1) == fls(size - 1 + 3 * BYTES_PER_WORD))
                flags |= SLAB_RED_ZONE | SLAB_STORE_USER;
        if (!(flags & SLAB_DESTROY_BY_RCU))
                flags |= SLAB_POISON;
#endif
        if (flags & SLAB_DESTROY_BY_RCU)
                BUG_ON(flags & SLAB_POISON);
#endif
        if (flags & SLAB_DESTROY_BY_RCU)
                BUG_ON(dtor);

        /*
         * Always checks flags, a caller might be expecting debug support which
         * isn't available.
         */
        BUG_ON(flags & ~CREATE_MASK);

        /*
         * Check that size is in terms of words. This is needed to avoid
         * unaligned accesses for some archs when redzoning is used, and makes
         * sure any on-slab bufctl's are also correctly aligned.
         */
        if (size & (BYTES_PER_WORD - 1)) {
                size += (BYTES_PER_WORD - 1);
                size &= ~(BYTES_PER_WORD - 1);
        }
        /* calculate the final buffer alignment: */

        /* 1) arch recommendation: can be overridden for debug */
        if (flags & SLAB_HWCACHE_ALIGN) {
                /*
                 * Default alignment: as specified by the arch code. Except if
                 * an object is really small, then squeeze multiple objects
                 * into one cacheline.
                 */
                ralign = cache_line_size();
                while (size <= ralign / 2)
                        ralign /= 2;
        } else {
                ralign = BYTES_PER_WORD;
        }

        /*
         * Redzoning and user store require word alignment. Note this will be
         * overridden by architecture or caller mandated alignment if either
         * is greater than BYTES_PER_WORD.
         */
        if (flags & SLAB_RED_ZONE || flags & SLAB_STORE_USER)
                ralign = BYTES_PER_WORD;

        /* 2) arch mandated alignment */
        if (ralign < ARCH_SLAB_MINALIGN) {
                ralign = ARCH_SLAB_MINALIGN;
        }
        /* 3) caller mandated alignment */
        if (ralign < align) {
                ralign = align;
        }
        /* disable debug if necessary */
        if (ralign > BYTES_PER_WORD)
                flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
        /*
         * 4) Store it.
         */
        align = ralign;

        /* Get cache's description obj. */
        cachep = kmem_cache_zalloc(&cache_cache, GFP_KERNEL);
        if (!cachep)
                goto oops;

#if DEBUG
        cachep->obj_size = size;

        /*
         * Both debugging options require word-alignment which is calculated
         * into align above.
         */
        if (flags & SLAB_RED_ZONE) {
                /* add space for red zone words */
                cachep->obj_offset += BYTES_PER_WORD;
                size += 2 * BYTES_PER_WORD;
        }
        if (flags & SLAB_STORE_USER) {
                /* user store requires one word storage behind the end of
                 * the real object.
                 */
                size += BYTES_PER_WORD;
        }
#if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC)
        if (size >= malloc_sizes[INDEX_L3 + 1].cs_size
            && cachep->obj_size > cache_line_size() && size < PAGE_SIZE) {
                cachep->obj_offset += PAGE_SIZE - size;
                size = PAGE_SIZE;
        }
#endif
#endif
        /*
         * Determine if the slab management is 'on' or 'off' slab.
         * (bootstrapping cannot cope with offslab caches so don't do
         * it too early on.)
         */
        if ((size >= (PAGE_SIZE >> 3)) && !slab_early_init)
                /*
                 * Size is large, assume best to place the slab management obj
                 * off-slab (should allow better packing of objs).
                 */
                flags |= CFLGS_OFF_SLAB;

        size = ALIGN(size, align);

        left_over = calculate_slab_order(cachep, size, align, flags);

        if (!cachep->num) {
                printk("kmem_cache_create: couldn't create cache %s.\n", name);
                kmem_cache_free(&cache_cache, cachep);
                cachep = NULL;
                goto oops;
        }
        slab_size = ALIGN(cachep->num * sizeof(kmem_bufctl_t)
                          + sizeof(struct slab), align);

        /*
         * If the slab has been placed off-slab, and we have enough space then
         * move it on-slab. This is at the expense of any extra colouring.
         */
        if (flags & CFLGS_OFF_SLAB && left_over >= slab_size) {
                flags &= ~CFLGS_OFF_SLAB;
                left_over -= slab_size;
        }

        if (flags & CFLGS_OFF_SLAB) {
                /* really off slab. No need for manual alignment */
                slab_size =
                    cachep->num * sizeof(kmem_bufctl_t) + sizeof(struct slab);
        }

        cachep->colour_off = cache_line_size();
        /* Offset must be a multiple of the alignment. */
        if (cachep->colour_off < align)
                cachep->colour_off = align;
        cachep->colour = left_over / cachep->colour_off;
        cachep->slab_size = slab_size;
        cachep->flags = flags;
        cachep->gfpflags = 0;
        if (CONFIG_ZONE_DMA_FLAG && (flags & SLAB_CACHE_DMA))
                cachep->gfpflags |= GFP_DMA;
        cachep->buffer_size = size;
        cachep->reciprocal_buffer_size = reciprocal_value(size);

        if (flags & CFLGS_OFF_SLAB) {
                cachep->slabp_cache = kmem_find_general_cachep(slab_size, 0u);
                /*
                 * This is a possibility for one of the malloc_sizes caches.
                 * But since we go off slab only for object size greater than
                 * PAGE_SIZE/8, and malloc_sizes gets created in ascending
                 * order, this should not happen at all.
                 * But leave a BUG_ON for some lucky dude.
                 */
                BUG_ON(!cachep->slabp_cache);
        }
        cachep->ctor = ctor;
        cachep->dtor = dtor;
        cachep->name = name;

        if (setup_cpu_cache(cachep)) {
                __kmem_cache_destroy(cachep);
                cachep = NULL;
                goto oops;
        }

        /* cache setup completed, link it into the list */
        list_add(&cachep->next, &cache_chain);
oops:
        if (!cachep && (flags & SLAB_PANIC))
                panic("kmem_cache_create(): failed to create slab `%s'\n",
                      name);
        mutex_unlock(&cache_chain_mutex);
        return cachep;
}
This function must not be called from interrupt context, because it may sleep.
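As a usage sketch only (the object type, the constructor, and all names below are hypothetical, not taken from the kernel source), a module would typically create its cache once during initialization, optionally passing a constructor that matches the three-argument prototype shown above:

/* Hypothetical example object and its cache (illustrative names only). */
struct my_record {
        int id;
        char payload[64];
};

static struct kmem_cache *my_record_cachep;

/*
 * Optional constructor matching the prototype in the listing above; the
 * cache runs it when it allocates new pages, once for each object on them.
 */
static void my_record_ctor(void *obj, struct kmem_cache *cachep,
                           unsigned long flags)
{
        struct my_record *rec = obj;

        rec->id = -1;
}

static int __init my_module_init(void)
{
        /*
         * Create a cache of my_record objects aligned to a hardware cache
         * line; no destructor is needed, so the last argument is NULL
         * (this follows the six-argument prototype shown above).
         */
        my_record_cachep = kmem_cache_create("my_record",
                                             sizeof(struct my_record),
                                             0, SLAB_HWCACHE_ALIGN,
                                             my_record_ctor, NULL);
        if (!my_record_cachep)
                return -ENOMEM;
        return 0;
}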
To destroy a cache, use the kmem_cache_destroy() function:
/**
 * kmem_cache_destroy - delete a cache
 * @cachep: the cache to destroy
 *
 * Remove a &struct kmem_cache object from the slab cache.
 *
 * It is expected this function will be called by a module when it is
 * unloaded. This will remove the cache completely, and avoid a duplicate
 * cache being allocated each time a module is loaded and unloaded, if the
 * module doesn't have persistent in-kernel storage across loads and unloads.
 *
 * The cache must be empty before calling this function.
 *
 * The caller must guarantee that no one will allocate memory from the cache
 * during the kmem_cache_destroy().
 */
void kmem_cache_destroy(struct kmem_cache *cachep)
{
        BUG_ON(!cachep || in_interrupt());

        /* Find the cache in the chain of caches. */
        mutex_lock(&cache_chain_mutex);
        /*
         * the chain is never empty, cache_cache is never destroyed
         */
        list_del(&cachep->next);
        if (__cache_shrink(cachep)) {
                slab_error(cachep, "Can't free all objects");
                list_add(&cachep->next, &cache_chain);
                mutex_unlock(&cache_chain_mutex);
                return;
        }

        if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU))
                synchronize_rcu();

        __kmem_cache_destroy(cachep);
        mutex_unlock(&cache_chain_mutex);
}
This function likewise must not be called from interrupt context, because it may sleep.
Two conditions must hold before calling kmem_cache_destroy():
1. Every slab in the cache must be empty. If even a single object in any slab is still allocated and in use, the cache cannot be destroyed.
2. No one may access the cache while kmem_cache_destroy() is executing; the caller must provide this synchronization (see the sketch right after this list).
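Continuing the hypothetical my_record example from above (a sketch with illustrative names, not kernel code), the module's exit path destroys the cache once all objects have been returned and no other path can still touch it:

static void __exit my_module_exit(void)
{
        /*
         * Every object allocated from my_record_cachep must already have
         * been handed back with kmem_cache_free(), and no other code may
         * still be allocating from or freeing into this cache.
         */
        kmem_cache_destroy(my_record_cachep);
}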
Once a cache has been created, an object is obtained from it with the kmem_cache_alloc() function:
/**
 * kmem_cache_alloc - Allocate an object
 * @cachep: The cache to allocate from.
 * @flags: See kmalloc().
 *
 * Allocate an object from this cache. The flags are only relevant
 * if the cache has no available objects.
 */
void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
{
        return __cache_alloc(cachep, flags, __builtin_return_address(0));
}

static __always_inline void *
__cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller)
{
        unsigned long save_flags;
        void *objp;

        cache_alloc_debugcheck_before(cachep, flags);

        local_irq_save(save_flags);
        objp = __do_cache_alloc(cachep, flags);
        local_irq_restore(save_flags);
        objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller);
        prefetchw(objp);
        return objp;
}
If none of the slabs in the cache has a free object, the slab layer must obtain new pages through kmem_getpages().
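A hedged usage sketch, again using the hypothetical my_record cache from above: allocate with GFP_KERNEL where sleeping is allowed, check for failure, then initialize the object's fields.

static struct my_record *my_record_new(int id)
{
        struct my_record *rec;

        /* GFP_KERNEL may sleep, so do not call this in interrupt context. */
        rec = kmem_cache_alloc(my_record_cachep, GFP_KERNEL);
        if (!rec)
                return NULL;

        rec->id = id;
        memset(rec->payload, 0, sizeof(rec->payload));
        return rec;
}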
Finally, the kmem_cache_free() function releases an object and returns it to its originating slab (a short usage sketch follows the listing):
/**
 * kmem_cache_free - Deallocate an object
 * @cachep: The cache the allocation was from.
 * @objp: The previously allocated object.
 *
 * Free an object which was previously allocated from this
 * cache.
 */
void kmem_cache_free(struct kmem_cache *cachep, void *objp)
{
        unsigned long flags;

        BUG_ON(virt_to_cache(objp) != cachep);

        local_irq_save(flags);
        debug_check_no_locks_freed(objp, obj_size(cachep));
        __cache_free(cachep, objp);
        local_irq_restore(flags);
}

/*
 * Release an obj back to its cache. If the obj has a constructed state, it must
 * be in this state _before_ it is released. Called with disabled ints.
 */
static inline void __cache_free(struct kmem_cache *cachep, void *objp)
{
        struct array_cache *ac = cpu_cache_get(cachep);

        check_irq_off();
        objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0));

        if (cache_free_alien(cachep, objp))
                return;

        if (likely(ac->avail < ac->limit)) {
                STATS_INC_FREEHIT(cachep);
                ac->entry[ac->avail++] = objp;
                return;
        } else {
                STATS_INC_FREEMISS(cachep);
                cache_flusharray(cachep, ac);
                ac->entry[ac->avail++] = objp;
        }
}
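To round off the hypothetical example (a sketch, not the kernel's own code), an object allocated above is handed back to its cache when it is no longer needed:

static void my_record_release(struct my_record *rec)
{
        /*
         * Return the object to my_record_cachep rather than to the page
         * allocator; the slab layer may keep it cached for quick reuse.
         */
        kmem_cache_free(my_record_cachep, rec);
}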