kernel-4.4 slab(二)

最新推荐文章于 2024-10-24 21:02:35 发布

laughing_zou

最新推荐文章于 2024-10-24 21:02:35 发布

阅读量280

点赞数

分类专栏： Linux memory management

本文链接：https://blog.youkuaiyun.com/zsj100213/article/details/81509352

版权

Linux memory management 专栏收录该内容

54 篇文章

订阅专栏

本文详细解析Linux内核中Slab分配器的工作原理，包括对象分配流程、关键函数作用及内部数据结构。从kmem_cache_alloc()入口，深入到__cache_alloc()、cache_alloc_refill()及cache_grow()等核心函数，揭示Slab对象如何高效地在不同缓存层级间迁移。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

slab对象的分配：

slab对象的分配使用kmem_cache_alloc():

/*
 * kmem_cache_alloc - allocate a single object from @cachep.
 * @cachep: cache to allocate from
 * @flags:  gfp flags, handed to slab_alloc() unchanged
 *
 * The allocation itself is done by slab_alloc(); this wrapper only adds
 * the trace event, which records the caller address, the object returned,
 * and the cache's object_size/size together with @flags.
 *
 * Returns whatever slab_alloc() returned.
 */
void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
{
	void *objp;

	objp = slab_alloc(cachep, flags, _RET_IP_);
	trace_kmem_cache_alloc(_RET_IP_, objp, cachep->object_size,
			       cachep->size, flags);
	return objp;
}

实际执行函数为slab_alloc()->__do_cache_alloc()->____cache_alloc()

/*
 * ____cache_alloc - fast-path allocation from the array cache returned by
 * cpu_cache_get(), falling back to cache_alloc_refill() on a miss.
 * @cachep: cache to allocate from
 * @flags:  gfp flags, forwarded to ac_get_obj() and cache_alloc_refill()
 *
 * Hit path:  ac->avail is non-zero and ac_get_obj() yields an object.
 * Miss path: ac->avail is zero, or ac_get_obj() returned NULL even though
 *            objects were available (then force_refill is set).
 *
 * Returns the object pointer, or NULL if cache_alloc_refill() returned NULL.
 */
static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
{
	void *objp;
	struct array_cache *ac;
	bool force_refill = false;

	/* NOTE(review): presumably asserts IRQs are disabled - confirm. */
	check_irq_off();

	ac = cpu_cache_get(cachep);---------------（1）
	if (likely(ac->avail)) {------------------（2）
		/* Mark the array cache as recently used. */
		ac->touched = 1;
		objp = ac_get_obj(cachep, ac, flags, false);------------（3）

		/*
		 * Allow for the possibility all avail objects are not allowed
		 * by the current flags
		 */
		if (objp) {
			STATS_INC_ALLOCHIT(cachep);
			goto out;
		}
		/* Objects were available but none was handed out. */
		force_refill = true;----------------(4)
	}

	STATS_INC_ALLOCMISS(cachep);
	objp = cache_alloc_refill(cachep, flags, force_refill);--------------（5）
	/*
	 * the 'ac' may be updated by cache_alloc_refill(),
	 * and kmemleak_erase() requires its correct value.
	 */
	ac = cpu_cache_get(cachep);

out:
	/*
	 * To avoid a false negative, if an object that is in one of the
	 * per-CPU caches is leaked, we need to make sure kmemleak doesn't
	 * treat the array pointers as a reference to the object.
	 *
	 * On the plain ac_get_obj() path the object was read from
	 * entry[--avail], so entry[avail] is the slot it was just taken from.
	 */
	if (objp)
		kmemleak_erase(&ac->entry[ac->avail]);
	return objp;
}

(1)获取array_cache结构体，由于之前初始化slab描述符时已经初始化完毕，故肯定能获取到。

(2)如果array_cache的avail值不为0表示当前cpu本地缓冲池中有空闲对象，则调用ac_get_obj获取slab对象。

(3)获取slab对象：

/*
 * ac_get_obj - take one object out of the array cache @ac.
 * @cachep:       owning cache, only used on the __ac_get_obj() path
 * @ac:           array cache to pop from
 * @flags:        gfp flags, only used on the __ac_get_obj() path
 * @force_refill: forwarded to __ac_get_obj()
 *
 * When sk_memalloc_socks() is true the work is delegated to
 * __ac_get_obj(). Otherwise the last entry of ac->entry[] is popped
 * directly: ac->avail is decremented and the entry at the new index is
 * returned.
 */
static inline void *ac_get_obj(struct kmem_cache *cachep,
			struct array_cache *ac, gfp_t flags, bool force_refill)
{
	if (unlikely(sk_memalloc_socks()))
		return __ac_get_obj(cachep, ac, flags, force_refill);

	/* Common case: pop the last member of the entry array. */
	ac->avail--;
	return ac->entry[ac->avail];
}

(4)如果本地缓冲池中有对象但没有分配成功，则将force_refill置为true，随后进入cache_alloc_refill()重新分配对象缓冲池

(5)如果array_cache不存在空闲对象，则直接调用核心函数cache_alloc_refill()获取slab对象。

/*
 * cache_alloc_refill - restock the array cache returned by cpu_cache_get()
 * and hand back one object from it.
 * @cachep:       cache being allocated from
 * @flags:        gfp flags, forwarded to cache_grow() and ac_get_obj()
 * @force_refill: when true, skip the shared-array and slab-list scan and
 *                go straight to cache_grow(); a failed cache_grow() then
 *                returns NULL regardless of ac->avail
 *
 * Sources are tried in this order:
 *   1. n->shared, via transfer_objects()
 *   2. slabs on n->slabs_partial, then n->slabs_free
 *   3. a new slab from cache_grow(), followed by a retry of 1-2
 *
 * Returns the result of ac_get_obj(), or NULL when cache_grow() failed and
 * there is nothing in the array cache (or @force_refill is set).
 */
static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags,
							bool force_refill)
{
	int batchcount;
	struct kmem_cache_node *n;
	struct array_cache *ac;
	int node;

	check_irq_off();
	node = numa_mem_id();
	/*
	 * Jumps into the middle of the "no objects" block below; 'ac' is
	 * still unassigned here but is fetched right after cache_grow().
	 */
	if (unlikely(force_refill))
		goto force_grow;
retry:
	ac = cpu_cache_get(cachep);--------------（1）
	batchcount = ac->batchcount;
	if (!ac->touched && batchcount > BATCHREFILL_LIMIT) {
		/*
		 * If there was little recent activity on this cache, then
		 * perform only a partial refill.  Otherwise we could generate
		 * refill bouncing.
		 */
		batchcount = BATCHREFILL_LIMIT;
	}
	n = get_node(cachep, node);--------------（2）

	/* A refill is only expected when the array cache is empty. */
	BUG_ON(ac->avail > 0 || !n);
	spin_lock(&n->list_lock);

	/* See if we can refill from the shared array */
	if (n->shared && transfer_objects(ac, n->shared, batchcount)) {---------（3）
		n->shared->touched = 1;
		/* Skips the free_objects adjustment at must_grow. */
		goto alloc_done;
	}

	while (batchcount > 0) {
		struct list_head *entry;
		struct page *page;
		/* Get slab alloc is to come from. */
		entry = n->slabs_partial.next;----------------（4）
		/* List head pointing at itself means the list is empty. */
		if (entry == &n->slabs_partial) {
			n->free_touched = 1;
			entry = n->slabs_free.next;
			if (entry == &n->slabs_free)
				goto must_grow;
		}

		page = list_entry(entry, struct page, lru);--------------（5）
		check_spinlock_acquired(cachep);

		/*
		 * The slab was either on partial or free list so
		 * there must be at least one object available for
		 * allocation.
		 */
		BUG_ON(page->active >= cachep->num);------------（6）

		/*
		 * Move objects from this slab into the array cache until the
		 * slab is exhausted or the batch is complete. Note the
		 * post-decrement: when the batch completes, batchcount ends
		 * at -1, which also terminates the outer loop.
		 */
		while (page->active < cachep->num && batchcount--) {------------(7)
			STATS_INC_ALLOCED(cachep);
			STATS_INC_ACTIVE(cachep);
			STATS_SET_HIGH(cachep);

			ac_put_obj(cachep, ac, slab_get_obj(cachep, page,
									node));---------------（8）
		}

		/* move slabp to correct slabp list: */
		list_del(&page->lru);-------------------------（9）
		if (page->active == cachep->num)-----------------（10）
			list_add(&page->lru, &n->slabs_full);
		else
			list_add(&page->lru, &n->slabs_partial);
	}

must_grow:
	/*
	 * ac->avail was 0 before the loop (see BUG_ON above), so it now
	 * equals the number of objects pulled off the node's slabs.
	 */
	n->free_objects -= ac->avail;
alloc_done:
	spin_unlock(&n->list_lock);

	if (unlikely(!ac->avail)) {
		int x;
force_grow:
		x = cache_grow(cachep, gfp_exact_node(flags), node, NULL);--------------（11）

		/* cache_grow can reenable interrupts, then ac could change. */
		ac = cpu_cache_get(cachep);
		node = numa_mem_id();

		/* no objects in sight? abort */
		if (!x && (ac->avail == 0 || force_refill))
			return NULL;

		/*
		 * cache_grow() only queues the new slab on slabs_free; loop
		 * back to move its objects into the array cache.
		 */
		if (!ac->avail)		/* objects refilled by interrupt? */-------------（12）
			goto retry;
	}
	ac->touched = 1;

	return ac_get_obj(cachep, ac, flags, force_refill);----------------（13）
}

(1)获取本CPU的本地对象缓冲池描述符

(2)获取slab节点描述符，前面已经初始化好

(3)如果slab节点的shared共享缓冲池有初始化，并且从中获取batchcount个slab对象成功，则goto alloc_done.

(4)如果共享缓冲池分配失败，则进入while循环，先后判断slab节点的slabs_partial和slabs_free链表是否为空，如果两者都为空，则goto must_grow

(5)如果slabs_partial或者slabs_free不为空，则使用list_entry通过链表中的lru成员获取对应的page结构体

(6)如果page->active大于等于slab描述符中最大slab对象个数（cachep->num），说明该slab已无空闲对象，触发BUG_ON报错

(7)关键操作，通过while循环，从当前首个页面为page的缓冲池中取出batchcount个slab对象添加到CPU的本地对象缓冲池

(8)从缓冲池中获取一个slab对象，添加到CPU本地对象缓冲池，此时page->active会加1，此时如果出现active=num且batchcount>0，则需要重新开始寻找可以分配slab对象的partial或者free链表，直到将batchcount个slab对象迁移到CPU本地对象缓冲池中

(9)将page从原链表中删除

(10)判断当前page的active是否等于slab的num，如果等于则将page移动到full链表，否则就移动到partial中

(11)如果partial和free链表都为空，则说明没有空闲的slab对象，需要重新创建一个对象缓冲池

(12)cache_grow函数只是重新分配了slab对象缓冲池，然后将其添加到slabs_free链表，此处avail仍然为0，需要retry，此时会添加batchcount个slab对象到CPU本地对象缓冲池中，然后avail不等于0。

(13)从CPU本地对象缓冲池中获取一个slab对象，完成slab对象的分配。

下面来看cache_grow()函数：

/*
 * cache_grow - add one new slab to @cachep on node @nodeid.
 * @cachep: cache to grow
 * @flags:  gfp flags; any bit in GFP_SLAB_BUG_MASK triggers BUG()
 * @nodeid: node whose kmem_cache_node receives the slab
 * @page:   page(s) to use for the slab, or NULL to allocate them with
 *          kmem_getpages()
 *
 * If gfpflags_allow_blocking(local_flags) is true, interrupts are enabled
 * around the allocation and disabled again on every exit path.
 *
 * The new slab is appended to n->slabs_free and n->free_objects is raised
 * by cachep->num.
 *
 * Returns 1 on success, 0 if the page or freelist allocation failed.
 */
static int cache_grow(struct kmem_cache *cachep,
		gfp_t flags, int nodeid, struct page *page)
{
	void *freelist;
	size_t offset;
	gfp_t local_flags;
	struct kmem_cache_node *n;

	/*
	 * Be lazy and only check for valid flags here,  keeping it out of the
	 * critical path in kmem_cache_alloc().
	 */
	if (unlikely(flags & GFP_SLAB_BUG_MASK)) {
		pr_emerg("gfp: %u\n", flags & GFP_SLAB_BUG_MASK);
		BUG();
	}
	/* Keep only the constraint and reclaim bits for the allocations. */
	local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);

	/* Take the node list lock to change the colour_next on this node */
	check_irq_off();
	n = get_node(cachep, nodeid);
	spin_lock(&n->list_lock);

	/*
	 * Get colour for the slab, and cal the next value.
	 * colour_next cycles through 0 .. cachep->colour - 1.
	 */
	offset = n->colour_next;------------------（1）
	n->colour_next++;
	if (n->colour_next >= cachep->colour)
		n->colour_next = 0;
	spin_unlock(&n->list_lock);

	/* Scale the colour index by colour_off to get the actual offset. */
	offset *= cachep->colour_off;

	if (gfpflags_allow_blocking(local_flags))
		local_irq_enable();

	/*
	 * The test for missing atomic flag is performed here, rather than
	 * the more obvious place, simply to reduce the critical path length
	 * in kmem_cache_alloc(). If a caller is seriously mis-behaving they
	 * will eventually be caught here (where it matters).
	 */
	kmem_flagcheck(cachep, flags);

	/*
	 * Get mem for the objs.  Attempt to allocate a physical page from
	 * 'nodeid'.
	 */
	if (!page)
		page = kmem_getpages(cachep, local_flags, nodeid);-----------（2）
	if (!page)
		goto failed;

	/* Get slab management. */
	freelist = alloc_slabmgmt(cachep, page, offset,
			local_flags & ~GFP_CONSTRAINT_MASK, nodeid);---------（3）
	if (!freelist)
		goto opps1;

	slab_map_pages(cachep, page, freelist);---------------（4）

	cache_init_objs(cachep, page);----------------（5）

	/* Back to IRQs-off before taking the node lock again. */
	if (gfpflags_allow_blocking(local_flags))
		local_irq_disable();
	check_irq_off();
	spin_lock(&n->list_lock);

	/* Make slab active. */
	list_add_tail(&page->lru, &(n->slabs_free));---------------（6）
	STATS_INC_GROWN(cachep);
	n->free_objects += cachep->num;-----------------------（7）
	spin_unlock(&n->list_lock);
	return 1;
opps1:
	/*
	 * Freelist allocation failed: release the pages. Note this also
	 * frees a page that was passed in by the caller.
	 */
	kmem_freepages(cachep, page);
failed:
	/* Restore the IRQs-off state the caller entered with. */
	if (gfpflags_allow_blocking(local_flags))
		local_irq_disable();
	return 0;
}

(1)colour_next表示下一个slab应该使用的colour（着色）序号，从0开始增加，每新建一个slab加1，直到达到最大值cachep->colour，然后再从0开始计算。实际的着色偏移offset为colour_next*cachep->colour_off。

(2)为slab对象缓冲池分配2^gfporder个page

(3)初始化freelist,page->active,page->s_mem,freelist为slab对象缓冲池的首个page的虚拟地址+着色偏移offset（即colour_next*cachep->colour_off）得到，page->active初始化为0，page->s_mem为首个page的虚拟地址+offset+cachep->freelist_size

(4)初始化page->slab_cache=cachep, page->freelist=freelist

(5)初始化obj的状态为OBJECT_FREE以及page->freelist[]数组

(6)将新申请的slab对象缓冲池添加到slab node的slabs_free上

(7)slab node的总的free_objects数量加上cachep->num个数.

以上为slab对象分配的全过程，讲到这里可以画出slab的整体的框架图如下：