Linux内存管理之物理页面分配

本文详细介绍了Linux内核中的页面分配器,包括其基于伙伴系统算法和zone-based设计理念的工作原理。文章深入探讨了核心接口如alloc_pages和free_pages的具体实现,并解释了分配掩码的作用及zone水位检查的重要性。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

概述

页面分配器是Linux内核内存管理中最基本的分配器,基于伙伴系统算法(buddy)和zone-based的设计理念。

物理页面分配接口

alloc_pages

alloc_pages接口最终会调用到__alloc_pages_nodemask。

/*
 * Core of the page allocator entry path.
 *
 * Derives the highest usable zone index and the migrate type from gfp_mask,
 * then tries get_page_from_freelist() with ALLOC_WMARK_LOW|ALLOC_CPUSET.
 * If that fails, gfp_mask is filtered through memalloc_noio_flags() and the
 * request is handed to __alloc_pages_slowpath().
 *
 * Returns NULL without attempting an allocation when should_fail_alloc_page()
 * fires, when the zonelist has no first zone, or when
 * memcg_kmem_newpage_charge() refuses the charge.  Otherwise returns whatever
 * the fast/slow path produced (may still be NULL).
 *
 * The whole attempt is restarted from retry_cpuset when put_mems_allowed()
 * returns zero for the saved cookie and no page was obtained.
 */
struct page *
__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
            struct zonelist *zonelist, nodemask_t *nodemask)
{
    enum zone_type high_zoneidx = gfp_zone(gfp_mask);
    struct zone *preferred_zone;
    struct page *page = NULL;
    int migratetype = allocflags_to_migratetype(gfp_mask);
    unsigned int cpuset_mems_cookie;
    int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET;
    struct mem_cgroup *memcg = NULL;

    /* Drop any flag bits not present in gfp_allowed_mask. */
    gfp_mask &= gfp_allowed_mask;

    lockdep_trace_alloc(gfp_mask);

    might_sleep_if(gfp_mask & __GFP_WAIT); // the allocation may sleep when __GFP_WAIT is set

    if (should_fail_alloc_page(gfp_mask, order))
        return NULL;

    /*
     * Check the zones suitable for the gfp_mask contain at least one
     * valid zone. It's possible to have an empty zonelist as a result
     * of GFP_THISNODE and a memoryless node
     */
    if (unlikely(!zonelist->_zonerefs->zone))
        return NULL;

    /*
     * Will only have any effect when __GFP_KMEMCG is set.  This is
     * verified in the (always inline) callee
     */
    if (!memcg_kmem_newpage_charge(gfp_mask, &memcg, order))
        return NULL;

retry_cpuset:
    /* Cookie is checked again at "out" to decide whether to retry. */
    cpuset_mems_cookie = get_mems_allowed();

    /* The preferred zone is used for statistics later */
    first_zones_zonelist(zonelist, high_zoneidx,  // look up the preferred zone, bounded by high_zoneidx (from gfp_zone(gfp_mask))
                nodemask ? : &cpuset_current_mems_allowed,
                &preferred_zone);
    if (!preferred_zone)
        goto out;

    /* First allocation attempt */
    page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
            zonelist, high_zoneidx, alloc_flags,
            preferred_zone, migratetype);
    if (unlikely(!page)) {
        /*
         * Runtime PM, block IO and its error handling path
         * can deadlock because I/O on the device might not
         * complete.
         */
        gfp_mask = memalloc_noio_flags(gfp_mask);
        page = __alloc_pages_slowpath(gfp_mask, order,
                zonelist, high_zoneidx, nodemask,
                preferred_zone, migratetype);
    }

    /* Not reached when preferred_zone lookup failed (that path jumps to out). */
    trace_mm_page_alloc(page, order, gfp_mask, migratetype);

out:
    /*
     * When updating a task's mems_allowed, it is possible to race with
     * parallel threads in such a way that an allocation can fail while
     * the mask is being updated. If a page allocation is about to fail,
     * check if the cpuset changed during allocation and if so, retry.
     */
    if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page))
        goto retry_cpuset;

    /* Called with the final result, including page == NULL. */
    memcg_kmem_commit_charge(page, memcg, order);

    return page; // the allocated page, or NULL if no page was obtained
}

__get_free_pages

/*
 * Allocate pages of the given order through alloc_pages() and return the
 * address reported by page_address() for the result, cast to unsigned long.
 *
 * Returns 0 when alloc_pages() yields no page.  Callers must not pass
 * __GFP_HIGHMEM in gfp_mask; doing so trips the VM_BUG_ON() below.
 */
unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order)
{
    struct page *page;

    /*
     * __get_free_pages() returns a 32-bit address, which cannot represent
     * a highmem page
     */
    VM_BUG_ON((gfp_mask & __GFP_HIGHMEM) != 0);  /* highmem requests are rejected */

    page = alloc_pages(gfp_mask, order); /* still built on top of alloc_pages() */
    if (!page)
        return 0;

    /* The caller gets an address, not the struct page itself. */
    return (unsigned long) page_address(page);
}

物理页面释放接口

free_pages

free_pages最终会调用到__free_pages

/*
 * Release a page block of the given order.
 *
 * Nothing is freed unless put_page_testzero() returns non-zero (presumably
 * meaning the last reference was just dropped -- confirm against its
 * definition).  Order-0 pages go through free_hot_cold_page() with a second
 * argument of 0; every other order goes through __free_pages_ok().
 */
void __free_pages(struct page *page, unsigned int order)
{
    if (!put_page_testzero(page))
        return;

    if (order != 0) {
        __free_pages_ok(page, order);
        return;
    }

    /* Single pages take the separate order-0 path. */
    free_hot_cold_page(page, 0);
}

分配掩码

gfp.h中定义了常用的分配掩码。这些掩码会改变分配的行为。

/*
 * Composite allocation masks built from the individual __GFP_* bits.
 * The comments below only state which bits each mask combines.
 */

/* __GFP_HIGH only; note that __GFP_WAIT is not included. */
#define GFP_ATOMIC  (__GFP_HIGH)
/* __GFP_WAIT only: neither __GFP_IO nor __GFP_FS. */
#define GFP_NOIO    (__GFP_WAIT)
/* __GFP_WAIT and __GFP_IO, but not __GFP_FS. */
#define GFP_NOFS    (__GFP_WAIT | __GFP_IO)
/* __GFP_WAIT, __GFP_IO and __GFP_FS. */
#define GFP_KERNEL  (__GFP_WAIT | __GFP_IO | __GFP_FS)
/* Same bits as GFP_KERNEL plus __GFP_RECLAIMABLE. */
#define GFP_TEMPORARY   (__GFP_WAIT | __GFP_IO | __GFP_FS | \
             __GFP_RECLAIMABLE)
/* Same bits as GFP_KERNEL plus __GFP_HARDWALL. */
#define GFP_USER    (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL)
/* Same bits as GFP_USER plus __GFP_HIGHMEM. */
#define GFP_HIGHUSER    (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL | \
             __GFP_HIGHMEM)
/* Same bits as GFP_HIGHUSER plus __GFP_MOVABLE. */
#define GFP_HIGHUSER_MOVABLE    (__GFP_WAIT | __GFP_IO | __GFP_FS | \
                 __GFP_HARDWALL | __GFP_HIGHMEM | \
                 __GFP_MOVABLE)
/* Just the two I/O-related bits, __GFP_IO and __GFP_FS, without __GFP_WAIT. */
#define GFP_IOFS    (__GFP_IO | __GFP_FS)

zone水位

分配内存时会进行水位检测。

/*
 * Decide whether zone @z still has enough free pages to satisfy an
 * allocation of @order while staying above watermark @mark.
 *
 * @classzone_idx selects which entry of z->lowmem_reserve[] is added to the
 * threshold; @alloc_flags may lower the threshold (ALLOC_HIGH, ALLOC_HARDER);
 * @free_pages is the caller-supplied free page count to test.
 *
 * Returns true when the check passes at the base level and at every order
 * below @order, false otherwise.
 */
static bool __zone_watermark_ok(struct zone *z, int order, unsigned long mark,
              int classzone_idx, int alloc_flags, long free_pages)
{
    /* free_pages may go negative - that's OK */
    long threshold = mark;
    long reserve = z->lowmem_reserve[classzone_idx];
    int cur = 0;

    free_pages -= (1 << order) - 1;

    /* ALLOC_HIGH halves the threshold. */
    if (alloc_flags & ALLOC_HIGH)
        threshold -= threshold / 2;
    /* ALLOC_HARDER trims a further quarter of what is left. */
    if (alloc_flags & ALLOC_HARDER)
        threshold -= threshold / 4;
#ifdef CONFIG_CMA
    /*
     * We don't want to regard the pages on CMA region as free
     * on watermark checking, since they cannot be used for
     * unmovable/reclaimable allocation and they can suddenly
     * vanish through CMA allocation
     */
    if (IS_ENABLED(CONFIG_CMA) && z->managed_cma_pages)
        free_pages -= zone_page_state(z, NR_FREE_CMA_PAGES);
#endif

    /* Free pages must exceed the adjusted threshold plus the lowmem reserve. */
    if (free_pages <= threshold + reserve)
        return false;

    while (cur < order) {
        /* At the next order, this order's pages become unavailable */
        free_pages -= z->free_area[cur].nr_free << cur;

        /*
         * Require fewer higher order pages to be free; the shift amount
         * min_free_order_shift is defined outside this excerpt.
         */
        threshold >>= min_free_order_shift;

        if (free_pages <= threshold)
            return false;

        cur++;
    }

    return true;
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值