初始化内存管理区列表

本文介绍了Linux内核启动过程中的内存管理区初始化,重点解析了start_kernel函数中调用的build_all_zonelists()函数,以及如何通过for_each_online_node循环和build_zonelists函数来构建内存管理区列表。同时提到了build_zonelist_cache函数在初始化内存管理区缓存中的作用。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

5.4 初始化内存管理区列表

回到start_kernel函数,569行调用build_all_zonelists()函数,该函数来自mm/page_alloc.c:

 

2815void build_all_zonelists(void)

2816{

2817        set_zonelist_order();

2818

2819        if (system_state == SYSTEM_BOOTING) {

2820                __build_all_zonelists(NULL);

2821                mminit_verify_zonelist();

2822                cpuset_init_current_mems_allowed();

2823        } else {

2824                /* we have to stop all cpus to guarantee there is no user

2825                   of zonelist */

2826                stop_machine(__build_all_zonelists, NULL, NULL);

2827                /* cpuset refresh routine should be here */

2828        }

2829        vm_total_pages = nr_free_pagecache_pages();

2830        /* ……一大堆注释*/

2837        if (vm_total_pages < (pageblock_nr_pages * MIGRATE_TYPES))

2838                page_group_by_mobility_disabled = 1;

2839        else

2840                page_group_by_mobility_disabled = 0;

2841

2842        printk("Built %i zonelists in %s order, mobility grouping %s.  "

2843                "Total pages: %ld\n",

2844                        nr_online_nodes,

2845                        zonelist_order_name[current_zonelist_order],

2846                        page_group_by_mobility_disabled ? "off" : "on",

2847                        vm_total_pages);

2848#ifdef CONFIG_NUMA

2849        printk("Policy zone: %s\n", zone_names[policy_zone]);

2850#endif

2851}

 

其本质上是调用__build_all_zonelists(NULL):

 

2780/* return values int ....just for stop_machine() */

2781static int __build_all_zonelists(void *dummy)

2782{

2783        int nid;

2784        int cpu;

2785

2786#ifdef CONFIG_NUMA

2787        memset(node_load, 0, sizeof(node_load));

2788#endif

2789        for_each_online_node(nid) {

2790                pg_data_t *pgdat = NODE_DATA(nid);

2791

2792                build_zonelists(pgdat);

2793                build_zonelist_cache(pgdat);

2794        }

2795

2796        /* ……一大堆注释*/

2809        for_each_possible_cpu(cpu)

2810                setup_pageset(&per_cpu(boot_pageset, cpu), 0);

2811

2812        return 0;

2813}

 

2789行的for_each_online_node我们已经很熟悉了,在只有一个在线内存节点(节点0)的系统上,这个循环只执行一次。2790行取得的是最著名的pg_data_t,也就是NODE_DATA(0)对应的那个结构。随后执行build_zonelists函数:

 

2637static void build_zonelists(pg_data_t *pgdat)

2638{

2639        int j, node, load;

2640        enum zone_type i;

2641        nodemask_t used_mask;

2642        int local_node, prev_node;

2643        struct zonelist *zonelist;

2644        int order = current_zonelist_order;

2645

2646        /* initialize zonelists */

2647        for (i = 0; i < MAX_ZONELISTS; i++) {

2648                zonelist = pgdat->node_zonelists + i;

2649                zonelist->_zonerefs[0].zone = NULL;

2650                zonelist->_zonerefs[0].zone_idx = 0;

2651        }

2652

2653        /* NUMA-aware ordering of nodes */

2654        local_node = pgdat->node_id;

2655        load = nr_online_nodes;

2656        prev_node = local_node;

2657        nodes_clear(used_mask);

2658

2659        memset(node_order, 0, sizeof(node_order));

2660        j = 0;

2661

2662        while ((node = find_next_best_node(local_node, &used_mask)) >= 0) {

2663                int distance = node_distance(local_node, node);

2664

2665                /*

2666                 * If another node is sufficiently far away then it is better

2667                 * to reclaim pages in a zone before going off node.

2668                 */

2669                if (distance > RECLAIM_DISTANCE)

2670                        zone_reclaim_mode = 1;

2671

2672                /*

2673                 * We don't want to pressure a particular node.

2674                 * So adding penalty to the first node in same

2675                 * distance group to make it round-robin.

2676                 */

2677                if (distance != node_distance(local_node, prev_node))

2678                        node_load[node] = load;

2679

2680                prev_node = node;

2681                load--;

2682                if (order == ZONELIST_ORDER_NODE)

2683                        build_zonelists_in_node_order(pgdat, node);

2684                else

2685                        node_order[j++] = node; /* remember order */

2686        }

2687

2688        if (order == ZONELIST_ORDER_ZONE) {

2689                /* calculate node order -- i.e., DMA last! */

2690                build_zonelists_in_zone_order(pgdat, j);

2691        }

2692

2693        build_thisnode_zonelists(pgdat);

2694}

 

build_zonelists函数的2647-2651行初始化NODE_DATA(0)的node_zonelists字段。我们继续走:

 

2697static void build_zonelist_cache(pg_data_t *pgdat)

2698{

2699        struct zonelist *zonelist;

2700        struct zonelist_cache *zlc;

2701        struct zoneref *z;

2702

2703        zonelist = &pgdat->node_zonelists[0];

2704        zonelist->zlcache_ptr = zlc = &zonelist->zlcache;

2705        bitmap_zero(zlc->fullzones, MAX_ZONES_PER_ZONELIST);

2706        for (z = zonelist->_zonerefs; z->zone; z++)

2707                zlc->z_to_n[z - zonelist->_zonerefs] = zonelist_node_idx(z);

2708}

 

build_zonelist_cache函数初始化内存管理区的缓存,我这里就不深入下去了。回到build_all_zonelists()函数中,略去调试的代码,以及设置几个关于zone的策略的全局变量的代码,该函数就结束了。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值