5.4 初始化内存管理区列表
回到start_kernel函数,569行的build_all_zonelists()函数,来自mm/page_alloc.c:
2815void build_all_zonelists(void) 2816{ 2817 set_zonelist_order(); 2818 2819 if (system_state == SYSTEM_BOOTING) { 2820 __build_all_zonelists(NULL); 2821 mminit_verify_zonelist(); 2822 cpuset_init_current_mems_allowed(); 2823 } else { 2824 /* we have to stop all cpus to guarantee there is no user 2825 of zonelist */ 2826 stop_machine(__build_all_zonelists, NULL, NULL); 2827 /* cpuset refresh routine should be here */ 2828 } 2829 vm_total_pages = nr_free_pagecache_pages(); 2830 /* ……一大堆注释*/ 2837 if (vm_total_pages < (pageblock_nr_pages * MIGRATE_TYPES)) 2838 page_group_by_mobility_disabled = 1; 2839 else 2840 page_group_by_mobility_disabled = 0; 2841 2842 printk("Built %i zonelists in %s order, mobility grouping %s. " 2843 "Total pages: %ld/n", 2844 nr_online_nodes, 2845 zonelist_order_name[current_zonelist_order], 2846 page_group_by_mobility_disabled ? "off" : "on", 2847 vm_total_pages); 2848#ifdef CONFIG_NUMA 2849 printk("Policy zone: %s/n", zone_names[policy_zone]); 2850#endif 2851} |
在启动阶段(system_state == SYSTEM_BOOTING),其本质上就是直接调用__build_all_zonelists(NULL);否则则通过stop_machine间接调用同一个函数:
2780/* return values int ....just for stop_machine() */ 2781static int __build_all_zonelists(void *dummy) 2782{ 2783 int nid; 2784 int cpu; 2785 2786#ifdef CONFIG_NUMA 2787 memset(node_load, 0, sizeof(node_load)); 2788#endif 2789 for_each_online_node(nid) { 2790 pg_data_t *pgdat = NODE_DATA(nid); 2791 2792 build_zonelists(pgdat); 2793 build_zonelist_cache(pgdat); 2794 } 2795 2796 /* ……一大堆注释*/ 2809 for_each_possible_cpu(cpu) 2810 setup_pageset(&per_cpu(boot_pageset, cpu), 0); 2811 2812 return 0; 2813} |
2789行,for_each_online_node我们很熟悉了,在只有一个内存节点(非NUMA)的系统上,它是只执行一次的循环。2790行是最著名的pg_data_t,就是NODE_DATA(0)的那个结构。随后对该节点执行build_zonelists函数:
2637static void build_zonelists(pg_data_t *pgdat) 2638{ 2639 int j, node, load; 2640 enum zone_type i; 2641 nodemask_t used_mask; 2642 int local_node, prev_node; 2643 struct zonelist *zonelist; 2644 int order = current_zonelist_order; 2645 2646 /* initialize zonelists */ 2647 for (i = 0; i < MAX_ZONELISTS; i++) { 2648 zonelist = pgdat->node_zonelists + i; 2649 zonelist->_zonerefs[0].zone = NULL; 2650 zonelist->_zonerefs[0].zone_idx = 0; 2651 } 2652 2653 /* NUMA-aware ordering of nodes */ 2654 local_node = pgdat->node_id; 2655 load = nr_online_nodes; 2656 prev_node = local_node; 2657 nodes_clear(used_mask); 2658 2659 memset(node_order, 0, sizeof(node_order)); 2660 j = 0; 2661 2662 while ((node = find_next_best_node(local_node, &used_mask)) >= 0) { 2663 int distance = node_distance(local_node, node); 2664 2665 /* 2666 * If another node is sufficiently far away then it is better 2667 * to reclaim pages in a zone before going off node. 2668 */ 2669 if (distance > RECLAIM_DISTANCE) 2670 zone_reclaim_mode = 1; 2671 2672 /* 2673 * We don't want to pressure a particular node. 2674 * So adding penalty to the first node in same 2675 * distance group to make it round-robin. 2676 */ 2677 if (distance != node_distance(local_node, prev_node)) 2678 node_load[node] = load; 2679 2680 prev_node = node; 2681 load--; 2682 if (order == ZONELIST_ORDER_NODE) 2683 build_zonelists_in_node_order(pgdat, node); 2684 else 2685 node_order[j++] = node; /* remember order */ 2686 } 2687 2688 if (order == ZONELIST_ORDER_ZONE) { 2689 /* calculate node order -- i.e., DMA last! */ 2690 build_zonelists_in_zone_order(pgdat, j); 2691 } 2692 2693 build_thisnode_zonelists(pgdat); 2694} |
build_zonelists函数2647-2651行初始化NODE_DATA(0)的node_zonelists字段。我们继续走,接下来是build_zonelist_cache函数:
2697static void build_zonelist_cache(pg_data_t *pgdat) 2698{ 2699 struct zonelist *zonelist; 2700 struct zonelist_cache *zlc; 2701 struct zoneref *z; 2702 2703 zonelist = &pgdat->node_zonelists[0]; 2704 zonelist->zlcache_ptr = zlc = &zonelist->zlcache; 2705 bitmap_zero(zlc->fullzones, MAX_ZONES_PER_ZONELIST); 2706 for (z = zonelist->_zonerefs; z->zone; z++) 2707 zlc->z_to_n[z - zonelist->_zonerefs] = zonelist_node_idx(z); 2708} |
build_zonelist_cache函数初始化内存管理区的缓存,我这里就不深入下去了。回到build_all_zonelists()函数中,略去调试的代码,以及设置几个关于zone的策略的全局变量的代码,该函数就结束了。