Author: 李万鹏 (Li Wanpeng)
/*
 * Boot-time entry point: construct the fallback zonelists for every
 * online node, then log how many nodes were processed.
 */
void __init build_all_zonelists(void)
{
	int nid;

	for_each_online_node(nid)
		build_zonelists(NODE_DATA(nid));
	printk("Built %i zonelists\n", num_online_nodes());
}
UMA (!CONFIG_NUMA) version — simple round-robin zonelist construction:
/*
 * Append the usable zones of @pgdat to @zonelist, starting at index @j,
 * for an allocation whose "highest" permitted zone is @k.
 *
 * The switch falls through on purpose: ZONE_HIGHMEM appends HIGHMEM,
 * then NORMAL, then DMA; ZONE_NORMAL appends NORMAL then DMA; ZONE_DMA
 * appends DMA only.  Zones with no present pages are skipped.
 * Returns the next free index in zonelist->zones[].
 */
static int __init build_zonelists_node(pg_data_t *pgdat, struct zonelist *zonelist, int j, int k)
{
	switch (k) {
		struct zone *zone;	/* scratch; legal to declare here, before the first label */
	default:
		BUG();			/* unknown zone type */
		/* fallthrough */
	case ZONE_HIGHMEM:
		zone = pgdat->node_zones + ZONE_HIGHMEM;
		if (zone->present_pages) {
#ifndef CONFIG_HIGHMEM
			/* a populated HIGHMEM zone without CONFIG_HIGHMEM is a bug */
			BUG();
#endif
			zonelist->zones[j++] = zone;
		}
		/* fallthrough */
	case ZONE_NORMAL:
		zone = pgdat->node_zones + ZONE_NORMAL;
		if (zone->present_pages)
			zonelist->zones[j++] = zone;
		/* fallthrough */
	case ZONE_DMA:
		zone = pgdat->node_zones + ZONE_DMA;
		if (zone->present_pages)
			zonelist->zones[j++] = zone;
	}
	return j;
}
/*
 * Build the per-allocation-type zonelists for one node (non-NUMA
 * flavour).  The local node's zones come first; every other online
 * node follows in a rotation starting at node_id + 1, so that no
 * single remote node absorbs the fallback pressure of all nodes.
 */
static void __init build_zonelists(pg_data_t *pgdat)
{
	int type, off, nid, pos;
	int local = pgdat->node_id;

	for (type = 0; type < GFP_ZONETYPES; type++) {
		struct zonelist *zl = pgdat->node_zonelists + type;
		int ztype;

		memset(zl, 0, sizeof(*zl));

		/* highest zone this allocation type may draw from;
		 * __GFP_DMA wins if both modifier bits are set */
		ztype = (type & __GFP_DMA) ? ZONE_DMA :
			(type & __GFP_HIGHMEM) ? ZONE_HIGHMEM : ZONE_NORMAL;

		/* local zones first */
		pos = build_zonelists_node(pgdat, zl, 0, ztype);

		/*
		 * Then the remaining nodes, walking local+1 .. local-1
		 * (mod MAX_NUMNODES) so each node sees a different
		 * fallback ordering and no single node is preferred.
		 */
		for (off = 1; off < MAX_NUMNODES; off++) {
			nid = (local + off) % MAX_NUMNODES;
			if (!node_online(nid))
				continue;
			pos = build_zonelists_node(NODE_DATA(nid), zl, pos, ztype);
		}
		zl->zones[pos] = NULL;
	}
}
NUMA (CONFIG_NUMA) version — distance-aware zonelist construction:
/*
 * Topology distance between two nodes: LOCAL_DISTANCE for a node to
 * itself, REMOTE_DISTANCE for any other pair (no finer-grained
 * topology information is available here).
 */
#define node_distance(from,to) ((from) == (to) ? LOCAL_DISTANCE : REMOTE_DISTANCE)
/*
 * Build the per-allocation-type zonelists for one node (NUMA flavour).
 * Nodes are appended in order of increasing distance from the local
 * node, as chosen by find_next_best_node(); the first node of each new
 * distance group gets a node_load penalty so equal-distance neighbours
 * are consumed round-robin instead of every node preferring the same
 * one.
 */
static void __init build_zonelists(pg_data_t *pgdat)
{
	int type, ztype, end, nid, local;
	int prev, penalty;
	struct zonelist *zl;
	nodemask_t used;

	/* start every zonelist off empty */
	for (type = 0; type < GFP_ZONETYPES; type++) {
		zl = pgdat->node_zonelists + type;
		memset(zl, 0, sizeof(*zl));
		zl->zones[0] = NULL;
	}

	local = pgdat->node_id;
	penalty = num_online_nodes();
	prev = local;
	nodes_clear(used);

	/* pull nodes out one at a time, nearest first */
	while ((nid = find_next_best_node(local, &used)) >= 0) {
		/*
		 * Penalise the first node of each new distance group so
		 * the round-robin rotation starts elsewhere next time.
		 */
		if (node_distance(local, nid) != node_distance(local, prev))
			node_load[nid] += penalty;
		prev = nid;
		penalty--;

		/* append this node's zones to every zonelist */
		for (type = 0; type < GFP_ZONETYPES; type++) {
			zl = pgdat->node_zonelists + type;

			/* locate the current NULL terminator */
			for (end = 0; zl->zones[end] != NULL; end++)
				;

			/* __GFP_DMA wins if both modifier bits are set */
			ztype = (type & __GFP_DMA) ? ZONE_DMA :
				(type & __GFP_HIGHMEM) ? ZONE_HIGHMEM :
				ZONE_NORMAL;

			end = build_zonelists_node(NODE_DATA(nid), zl, end, ztype);
			zl->zones[end] = NULL;
		}
	}
}
Flat memory version (no CONFIG_DISCONTIGMEM):
/*
 * Flat-memory boot-time zone sizing: split low memory into ZONE_DMA
 * and ZONE_NORMAL at the ISA DMA limit, and (when the kernel supports
 * highmem) put everything above max_low_pfn into ZONE_HIGHMEM.
 */
void __init zone_sizes_init(void)
{
	unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
	unsigned int max_dma, high, low;

	/* highest pfn reachable by ISA DMA */
	max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
	low = max_low_pfn;
	high = highend_pfn;

	if (low >= max_dma) {
		zones_size[ZONE_DMA] = max_dma;
		zones_size[ZONE_NORMAL] = low - max_dma;
#ifdef CONFIG_HIGHMEM
		zones_size[ZONE_HIGHMEM] = high - low;
#endif
	} else {
		/* all of low memory fits below the DMA limit */
		zones_size[ZONE_DMA] = low;
	}

	free_area_init(zones_size);
}
Discontiguous memory version (CONFIG_DISCONTIGMEM):
/*
 * Boot-time zone sizing for discontiguous memory: thread every online
 * node's pg_data_t onto pgdat_list, then size and initialise the
 * DMA / NORMAL / HIGHMEM zones of each node individually.
 */
void __init zone_sizes_init(void)
{
	int nid;

	/*
	 * Insert nodes into pgdat_list backward so they appear in order.
	 * Clobber node 0's links and NULL out pgdat_list before starting.
	 */
	pgdat_list = NULL;
	for (nid = MAX_NUMNODES - 1; nid >= 0; nid--) {
		if (!node_online(nid))
			continue;
		/* node 0's pg_data_t is already set up; only clear the others */
		if (nid)
			memset(NODE_DATA(nid), 0, sizeof(pg_data_t));
		NODE_DATA(nid)->pgdat_next = pgdat_list;
		pgdat_list = NODE_DATA(nid);
	}

	for_each_online_node(nid) {
		unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
		unsigned long *zholes_size;
		unsigned int max_dma;

		unsigned long low = max_low_pfn;
		unsigned long start = node_start_pfn[nid];
		unsigned long high = node_end_pfn[nid];

		/* highest pfn reachable by ISA DMA */
		max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;

		if (start > low) {
#ifdef CONFIG_HIGHMEM
			/* node lies entirely above lowmem: HIGHMEM only */
			BUG_ON(start > high);
			zones_size[ZONE_HIGHMEM] = high - start;
#endif
		} else {
			if (low < max_dma)
				zones_size[ZONE_DMA] = low;
			else {
				/* sanity: low >= max_dma here, and the node must not be inverted */
				BUG_ON(max_dma > low);
				BUG_ON(low > high);
				zones_size[ZONE_DMA] = max_dma;
				zones_size[ZONE_NORMAL] = low - max_dma;
#ifdef CONFIG_HIGHMEM
				zones_size[ZONE_HIGHMEM] = high - low;
#endif
			}
		}
		zholes_size = get_zholes_size(nid);
		/*
		 * We let the lmem_map for node 0 be allocated from the
		 * normal bootmem allocator, but other nodes come from the
		 * remapped KVA area - mbligh
		 */
		if (!nid)
			free_area_init_node(nid, NODE_DATA(nid), zones_size, start, zholes_size);
		else {
			unsigned long lmem_map;

			/* place this node's mem_map in its remapped KVA
			 * window, just past the pg_data_t, page-aligned up */
			lmem_map = (unsigned long)node_remap_start_vaddr[nid];
			lmem_map += sizeof(pg_data_t) + PAGE_SIZE - 1;
			lmem_map &= PAGE_MASK;
			NODE_DATA(nid)->node_mem_map = (struct page *)lmem_map;
			free_area_init_node(nid, NODE_DATA(nid), zones_size, start, zholes_size);
		}
	}
	return;
}