作者:李万鹏
/*
 * Build the zonelists of every online node, then log the number of
 * online nodes.
 */
void __init build_all_zonelists(void)
{
	int nid;

	for_each_online_node(nid) {
		build_zonelists(NODE_DATA(nid));
	}

	printk("Built %i zonelists\n", num_online_nodes());
}
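UMA (non-NUMA) case — the build_zonelists_node() helper, followed by the simple round-robin build_zonelists():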
/*
 * Append the populated zones of @pgdat to @zonelist, starting at slot @j.
 *
 * @k names the highest zone type to consider.  That zone and every lower
 * one are examined in the order HIGHMEM -> NORMAL -> DMA, and each zone
 * that has present pages is stored in the next free slot.  Any value of
 * @k other than the three zone types hits BUG().
 *
 * Returns the index of the first slot that was not filled.
 */
static int __init build_zonelists_node(pg_data_t *pgdat, struct zonelist *zonelist, int j, int k)
{
	struct zone *zone;

	switch (k) {
	default:
		BUG();
		/* fall through */
	case ZONE_HIGHMEM:
		zone = &pgdat->node_zones[ZONE_HIGHMEM];
		if (zone->present_pages) {
#ifndef CONFIG_HIGHMEM
			/* Populated highmem zone in a build without CONFIG_HIGHMEM. */
			BUG();
#endif
			zonelist->zones[j] = zone;
			j++;
		}
		/* fall through */
	case ZONE_NORMAL:
		zone = &pgdat->node_zones[ZONE_NORMAL];
		if (zone->present_pages) {
			zonelist->zones[j] = zone;
			j++;
		}
		/* fall through */
	case ZONE_DMA:
		zone = &pgdat->node_zones[ZONE_DMA];
		if (zone->present_pages) {
			zonelist->zones[j] = zone;
			j++;
		}
	}

	return j;
}
/*
 * Fill in all GFP_ZONETYPES zonelists of @pgdat.
 *
 * Each list starts with the zones of @pgdat itself, continues with the
 * zones of the online nodes numbered above it, wraps around to the online
 * nodes numbered below it, and is terminated with a NULL entry.
 */
static void __init build_zonelists(pg_data_t *pgdat)
{
	int local_node = pgdat->node_id;
	int type;

	for (type = 0; type < GFP_ZONETYPES; type++) {
		struct zonelist *zonelist = pgdat->node_zonelists + type;
		int highest = ZONE_NORMAL;
		int nr_zones;
		int nid;

		memset(zonelist, 0, sizeof(*zonelist));

		/* The DMA bit is tested last, so it wins if both bits are set. */
		if (type & __GFP_HIGHMEM)
			highest = ZONE_HIGHMEM;
		if (type & __GFP_DMA)
			highest = ZONE_DMA;

		/* Zones of the local node come first. */
		nr_zones = build_zonelists_node(pgdat, zonelist, 0, highest);

		/*
		 * Now add the zones of all the other nodes.
		 * We don't want to pressure a particular node, so when
		 * building the list for node N, the zones coming right
		 * after the local ones are those of node N+1, wrapping
		 * around to node 0 after the last node.
		 */
		for (nid = local_node + 1; nid < MAX_NUMNODES; nid++) {
			if (node_online(nid))
				nr_zones = build_zonelists_node(NODE_DATA(nid),
								zonelist,
								nr_zones,
								highest);
		}
		for (nid = 0; nid < local_node; nid++) {
			if (node_online(nid))
				nr_zones = build_zonelists_node(NODE_DATA(nid),
								zonelist,
								nr_zones,
								highest);
		}

		zonelist->zones[nr_zones] = NULL;
	}
}
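NUMA case — node_distance() and the NUMA-aware build_zonelists() that orders nodes with find_next_best_node():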
/*
 * Distance between two nodes: LOCAL_DISTANCE when @from and @to are the
 * same node, REMOTE_DISTANCE otherwise.
 */
#define node_distance(from, to) \
	(((from) == (to)) ? LOCAL_DISTANCE : REMOTE_DISTANCE)
/*
 * Fill in all GFP_ZONETYPES zonelists of @pgdat, visiting nodes in the
 * order returned by find_next_best_node().
 *
 * Every list is first emptied.  Each node handed back by
 * find_next_best_node() then has its zones appended to the end of every
 * list, and the list is NULL-terminated again.
 */
static void __init build_zonelists(pg_data_t *pgdat)
{
	struct zonelist *zonelist;
	nodemask_t used_mask;
	int local_node, prev_node, node;
	int load;
	int type, slot, highest;

	/* initialize zonelists */
	for (type = 0; type < GFP_ZONETYPES; type++) {
		zonelist = &pgdat->node_zonelists[type];
		memset(zonelist, 0, sizeof(*zonelist));
		zonelist->zones[0] = NULL;
	}

	/* NUMA-aware ordering of nodes */
	local_node = pgdat->node_id;
	load = num_online_nodes();
	prev_node = local_node;
	nodes_clear(used_mask);

	for (;;) {
		node = find_next_best_node(local_node, &used_mask);
		if (node < 0)
			break;

		/*
		 * We don't want to pressure a particular node.
		 * So add a penalty to the first node of each distance
		 * group, to make selection round-robin.
		 */
		if (node_distance(local_node, node) !=
		    node_distance(local_node, prev_node))
			node_load[node] += load;

		prev_node = node;
		load--;

		for (type = 0; type < GFP_ZONETYPES; type++) {
			zonelist = &pgdat->node_zonelists[type];

			/* Locate the current NULL terminator of this list. */
			slot = 0;
			while (zonelist->zones[slot] != NULL)
				slot++;

			/* The DMA bit is tested last, so it wins if both bits are set. */
			highest = ZONE_NORMAL;
			if (type & __GFP_HIGHMEM)
				highest = ZONE_HIGHMEM;
			if (type & __GFP_DMA)
				highest = ZONE_DMA;

			slot = build_zonelists_node(NODE_DATA(node), zonelist,
						    slot, highest);
			zonelist->zones[slot] = NULL;
		}
	}
}
no discontig
/*
 * Derive the size of each zone from max_low_pfn, highend_pfn and the
 * page frame number of MAX_DMA_ADDRESS, then pass the table to
 * free_area_init().
 */
void __init zone_sizes_init(void)
{
	unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
	unsigned int max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
	unsigned int low = max_low_pfn;
	unsigned int high = highend_pfn;

	if (low >= max_dma) {
		/* Low memory reaches past the DMA limit: split it there. */
		zones_size[ZONE_DMA] = max_dma;
		zones_size[ZONE_NORMAL] = low - max_dma;
#ifdef CONFIG_HIGHMEM
		zones_size[ZONE_HIGHMEM] = high - low;
#endif
	} else {
		/* All of low memory lies below the DMA limit. */
		zones_size[ZONE_DMA] = low;
	}

	free_area_init(zones_size);
}
discontig
/*
 * Rebuild pgdat_list from the online nodes, then work out the zone sizes
 * of each online node and pass them to free_area_init_node().
 */
void __init zone_sizes_init(void)
{
	int nid;

	/*
	 * Insert nodes into pgdat_list backward so they appear in order.
	 * Clobber node 0's links and NULL out pgdat_list before starting.
	 */
	pgdat_list = NULL;
	nid = MAX_NUMNODES;
	while (--nid >= 0) {
		if (node_online(nid)) {
			/* Every node except node 0 has its pg_data_t zeroed. */
			if (nid != 0)
				memset(NODE_DATA(nid), 0, sizeof(pg_data_t));
			NODE_DATA(nid)->pgdat_next = pgdat_list;
			pgdat_list = NODE_DATA(nid);
		}
	}

	for_each_online_node(nid) {
		unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
		unsigned long low = max_low_pfn;
		unsigned long start = node_start_pfn[nid];
		unsigned long high = node_end_pfn[nid];
		unsigned int max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
		unsigned long *zholes_size;

		if (start <= low) {
			if (low >= max_dma) {
				BUG_ON(max_dma > low);
				BUG_ON(low > high);
				zones_size[ZONE_DMA] = max_dma;
				zones_size[ZONE_NORMAL] = low - max_dma;
#ifdef CONFIG_HIGHMEM
				zones_size[ZONE_HIGHMEM] = high - low;
#endif
			} else {
				zones_size[ZONE_DMA] = low;
			}
		} else {
			/* The node starts above max_low_pfn: highmem only. */
#ifdef CONFIG_HIGHMEM
			BUG_ON(start > high);
			zones_size[ZONE_HIGHMEM] = high - start;
#endif
		}

		zholes_size = get_zholes_size(nid);

		/*
		 * We let the lmem_map for node 0 be allocated from the
		 * normal bootmem allocator, but other nodes come from the
		 * remapped KVA area - mbligh
		 */
		if (nid != 0) {
			unsigned long lmem_map = (unsigned long)node_remap_start_vaddr[nid];

			/* Skip sizeof(pg_data_t) bytes, then round up to a page boundary. */
			lmem_map += sizeof(pg_data_t) + PAGE_SIZE - 1;
			lmem_map &= PAGE_MASK;
			NODE_DATA(nid)->node_mem_map = (struct page *)lmem_map;
		}

		free_area_init_node(nid, NODE_DATA(nid), zones_size,
				    start, zholes_size);
	}
}