kernel hacker修炼之道之内存管理-节点与管理区

浅析linux内核内存管理之节点与管理区

作者:李万鹏



void __init build_all_zonelists(void) { int i; for_each_online_node(i) build_zonelists(NODE_DATA(i)); printk("Built %i zonelists\n", num_online_nodes()); }


NUMA:


static int __init build_zonelists_node(pg_data_t *pgdat, struct zonelist *zonelist, int j, int k) { switch (k) { struct zone *zone; default: BUG(); case ZONE_HIGHMEM: zone = pgdat->node_zones + ZONE_HIGHMEM; if (zone->present_pages) { #ifndef CONFIG_HIGHMEM BUG(); #endif zonelist->zones[j++] = zone; } case ZONE_NORMAL: zone = pgdat->node_zones + ZONE_NORMAL; if (zone->present_pages) zonelist->zones[j++] = zone; case ZONE_DMA: zone = pgdat->node_zones + ZONE_DMA; if (zone->present_pages) zonelist->zones[j++] = zone; } return j; }



static void __init build_zonelists(pg_data_t *pgdat) { int i, j, k, node, local_node; local_node = pgdat->node_id; for (i = 0; i < GFP_ZONETYPES; i++) { struct zonelist *zonelist; zonelist = pgdat->node_zonelists + i; memset(zonelist, 0, sizeof(*zonelist)); j = 0; k = ZONE_NORMAL; if (i & __GFP_HIGHMEM) k = ZONE_HIGHMEM; if (i & __GFP_DMA) k = ZONE_DMA; j = build_zonelists_node(pgdat, zonelist, j, k); /* * Now we build the zonelist so that it contains the zones * of all the other nodes. * We don't want to pressure a particular node, so when * building the zones for node N, we make sure that the * zones coming right after the local ones are those from * node N+1 (modulo N) */ for (node = local_node + 1; node < MAX_NUMNODES; node++) { if (!node_online(node)) continue; j = build_zonelists_node(NODE_DATA(node), zonelist, j, k); } for (node = 0; node < local_node; node++) { if (!node_online(node)) continue; j = build_zonelists_node(NODE_DATA(node), zonelist, j, k); } zonelist->zones[j] = NULL; } }




UMA:


#define node_distance(from,to) ((from) == (to) ? LOCAL_DISTANCE : REMOTE_DISTANCE)


static int __init find_next_best_node(int node, nodemask_t *used_node_mask) { int i, n, val; int min_val = INT_MAX; int best_node = -1; for_each_online_node(i) { cpumask_t tmp; /* Start from local node */ n = (node+i) % num_online_nodes(); /* Don't want a node to appear more than once */ if (node_isset(n, *used_node_mask)) continue; /* Use the local node if we haven't already */ if (!node_isset(node, *used_node_mask)) { best_node = node; break; } /* Use the distance array to find the distance */ val = node_distance(node, n); /* Give preference to headless and unused nodes */ tmp = node_to_cpumask(n); if (!cpus_empty(tmp)) val += PENALTY_FOR_NODE_WITH_CPUS; /* Slight preference for less loaded node */ val *= (MAX_NODE_LOAD*MAX_NUMNODES); val += node_load[n]; if (val < min_val) { min_val = val; best_node = n; } } if (best_node >= 0) node_set(best_node, *used_node_mask); return best_node; }

static void __init build_zonelists(pg_data_t *pgdat) { int i, j, k, node, local_node; int prev_node, load; struct zonelist *zonelist; nodemask_t used_mask; /* initialize zonelists */ for (i = 0; i < GFP_ZONETYPES; i++) { zonelist = pgdat->node_zonelists + i; memset(zonelist, 0, sizeof(*zonelist)); zonelist->zones[0] = NULL; } /* NUMA-aware ordering of nodes */ local_node = pgdat->node_id; load = num_online_nodes(); prev_node = local_node; nodes_clear(used_mask); while ((node = find_next_best_node(local_node, &used_mask)) >= 0) { /* * We don't want to pressure a particular node. * So adding penalty to the first node in same * distance group to make it round-robin. */ if (node_distance(local_node, node) != node_distance(local_node, prev_node)) node_load[node] += load; prev_node = node; load--; for (i = 0; i < GFP_ZONETYPES; i++) { zonelist = pgdat->node_zonelists + i; for (j = 0; zonelist->zones[j] != NULL; j++); k = ZONE_NORMAL; if (i & __GFP_HIGHMEM) k = ZONE_HIGHMEM; if (i & __GFP_DMA) k = ZONE_DMA; j = build_zonelists_node(NODE_DATA(node), zonelist, j, k); zonelist->zones[j] = NULL; } } }
no discontig
void __init zone_sizes_init(void) { unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0}; unsigned int max_dma, high, low; max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; low = max_low_pfn; high = highend_pfn; if (low < max_dma) zones_size[ZONE_DMA] = low; else { zones_size[ZONE_DMA] = max_dma; zones_size[ZONE_NORMAL] = low - max_dma; #ifdef CONFIG_HIGHMEM zones_size[ZONE_HIGHMEM] = high - low; #endif } free_area_init(zones_size); }

discontig


void __init zone_sizes_init(void) { int nid; /* * Insert nodes into pgdat_list backward so they appear in order. * Clobber node 0's links and NULL out pgdat_list before starting. */ pgdat_list = NULL; for (nid = MAX_NUMNODES - 1; nid >= 0; nid--) { if (!node_online(nid)) continue; if (nid) memset(NODE_DATA(nid), 0, sizeof(pg_data_t)); NODE_DATA(nid)->pgdat_next = pgdat_list; pgdat_list = NODE_DATA(nid); } for_each_online_node(nid) { unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0}; unsigned long *zholes_size; unsigned int max_dma; unsigned long low = max_low_pfn; unsigned long start = node_start_pfn[nid]; unsigned long high = node_end_pfn[nid]; max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; if (start > low) { #ifdef CONFIG_HIGHMEM BUG_ON(start > high); zones_size[ZONE_HIGHMEM] = high - start; #endif } else { if (low < max_dma) zones_size[ZONE_DMA] = low; else { BUG_ON(max_dma > low); BUG_ON(low > high); zones_size[ZONE_DMA] = max_dma; zones_size[ZONE_NORMAL] = low - max_dma; #ifdef CONFIG_HIGHMEM zones_size[ZONE_HIGHMEM] = high - low; #endif } } zholes_size = get_zholes_size(nid); /* * We let the lmem_map for node 0 be allocated from the * normal bootmem allocator, but other nodes come from the * remapped KVA area - mbligh */ if (!nid) free_area_init_node(nid, NODE_DATA(nid), zones_size, start, zholes_size); else { unsigned long lmem_map; lmem_map = (unsigned long)node_remap_start_vaddr[nid]; lmem_map += sizeof(pg_data_t) + PAGE_SIZE - 1; lmem_map &= PAGE_MASK; NODE_DATA(nid)->node_mem_map = (struct page *)lmem_map; free_area_init_node(nid, NODE_DATA(nid), zones_size, start, zholes_size); } } return; }























评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值