释放低端内存
调用流程
mem_init----->__free_all_bootmem()—>free_all_bootmem()—>free_all_bootmem_core(NODE_DATA(0))–>
free_all_bootmem_core(pgdat)–>
free_all_bootmem_core函数
/*
 * free_all_bootmem_core - hand a node's boot-time pages to the page allocator.
 *
 * Walks bdata->node_bootmem_map one unsigned long at a time.  Every page
 * whose bit in the map is 0 has PG_reserved cleared and is passed to
 * __free_pages()/__free_page().  Afterwards the pages that hold the bitmap
 * itself are released as well and bdata->node_bootmem_map is set to NULL.
 *
 * Returns the number of pages released (mapped pages plus bitmap pages).
 */
259 static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat)
260 {
261 struct page *page;
262 bootmem_data_t *bdata = pgdat->bdata;
263 unsigned long i, count, total = 0;
264 unsigned long idx;
265 unsigned long *map;
266 int gofast = 0;
267
268 BUG_ON(!bdata->node_bootmem_map);
269
270 count = 0;
271 /* first extant page of the node */
272 page = virt_to_page(phys_to_virt(bdata->node_boot_start));
/* idx = number of page frames between node_boot_start and node_low_pfn */
273 idx = bdata->node_low_pfn - (bdata->node_boot_start >> PAGE_SHIFT);
274 map = bdata->node_bootmem_map;
275 /* Check physaddr is O(LOG2(BITS_PER_LONG)) page aligned */
276 if (bdata->node_boot_start == 0 ||
277 ffs(bdata->node_boot_start) - PAGE_SHIFT > ffs(BITS_PER_LONG))
278 gofast = 1;
279 for (i = 0; i < idx; ) {
/* v is the inverted map word: a 1 bit in v means the map bit was 0 */
280 unsigned long v = ~map[i / BITS_PER_LONG];
/*
 * Fast path: every bit of v is set, so all BITS_PER_LONG pages of this
 * word are released with one __free_pages() call of order
 * ffs(BITS_PER_LONG) - 1.  Only the first page gets set_page_refs();
 * the remaining pages just have PG_reserved cleared.
 */
281 if (gofast && v == ~0UL) {
282 int j, order;
283
284 count += BITS_PER_LONG;
285 __ClearPageReserved(page);
286 order = ffs(BITS_PER_LONG) - 1;
287 set_page_refs(page, order);
288 for (j = 1; j < BITS_PER_LONG; j++) {
289 if (j + 16 < BITS_PER_LONG)
290 prefetchw(page + j + 16);
291 __ClearPageReserved(page + j);
292 }
/*
 * Debug output (lines 293-297): prints every page handled by the fast
 * path, and the order when the page is entry 0x100 of mem_map.
 * NOTE(review): a struct page pointer is printed with %x.
 */
293 printk(KERN_ERR "tom page=%x\r\n",page);
294 if((page-mem_map)==0x100)
295 {
296 printk(KERN_ERR "tom order=%x\r\n",order);
297 }
298 __free_pages(page, order);
299 i += BITS_PER_LONG;
300 page += BITS_PER_LONG;
/*
 * Slow path: v is non-zero but the fast path did not apply.  Test each
 * bit and free, one page at a time, the pages whose bit in v is set.
 */
301 } else if (v) {
302 unsigned long m;
303 for (m = 1; m && i < idx; m<<=1, page++, i++) {
304 if (v & m) {
305 count++;
306 __ClearPageReserved(page);
307 set_page_refs(page, 0);
308 __free_page(page);
309 }
310 }
/* v == 0: no page of this word is freed; skip the whole word */
311 } else {
312 i+=BITS_PER_LONG;
313 page += BITS_PER_LONG;
314 }
315 }
316 total += count;
317
318 /*
319 * Now free the allocator bitmap itself, it's not
320 * needed anymore:
321 */
322 page = virt_to_page(bdata->node_bootmem_map);
323 count = 0;
/*
 * Loop bound: bitmap size in bytes (one bit per page frame, hence /8),
 * rounded up to whole pages.
 */
324 for (i = 0; i < ((bdata->node_low_pfn-(bdata->node_boot_start >> PAGE_SHIFT))/8 + PAGE_SIZE-1)/PAGE_SIZE; i++,page++) {
325 count++;
326 __ClearPageReserved(page);
327 set_page_count(page, 1);
328 __free_page(page);
329 }
330 total += count;
331 bdata->node_bootmem_map = NULL;
332
333 return total;
334 }
说明:
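unsigned long v = ~map[i / BITS_PER_LONG] 把内存位图数组map中的一个字按位取反,一次查询32(2^5)个页的使用情况(32位系统上BITS_PER_LONG=32)。位图中为0的位表示对应的页空闲,取反之后v中为1的位对应空闲页。如果v的所有位都是1(v == ~0UL)并且gofast为真,说明这32个页都没有被使用,直接调用函数 __free_pages(page, order) 一次性释放(order = ffs(BITS_PER_LONG) - 1 = 5);如果v不是0但不满足上面的条件,则逐位检查v,v中哪些位是1,就释放相应位对应的页page,调用函数__free_page(page);如果v是0,说明这32个页都不能释放,直接跳过这32个页。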
释放单个页
调用流程:__free_page(page)------>__free_pages((page), 0)---->free_hot_page(page)---->free_hot_cold_page(page,0)–>free_pages_bulk(zone, pcp->batch, &pcp->list, 0)
__free_pages()函数
/* __free_page(page): release a single page, i.e. __free_pages() with order 0 */
#define __free_page(page) __free_pages((page), 0)
/*
 * __free_pages - release the block of the given order that starts at page.
 *
 * Nothing is freed unless the page is not marked reserved and
 * put_page_testzero() returns true.  An order-0 request goes to
 * free_hot_page(); any other order goes to __free_pages_ok().
 */
901 fastcall void __free_pages(struct page *page, unsigned int order)
902 {
903 if (!PageReserved(page) && put_page_testzero(page)) {
904 if (order == 0)
905 free_hot_page(page);
906 else
907 __free_pages_ok(page, order);
908 }
909 }
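首先检查该页不是保留页(!PageReserved(page)),并且put_page_testzero(page)返回真(引用计数减1后变为0),只有满足这两个条件才会真正释放。然后判断是释放单个页,还是多个页:如果是单个页(order == 0),则调用free_hot_page;如果是释放多个页,则调用__free_pages_ok。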
free_hot_cold_page
/* free_hot_page - free a single page via free_hot_cold_page() with cold = 0 */
593 void fastcall free_hot_page(struct page *page)
594 {
595 free_hot_cold_page(page, 0);
596 }
/*
 * free_hot_cold_page - put one page on a per-cpu page list of its zone.
 *
 * @page: the page being freed
 * @cold: index into the pcp[] array of the current CPU's pageset
 *
 * If the chosen list already holds at least pcp->high pages,
 * free_pages_bulk() is first called with pcp->batch and its return value
 * is subtracted from pcp->count.  The page is then added to the list and
 * pcp->count is incremented.
 */
570 static void fastcall free_hot_cold_page(struct page *page, int cold)
571 {
572 struct zone *zone = page_zone(page);
573 struct per_cpu_pages *pcp;
574 unsigned long flags;
575
576 arch_free_page(page, 0);
577
578 kernel_map_pages(page, 1, 0);
579 inc_page_state(pgfree);
580 if (PageAnon(page))
581 page->mapping = NULL;
582 free_pages_check(__FUNCTION__, page);
/* get_cpu() here is paired with put_cpu() at the end of the function */
583 pcp = &zone->pageset[get_cpu()].pcp[cold];
/* the list and counter update is bracketed by local_irq_save/restore */
584 local_irq_save(flags);
585 if (pcp->count >= pcp->high)
586 pcp->count -= free_pages_bulk(zone, pcp->batch, &pcp->list, 0);
587 list_add(&page->lru, &pcp->list);
588 pcp->count++;
589 local_irq_restore(flags);
590 put_cpu();
591 }
说明:
1)pcp->count >= pcp->high:当per-cpu缓存中的页数大于等于上限pcp->high时,先调用free_pages_bulk把pcp->batch个页归还给伙伴系统,并从pcp->count中减去实际归还的页数。然后通过list_add(&page->lru, &pcp->list)把当前要释放的页加入到per-cpu缓存页的链表中,并把pcp->count加1。
释放多个页
流程: __free_pages(page, order)---------->__free_pages_ok(page, order)----> free_pages_bulk(page_zone(page), 1, &list, order)---->__free_pages_bulk(page, base, zone, order)
__free_pages_bulk()函数
/*
 * __free_pages_bulk - insert a block of 2^order pages into a zone's free
 * lists, merging it with its buddy as long as that is possible.
 *
 * @page:  first page of the block being freed
 * @base:  page that index 0 refers to (page_idx = page - base)
 * @zone:  zone whose free_pages counter and free_area[] lists are updated
 * @order: order of the block being freed
 *
 * The loop stops when order reaches MAX_ORDER-1, when the buddy fails
 * bad_range(), or when page_is_buddy() rejects it.  The resulting block
 * is then added to zone->free_area[order].
 *
 * NOTE(review): every "if(page== 0xc16fbc00) printk(...)" below is debug
 * tracing that only fires for that one struct page address; it compares
 * a pointer with an integer literal and prints pointers with %x.
 */
236 static inline void __free_pages_bulk (struct page *page, struct page *base,
237 struct zone *zone, unsigned int order)
238 {
239 unsigned long page_idx;
240 struct page *coalesced;
241 int order_size = 1 << order;
242
243 if (unlikely(order))
244 destroy_compound_page(page, order);
245
246 page_idx = page - base;
247
/* the block's index must be a multiple of its size */
248 BUG_ON(page_idx & (order_size - 1));
249 BUG_ON(bad_range(zone, page));
250
251 zone->free_pages += order_size;
252 while (order < MAX_ORDER-1) {
253 struct free_area *area;
254 struct page *buddy;
255 int buddy_idx;
256
/* flip bit 'order' of page_idx to get the index of the buddy block */
257 buddy_idx = (page_idx ^ (1 << order));
258 buddy = base + buddy_idx;
259 if(page== 0xc16fbc00)
260 printk(KERN_ERR "tom 1 L=%d page_idx=%x buddy_idx=%x M=%x\n",__LINE__,page_idx,buddy_idx,MAX_ORDER);
261
262
/* NOTE(review): reads buddy->private before bad_range() has checked buddy */
263 if(page== 0xc16fbc00)
264 printk(KERN_ERR "tom L=%d page_idx=%x buddy_idx=%x page=%x buddy=%x buddy->private=%x order=%x\n",__LINE__,page_idx,buddy_idx,page,buddy,buddy->private,order);
265
266 if (bad_range(zone, buddy))
267 {
268 if(page== 0xc16fbc00)
269 printk(KERN_ERR "tom L=%d page_idx=%x buddy_idx=%x page=%x buddy=%x buddy->private=%x\n",__LINE__,page_idx,buddy_idx,page,buddy,buddy->private);
270 break;
271 }
272 if (!page_is_buddy(buddy, order))
273 {
274 if(page== 0xc16fbc00)
275 printk(KERN_ERR "tom L=%d page_idx=%x buddy_idx=%x page=%x buddy=%x buddy->private=%x\n",__LINE__,page_idx,buddy_idx,page,buddy,buddy->private);
276 break;
277 }
278 /* Move the buddy up one level. */
279 list_del(&buddy->lru);
280 area = zone->free_area + order;
281 area->nr_free--;
282 rmv_page_order(buddy);
/*
 * page_idx and buddy_idx differ only in bit 'order'; the AND clears that
 * bit, giving the index of the first page of the merged block.
 */
283 page_idx &= buddy_idx;
284 if(page== 0xc16fbc00)
285 printk(KERN_ERR "tom 1 L=%d page_idx=%x buddy_idx=%x M=%x\n",__LINE__,page_idx,buddy_idx,MAX_ORDER);
286 order++;
287 }
/* record the final order on the block's first page and queue the block */
288 coalesced = base + page_idx;
289 if(page== 0xc16fbc00)
290 printk(KERN_ERR "tom L=%d coalesced=%x order=%x\n",__LINE__,coalesced,order);
291 set_page_order(coalesced, order);
292 list_add(&coalesced->lru, &zone->free_area[order].free_list);
293 zone->free_area[order].nr_free++;
294 }
调试信息如下:
tom 1 L=260 page_idx=36de0 buddy_idx=36dc0 M=b
tom L=264 page_idx=36de0 buddy_idx=36dc0 page=c16fbc00 buddy=c16fb800 buddy->private=5 order=5
tom 1 L=285 page_idx=36dc0 buddy_idx=36dc0 M=b
tom 1 L=260 page_idx=36dc0 buddy_idx=36d80 M=b
tom L=264 page_idx=36dc0 buddy_idx=36d80 page=c16fbc00 buddy=c16fb000 buddy->private=6 order=6
tom 1 L=285 page_idx=36d80 buddy_idx=36d80 M=b
tom 1 L=260 page_idx=36d80 buddy_idx=36d00 M=b
tom L=264 page_idx=36d80 buddy_idx=36d00 page=c16fbc00 buddy=c16fa000 buddy->private=7 order=7
tom 1 L=285 page_idx=36d00 buddy_idx=36d00 M=b
tom 1 L=260 page_idx=36d00 buddy_idx=36c00 M=b
tom L=264 page_idx=36d00 buddy_idx=36c00 page=c16fbc00 buddy=c16f8000 buddy->private=8 order=8
tom 1 L=285 page_idx=36c00 buddy_idx=36c00 M=b
tom 1 L=260 page_idx=36c00 buddy_idx=36e00 M=b
tom L=264 page_idx=36c00 buddy_idx=36e00 page=c16fbc00 buddy=c16fc000 buddy->private=0 order=9
tom L=275 page_idx=36c00 buddy_idx=36e00 page=c16fbc00 buddy=c16fc000 buddy->private=0
tom L=290 coalesced=c16f8000 order=9
流程图如下:
说明:
1)buddy_idx = (page_idx ^ (1 << order)),使用(1<<order)掩码的异或(XOR)转换page_idx第order位的值,如果这个位原先是0,buddy_idx就等于page_idx+order_size;相反,如果这个位原先是1,buddy_idx就等于page_idx-order_size.(order_size=2^order)
2)page_idx &= buddy_idx;page_idx和buddy_idx只有第order位不同,按位与之后这一位被清0,结果就是page_idx和buddy_idx中较小的那个值,也就是合并之后更大的块(order+1)的起始页索引。
3)根据图中流程:
第一个流程图,page_idx=0x36c00,则buddy_idx=0x36c20,显然buddy_idx指向的页private=0,则把page_idx=0x36c00的private=5。
第二个流程图,page_idx=0x36c20,因为第5位是1,则buddy_idx=0x36c20-0x20=0x36c00,因为0x36c00指向的页private=5(它的伙伴已经空闲且order相同),则两个块合并成order为6的块,并把private=6写进0x36c00对应的页中。
整个流程:刚开始所有页的private都为0。释放第一个块时,buddy_idx = (page_idx ^ (1 << order))相当于page_idx加上order_size,此时buddy_idx指向的页private为0(伙伴还没有被释放),不能合并,该块以order=5加入空闲链表;下一次释放的块page_idx增加了order_size(32),对应的buddy_idx则是page_idx减去order_size,这时buddy_idx指向的页private为5(伙伴已经空闲),则可以合并成order为6的块;因为page_idx &= buddy_idx,合并后的order写进索引较小的那个页的private中。
4)把页加入伙伴系统
4.1)把页描述符的flag中PG_private置为1,然后把private置为空闲块页数以2为底的对数,也就是order
4.2)把页的lru加入相应的zone->free_area[order].free_list中
4.3)增加zone->free_area[order].nr_free++
核心就是:
buddy_idx = (page_idx ^ (1 << order));
page_idx &= buddy_idx;
通过这两条语句完成伙伴的查找(伙伴可能在当前块的前面,也可能在后面)以及合并后块的起始索引的计算。
释放高端内存
调用流程:
mem_init----->set_highmem_pages_init(bad_ppro)—>one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro)—>__free_page(page)–>__free_pages((page), 0)---->free_hot_page(page)---->free_hot_cold_page(page,0)–>free_pages_bulk(zone, pcp->batch, &pcp->list, 0)
/*
 * set_highmem_pages_init - call one_highpage_init() for every page frame
 * in [highstart_pfn, highend_pfn), then add totalhigh_pages to
 * totalram_pages.
 *
 * @bad_ppro: passed through unchanged to one_highpage_init()
 */
328 void __init set_highmem_pages_init(int bad_ppro)
329 {
330 int pfn;
331 for (pfn = highstart_pfn; pfn < highend_pfn; pfn++)
332 one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro);
333 totalram_pages += totalhigh_pages;
334 }
/*
 * one_highpage_init - release one high-memory page, or mark it reserved.
 *
 * The page is freed when page_is_ram(pfn) is true and it is not excluded
 * by (bad_ppro && page_kills_ppro(pfn)): PG_reserved is cleared,
 * PG_highmem is set, the count is set to 1, the page goes through
 * __free_page() and totalhigh_pages is incremented.  Any other page is
 * marked reserved instead.
 */
314 void __init one_highpage_init(struct page *page, int pfn, int bad_ppro)
315 {
316 if (page_is_ram(pfn) && !(bad_ppro && page_kills_ppro(pfn))) {
/* debug output; NOTE(review): a struct page pointer is printed with %x */
317 printk(KERN_ERR "one_highpage_init=%x\r\n",page);
318 ClearPageReserved(page);
319 set_bit(PG_highmem, &page->flags);
320 set_page_count(page, 1);
321 __free_page(page);
322 totalhigh_pages++;
323 } else
324 SetPageReserved(page);
325 }
结论
通过把低端内存和高端内存释放到伙伴系统中完成伙伴系统的初始化。