buddy初始化

本文详细解析了Linux内核中的低端内存和高端内存释放过程,包括调用流程、关键函数如__free_pages、free_hot_page及free_pages_bulk的实现细节,展示了如何将内存释放到伙伴系统,完成初始化。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

释放低端内存

调用流程

mem_init() --> __free_all_bootmem() --> free_all_bootmem() --> free_all_bootmem_core(NODE_DATA(0)),即 free_all_bootmem_core(pgdat)

free_all_bootmem_core函数

259 static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat)             
260 {                                                                               
261         struct page *page;                                                      
262         bootmem_data_t *bdata = pgdat->bdata;                                   
263         unsigned long i, count, total = 0;                                      
264         unsigned long idx;                                                      
265         unsigned long *map;                                                     
266         int gofast = 0;                                                         
267                                                                                 
268         BUG_ON(!bdata->node_bootmem_map);                                       
269                                                                                 
270         count = 0;                                                              
271         /* first extant page of the node */                                     
272         page = virt_to_page(phys_to_virt(bdata->node_boot_start));              
273         idx = bdata->node_low_pfn - (bdata->node_boot_start >> PAGE_SHIFT);     
274         map = bdata->node_bootmem_map;                                          
275         /* Check physaddr is O(LOG2(BITS_PER_LONG)) page aligned */             
276         if (bdata->node_boot_start == 0 ||                                      
277             ffs(bdata->node_boot_start) - PAGE_SHIFT > ffs(BITS_PER_LONG))      
278                 gofast = 1;                                                     
279         for (i = 0; i < idx; ) {                                                
280                 unsigned long v = ~map[i / BITS_PER_LONG];                      
281                 if (gofast && v == ~0UL) {                                      
282                         int j, order;                                           
283                                                                                 
284                         count += BITS_PER_LONG;                                 
285                         __ClearPageReserved(page);                              
286                         order = ffs(BITS_PER_LONG) - 1;                         
287                         set_page_refs(page, order);                             
288                         for (j = 1; j < BITS_PER_LONG; j++) {                   
289                                 if (j + 16 < BITS_PER_LONG)                     
290                                         prefetchw(page + j + 16);                                                                                                                                             
291                                 __ClearPageReserved(page + j);                  
292                         }                                                       
293                            printk(KERN_ERR "tom page=%x\r\n",page);        
294                         if((page-mem_map)==0x100)                               
295                         {                                                       
296                            printk(KERN_ERR "tom order=%x\r\n",order);           
297                         }                                                       
298                         __free_pages(page, order);                              
299                         i += BITS_PER_LONG;                                     
300                         page += BITS_PER_LONG;                                  
301                 } else if (v) {                                                 
302                         unsigned long m;                                        
303                         for (m = 1; m && i < idx; m<<=1, page++, i++) {         
304                                 if (v & m) {                                    
305                                         count++;                                
306                                         __ClearPageReserved(page);              
307                                         set_page_refs(page, 0);                 
308                                         __free_page(page);                      
309                                 }                                               
310                         }                                                       
311                 } else {                                                        
312                         i+=BITS_PER_LONG;                                       
313                         page += BITS_PER_LONG;                                  
314                 }                                                               
315         }                                                                       
316         total += count;                                                         
317                                                                                 
318         /*                                                                      
319          * Now free the allocator bitmap itself, it's not                       
320          * needed anymore:                                                      
321          */                                                                     
322         page = virt_to_page(bdata->node_bootmem_map);                           
323         count = 0;                                                              
324         for (i = 0; i < ((bdata->node_low_pfn-(bdata->node_boot_start >> PAGE_SHIFT))/8 + PAGE_SIZE-1)/PAGE_SIZE; i++,page++) {                                                                               
325                 count++;                                                        
326                 __ClearPageReserved(page);                                      
327                 set_page_count(page, 1);                                        
328                 __free_page(page);                                              
329         }         
330         total += count;                                                         
331         bdata->node_bootmem_map = NULL;                                         
332                                                                                 
333         return total;                                                           
334 }                                                      

说明:
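unsigned long v = ~map[i / BITS_PER_LONG]:从内存位图数组map中取出一个字并按位取反,一次查询32(2^5,32位系统上BITS_PER_LONG=32)个页的使用情况。位图中某位为0表示对应的页空闲,取反后v中为1的位就对应可以释放的页。循环中分三种情况:
1)如果gofast为真且v == ~0UL(即位图中这个字为0),说明这32个页都没有被使用,直接调用__free_pages(page, order)一次释放32个页(order = ffs(BITS_PER_LONG) - 1 = 5);
2)否则如果v不为0,则逐位检查v,哪些位是1就释放该位对应的页,调用函数__free_page(page);
3)如果v为0,说明这32个页都在使用中,直接跳过这32个页。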

释放单个页

调用流程:__free_page(page)------>__free_pages((page), 0)---->free_hot_page(page)---->free_hot_cold_page(page,0)–>free_pages_bulk(zone, pcp->batch, &pcp->list, 0)

__free_pages()函数

#define __free_page(page) __free_pages((page), 0)

 901 fastcall void __free_pages(struct page *page, unsigned int order)               
 902 {                                                                               
 903         if (!PageReserved(page) && put_page_testzero(page)) {                   
 904                 if (order == 0)                                                 
 905                         free_hot_page(page);                                    
 906                 else                                                            
 907                         __free_pages_ok(page, order);                           
 908         }                                                                       
 909 } 

先判断是释放单个页,还是多个页。如果单个页,则调用free_hot_page,如果是释放多个页,则调用__free_pages_ok。

free_hot_cold_page

 593 void fastcall free_hot_page(struct page *page)                                  
 594 {                                                                               
 595         free_hot_cold_page(page, 0);                                                                                                            
 596 } 

 570 static void fastcall free_hot_cold_page(struct page *page, int cold)            
 571 {                                                                               
 572         struct zone *zone = page_zone(page);                                    
 573         struct per_cpu_pages *pcp;                                              
 574         unsigned long flags;                                                    
 575                                                                                 
 576         arch_free_page(page, 0);                                                
 577                                                                                 
 578         kernel_map_pages(page, 1, 0);                                           
 579         inc_page_state(pgfree);                                                 
 580         if (PageAnon(page))                                                     
 581                 page->mapping = NULL;                                           
 582         free_pages_check(__FUNCTION__, page);                                   
 583         pcp = &zone->pageset[get_cpu()].pcp[cold];                              
 584         local_irq_save(flags);                                                  
 585         if (pcp->count >= pcp->high)                                            
 586                 pcp->count -= free_pages_bulk(zone, pcp->batch, &pcp->list, 0); 
 587         list_add(&page->lru, &pcp->list);                                       
 588         pcp->count++;                                                                                                                           
 589         local_irq_restore(flags);                                               
 590         put_cpu();                                                              
 591 }

说明:
1)当pcp->count >= pcp->high,即per-cpu缓存中的页数达到或超过上限时,先调用free_pages_bulk把pcp->batch个页成批还给伙伴系统,并从pcp->count中减去free_pages_bulk的返回值。
2)然后通过list_add(&page->lru, &pcp->list)把这个页加入到per-cpu缓存页的链表中,并把pcp->count加1。

释放多个页

流程: __free_pages(page, order)---------->__free_pages_ok(page, order)----> free_pages_bulk(page_zone(page), 1, &list, order)

 236 static inline void __free_pages_bulk (struct page *page, struct page *base,     
 237                 struct zone *zone, unsigned int order)                                                   
 238 {                                                                                                        
 239         unsigned long page_idx;                                                                          
 240         struct page *coalesced;                                                                          
 241         int order_size = 1 << order;                                                                     
 242                                                                                                          
 243         if (unlikely(order))                                                                             
 244                 destroy_compound_page(page, order);                                                      
 245                                                                                                          
 246         page_idx = page - base;                                                                          
 247                                                                                                          
 248         BUG_ON(page_idx & (order_size - 1));                                                             
 249         BUG_ON(bad_range(zone, page));                                                                   
 250                                                                                                          
 251         zone->free_pages += order_size;                                                                  
 252         while (order < MAX_ORDER-1) {                                                                    
 253                 struct free_area *area;                                                                  
 254                 struct page *buddy;                                                                      
 255                 int buddy_idx; 
  256                                                                                 
 257                 buddy_idx = (page_idx ^ (1 << order));                          
 258                 buddy = base + buddy_idx;                                       
 259                 if(page== 0xc16fbc00)                                           
 260                         printk(KERN_ERR "tom  1 L=%d  page_idx=%x buddy_idx=%x M=%x\n",__LINE__,page_idx,buddy_idx,MAX_ORDER);
 261                                                                                 
 262                                                                                 
 263                 if(page== 0xc16fbc00)                                           
 264                 printk(KERN_ERR "tom L=%d page_idx=%x buddy_idx=%x page=%x buddy=%x buddy->private=%x order=%x\n",__LINE__,page_idx,buddy_idx,page,buddy,buddy->private,order);
 265                                                                                 
 266                 if (bad_range(zone, buddy))                                     
 267                 {                                                               
 268                 if(page== 0xc16fbc00)                                           
 269                 printk(KERN_ERR "tom L=%d page_idx=%x buddy_idx=%x page=%x buddy=%x buddy->private=%x\n",__LINE__,page_idx,buddy_idx,page,buddy,buddy->private);
 270                         break;                                                  
 271                 }                                                               
 272                 if (!page_is_buddy(buddy, order))                               
 273                 {                                                               
 274                 if(page== 0xc16fbc00)                                           
 275                 printk(KERN_ERR "tom L=%d page_idx=%x buddy_idx=%x page=%x buddy=%x buddy->private=%x\n",__LINE__,page_idx,buddy_idx,page,buddy,buddy->private);
 276                         break;                                                  
 277                 }                                                               
 278                 /* Move the buddy up one level. */                              
 279                 list_del(&buddy->lru);                                          
 280                 area = zone->free_area + order;                                 
 281                 area->nr_free--;                                                
 282                 rmv_page_order(buddy);                                          
 283                 page_idx &= buddy_idx;                                          
 284                 if(page== 0xc16fbc00)                                           
 285                         printk(KERN_ERR "tom  1 L=%d  page_idx=%x buddy_idx=%x M=%x\n",__LINE__,page_idx,buddy_idx,MAX_ORDER);
  286                 order++;                                                        
 287         }                                                                       
 288         coalesced = base + page_idx;                                            
 289         if(page== 0xc16fbc00)                                                   
 290                 printk(KERN_ERR "tom  L=%d  coalesced=%x order=%x\n",__LINE__,coalesced,order);
 291         set_page_order(coalesced, order);                                       
 292         list_add(&coalesced->lru, &zone->free_area[order].free_list);           
 293         zone->free_area[order].nr_free++;                                       
 294 }               

调试信息如下:

tom  1 L=260  page_idx=36de0 buddy_idx=36dc0 M=b
tom L=264 page_idx=36de0 buddy_idx=36dc0 page=c16fbc00 buddy=c16fb800 buddy->private=5 order=5
tom  1 L=285  page_idx=36dc0 buddy_idx=36dc0 M=b
tom  1 L=260  page_idx=36dc0 buddy_idx=36d80 M=b
tom L=264 page_idx=36dc0 buddy_idx=36d80 page=c16fbc00 buddy=c16fb000 buddy->private=6 order=6
tom  1 L=285  page_idx=36d80 buddy_idx=36d80 M=b
tom  1 L=260  page_idx=36d80 buddy_idx=36d00 M=b
tom L=264 page_idx=36d80 buddy_idx=36d00 page=c16fbc00 buddy=c16fa000 buddy->private=7 order=7
tom  1 L=285  page_idx=36d00 buddy_idx=36d00 M=b
tom  1 L=260  page_idx=36d00 buddy_idx=36c00 M=b
tom L=264 page_idx=36d00 buddy_idx=36c00 page=c16fbc00 buddy=c16f8000 buddy->private=8 order=8
tom  1 L=285  page_idx=36c00 buddy_idx=36c00 M=b
tom  1 L=260  page_idx=36c00 buddy_idx=36e00 M=b
tom L=264 page_idx=36c00 buddy_idx=36e00 page=c16fbc00 buddy=c16fc000 buddy->private=0 order=9
tom L=275 page_idx=36c00 buddy_idx=36e00 page=c16fbc00 buddy=c16fc000 buddy->private=0
tom  L=290  coalesced=c16f8000 order=9

流程图如下:
流程图
说明:
1)buddy_idx = (page_idx ^ (1 << order)),使用(1<<order)掩码的异或(XOR)转换page_idx第order位的值,如果这个位原先是0,buddy_idx就等于page_idx+order_size;相反,如果这个位原先是1,buddy_idx就等于page_idx-order_size.(order_size=2^order)
2)page_idx &= buddy_idx;把page_idx的第order位清零,结果就是page_idx和buddy_idx两者中较小的那个,也就是合并后的大块的起始页索引。
3)根据图中流程:
第一个流程图,page_idx=0x36c00,则buddy_idx=0x36c20,此时buddy_idx指向的页private=0,不能合并,于是把page_idx=0x36c00对应页的private置为5。
第二个流程图,page_idx=0x36c20,因为第5位是1,则buddy_idx=0x36c20-0x20=0x36c00,因为0x36c00指向的页private=5,则一起合并成6,并写进0x36c00中。
整个流程:刚开始所有页的private都为0。释放第一块时,buddy_idx = (page_idx ^ (1 << order))等于page_idx加上order_size,此时buddy_idx指向的页private为0,不能合并,于是把当前块以order=5加入空闲链表。释放下一块时,page_idx增加了order_size(32),对应的buddy_idx等于page_idx减去order_size,buddy_idx指向的页private为5,可以合并成order=6的块;由于page_idx &= buddy_idx,合并后的order写进两者中索引较小的页的private中。
4)把页加入伙伴系统
4.1)把页描述符flags中的PG_private置为1,并把private置为空闲块页数以2为底的对数,即order
4.2)把页的lru加入相应的zone->free_area[order].free_list中
4.3)把zone->free_area[order].nr_free加1
核心就是:
buddy_idx = (page_idx ^ (1 << order));

page_idx &= buddy_idx;
通过这两句话完成向前向后移动。

释放高端内存

调用流程:

mem_init----->set_highmem_pages_init(bad_ppro)—>one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro)—>__free_page(page)–>__free_pages((page), 0)---->free_hot_page(page)---->free_hot_cold_page(page,0)–>free_pages_bulk(zone, pcp->batch, &pcp->list, 0)

328 void __init set_highmem_pages_init(int bad_ppro)                                                                                                                                                              
329 {                                                                               
330         int pfn;                                                                
331         for (pfn = highstart_pfn; pfn < highend_pfn; pfn++)                     
332                 one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro);             
333         totalram_pages += totalhigh_pages;                                      
334 } 

314 void __init one_highpage_init(struct page *page, int pfn, int bad_ppro)                                                                                                                                       
315 {                                                                               
316         if (page_is_ram(pfn) && !(bad_ppro && page_kills_ppro(pfn))) {          
317                             printk(KERN_ERR "one_highpage_init=%x\r\n",page);   
318                 ClearPageReserved(page);                                        
319                 set_bit(PG_highmem, &page->flags);                              
320                 set_page_count(page, 1);                                        
321                 __free_page(page);                                              
322                 totalhigh_pages++;                                              
323         } else                                                                  
324                 SetPageReserved(page);                                          
325 }   

结论

通过把低端内存和高端内存释放到伙伴系统中完成伙伴系统的初始化。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值