linux 启动之setup_arch函数(二)

本文详细解析了内核初始化阶段的内存管理与映射过程,包括页目录、页表、初始化分配和映射操作等核心概念。重点讨论了如何在不同场景下进行内存映射,如保留内存、分配大页、使用段映射等,并介绍了设备映射与内存资源请求的实现。此外,文章还涉及了CPU堆栈初始化和向量表的搬移,为深入理解Linux内核启动过程提供了一手资料。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

01static void __init alloc_init_section(pgd_t *pgd, unsigned long addr,
02          unsigned long end, unsigned long phys,
03          const struct mem_type *type)
04{
05 pmd_t *pmd = pmd_offset(pgd, addr);
06
07 /*
08  * Try a section mapping - end, addr and phys must all be aligned
09  * to a section boundary.  Note that PMDs refer to the individual
10  * L1 entries, whereas PGDs refer to a group of L1 entries making
11  * up one logical pointer to an L2 table.
12  */
13 if (((addr | end | phys) & ~SECTION_MASK) == 0) {
14  pmd_t *p = pmd;
15
16  if (addr & SECTION_SIZE)
17   pmd++;
18
19  do {
20   *pmd = __pmd(phys | type->prot_sect);
21   phys += SECTION_SIZE;
22  } while (pmd++, addr += SECTION_SIZE, addr != end);
23
24  flush_pmd_entry(p);
25 } else {
26  /*
27   * No need to loop; pte's aren't interested in the
28   * individual L1 entries.
29   */
30  alloc_init_pte(pmd, addr, end, __phys_to_pfn(phys), type);
31 }
32} 

第5行通过pmd_offset(pgd, addr)在pgd中取得addr对应的pmd表项指针。
第13行检查addr、end和phys是否都按段(section)边界对齐。
第14行用指针p记录pmd的初始值,映射完成后在第24行把它传给flush_pmd_entry。
第19行循环进行映射,每次循环映射一个段,循环次数为(end - addr) / SECTION_SIZE。
第20-21行MMU的映射可以有四种方式:段、大页、小页、无映射,这里使用的是段映射。第20行写入段描述符,以1M的段为单位进行映射;第21行每映射完一个段,物理地址phys增加SECTION_SIZE。
第22行pmd指针每次后移一个表项(4个字节),同时addr增加SECTION_SIZE,直到addr等于end为止。
第30行当addr、end、phys不全是段对齐时,调用alloc_init_pte建立二级页表,以页为单位进行映射。

01static unsigned long __init bootmap_bytes(unsigned long pages)
02{                                                                                                           
03 unsigned long bytes = (pages + 7) / 8;                      
04                                                              
05 return ALIGN(bytes, sizeof(long));                          
06}  
07unsigned long __init bootmem_bootmap_pages(unsigned long pages)
08{
09 unsigned long bytes = bootmap_bytes(pages);   
10 return PAGE_ALIGN(bytes) >> PAGE_SHIFT;
11}

第3行计算出位图需要多少字节(每个页占一位),这里加7是为了向上取整:即使只有一位,也需要分配一个字节。
第5行返回按sizeof(long)向上对齐后的字节数(在32位ARM上是4字节对齐)。
第9行调用bootmap_bytes得到位图所需的字节数。
第10行把总字节数按页向上对齐后右移PAGE_SHIFT(相当于除以4K),得到位图所需的页数;文中的例子得到的页数为4,而每一页位图包含4*1024*8位,每一位对应一个页帧。

02static unsigned int __init find_bootmap_pfn(int node, struct meminfo *mi, unsigned int bootmap_pages)
03{
04 unsigned int start_pfn, i, bootmap_pfn;
05
06 start_pfn   = PAGE_ALIGN(__pa(_end)) >> PAGE_SHIFT;
07 bootmap_pfn = 0;
08
09 for_each_nodebank(i, mi, node) {
10  struct membank *bank = &mi->bank[i];
11  unsigned int start, end;
12
13  start = bank_pfn_start(bank);
14  end   = bank_pfn_end(bank);
15
16  if (end < start_pfn)
17   continue;
18
19  if (start < start_pfn)
20   start = start_pfn;
21
22  if (end <= start)
23   continue;
24
25  if (end - start >= bootmap_pages) {
26   bootmap_pfn = start;
27   break;
28  }
29 }
30
31 if (bootmap_pfn == 0)
32  BUG();
33
34 return bootmap_pfn;
35}

第6行计算内核映像结束位置(_end)之后第一个可以使用的页帧号,PAGE_ALIGN宏用来把地址向上对齐到页边界。
第9行循环找出每个结点对应的内存,并记录下开始的页帧号和结束的页帧号。
第13行开始的页帧号
第14行结束的页帧号
第34行返回找到的、能够放下位图的第一个页帧号;如果没有找到(bootmap_pfn仍为0),第31-32行会触发BUG()。


 

01unsigned long __init init_bootmem_node(pg_data_t *pgdat, unsigned long freepfn,
02    unsigned long startpfn, unsigned long endpfn)
03{
04 return init_bootmem_core(pgdat->bdata, freepfn, startpfn, endpfn);
05}
06static void __init link_bootmem(bootmem_data_t *bdata)
07{
08 struct list_head *iter;
09
10 list_for_each(iter, &bdata_list) {
11  bootmem_data_t *ent;
12
13  ent = list_entry(iter, bootmem_data_t, list);
14  if (bdata->node_min_pfn < ent->node_min_pfn)
15   break;
16 }
17 list_add_tail(&bdata->list, iter);
18}
19static unsigned long __init init_bootmem_core(bootmem_data_t *bdata,
20 unsigned long mapstart, unsigned long start, unsigned long end)
21{
22 unsigned long mapsize;
23
24 mminit_validate_memmodel_limits(&start, &end);
25 bdata->node_bootmem_map = phys_to_virt(PFN_PHYS(mapstart));
26 bdata->node_min_pfn = start;
27 bdata->node_low_pfn = end;
28 link_bootmem(bdata);
29
30 /*
31  * Initially all pages are reserved - setup_arch() has to
32  * register free RAM areas explicitly.
33  */
34 mapsize = bootmap_bytes(end - start);
35 memset(bdata->node_bootmem_map, 0xff, mapsize);
36
37 bdebug("nid=%td start=%lx map=%lx end=%lx mapsize=%lx\n",
38  bdata - bootmem_node_data, start, mapstart, end, mapsize);
39
40 return mapsize;
41}
42
43 

这个函数初始化节点的bootmem位图,并返回位图的大小,以字节为单位。
第26行开始的页帧号
第34行计算出位图的长度(字节数)。
第35行用0xff把位图的每一位都置为1,即初始时所有页都标记为已保留,之后由setup_arch()显式登记空闲的内存区域。
第40行返回位图的大小(字节数)。注意这是bootmem位图的大小,而不是页表的大小;例如管理0x4000个页帧时,位图大小为0x4000/8=2K字节。

01void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
02         unsigned long size)
03{
04 unsigned long start, end;
05
06 start = PFN_UP(physaddr);
07 end = PFN_DOWN(physaddr + size);
08
09 mark_bootmem_node(pgdat->bdata, start, end, 0, 0);
10}
11static int __init mark_bootmem_node(bootmem_data_t *bdata,
12    unsigned long start, unsigned long end,
13    int reserve, int flags)
14{
15 unsigned long sidx, eidx;
16
17 bdebug("nid=%td start=%lx end=%lx reserve=%d flags=%x\n",
18  bdata - bootmem_node_data, start, end, reserve, flags);
19
20 BUG_ON(start < bdata->node_min_pfn);
21 BUG_ON(end > bdata->node_low_pfn);
22
23 sidx = start - bdata->node_min_pfn;
24 eidx = end - bdata->node_min_pfn;
25
26 if (reserve)
27  return __reserve(bdata, sidx, eidx, flags);
28 else
29  __free(bdata, sidx, eidx);
30 return 0;
31}
32static int __init __reserve(bootmem_data_t *bdata, unsigned long sidx,
33   unsigned long eidx, int flags)
34{
35 unsigned long idx;
36 int exclusive = flags & BOOTMEM_EXCLUSIVE;
37
38 bdebug("nid=%td start=%lx end=%lx flags=%x\n",
39  bdata - bootmem_node_data,
40  sidx + bdata->node_min_pfn,
41  eidx + bdata->node_min_pfn,
42  flags);
43
44 for (idx = sidx; idx < eidx; idx++)
45  if (test_and_set_bit(idx, bdata->node_bootmem_map)) {
46   if (exclusive) {
47    __free(bdata, sidx, idx);
48    return -EBUSY;
49   }
50   bdebug("silent double reserve of PFN %lx\n",
51    idx + bdata->node_min_pfn);
52  }
53 return 0;
54}
55  

第23行开始的索引号为0x00000000
第24行结束的索引号为0x00004000.
第26-27行如果reserve标志为真,就调用__reserve把这段页帧在位图中标记为保留;例如内核自身占用的空间就是需要保留的。
第28-29行如果reserve为假,就调用__free回收内存;回收时只需要把位图上相应的位清0就可以了。

01int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
02     unsigned long size, int flags)
03{
04 unsigned long start, end;
05
06 start = PFN_DOWN(physaddr);
07 end = PFN_UP(physaddr + size);
08
09 return mark_bootmem_node(pgdat->bdata, start, end, 1, flags);
10}

第6行计算出开始的页帧号。
第7行计算出结束的页帧号。
第9行调用mark_bootmem_node(reserve参数为1)来标识出哪些空间需要保留。

01static void __init devicemaps_init(struct machine_desc *mdesc)
02{
03 struct map_desc map;
04 unsigned long addr;
05 void *vectors;
06
07 /*
08  * Allocate the vector page early.
09  */
10 vectors = alloc_bootmem_low_pages(PAGE_SIZE);
11
12 for (addr = VMALLOC_END; addr; addr += PGDIR_SIZE)
13  pmd_clear(pmd_off_k(addr));
14
15 /*
16  * Map the kernel if it is XIP.
17  * It is always first in the modulearea.
18  */
19#ifdef CONFIG_XIP_KERNEL
20 map.pfn = __phys_to_pfn(CONFIG_XIP_PHYS_ADDR & SECTION_MASK);
21 map.virtual = MODULES_VADDR;
22 map.length = ((unsigned long)_etext - map.virtual + ~SECTION_MASK) & SECTION_MASK;
23 map.type = MT_ROM;
24 create_mapping(&map);
25#endif
26
27 /*
28  * Map the cache flushing regions.
29  */
30#ifdef FLUSH_BASE
31 map.pfn = __phys_to_pfn(FLUSH_BASE_PHYS);
32 map.virtual = FLUSH_BASE;
33 map.length = SZ_1M;
34 map.type = MT_CACHECLEAN;
35 create_mapping(&map);
36#endif
37#ifdef FLUSH_BASE_MINICACHE
38 map.pfn = __phys_to_pfn(FLUSH_BASE_PHYS + SZ_1M);
39 map.virtual = FLUSH_BASE_MINICACHE;
40 map.length = SZ_1M;
41 map.type = MT_MINICLEAN;
42 create_mapping(&map);
43#endif
44
45 /*
46  * Create a mapping for the machine vectors at the high-vectors
47  * location (0xffff0000).  If we aren't using high-vectors, also
48  * create a mapping at the low-vectors virtual address.
49  */
50 map.pfn = __phys_to_pfn(virt_to_phys(vectors));
51 map.virtual = 0xffff0000;
52 map.length = PAGE_SIZE;
53 map.type = MT_HIGH_VECTORS;
54 create_mapping(&map);
55
56 if (!vectors_high()) {
57  map.virtual = 0;
58  map.type = MT_LOW_VECTORS;
59  create_mapping(&map);
60 }
61
62 /*
63  * Ask the machine support to map in the statically mapped devices.
64  */
65 if (mdesc->map_io)
66  mdesc->map_io();
67
68 /*
69  * Finally flush the caches and tlb to ensure that we're in a
70  * consistent state wrt the writebuffer.  This also ensures that
71  * any write-allocated cache lines in the vector page are written
72  * back.  After this point, we can start to touch devices again.
73  */
74 local_flush_tlb_all();
75 flush_cache_all();
76}

第10行使用bootmem分配器(alloc_bootmem_low_pages)分配一页内存,用来存放向量表。
对设备的映射,和对内存的映射相似。

01static void __init kmap_init(void)
02{
03#ifdef CONFIG_HIGHMEM
04 pmd_t *pmd = pmd_off_k(PKMAP_BASE);
05 pte_t *pte = alloc_bootmem_low_pages(2 * PTRS_PER_PTE * sizeof(pte_t));
06 BUG_ON(!pmd_none(*pmd) || !pte);
07 __pmd_populate(pmd, __pa(pte) | _PAGE_KERNEL_TABLE);
08 pkmap_page_table = pte + PTRS_PER_PTE;
09#endif
10}

如果配置了高端内存(CONFIG_HIGHMEM),就为永久内核映射(pkmap)分配页表,并把它安装到PKMAP_BASE对应的pmd表项中。

01static void __init
02request_standard_resources(struct meminfo *mi, struct machine_desc *mdesc)
03{
04 struct resource *res;
05 int i;
06
07 kernel_code.start   = virt_to_phys(_text);
08 kernel_code.end     = virt_to_phys(_etext - 1);
09 kernel_data.start   = virt_to_phys(_data);
10 kernel_data.end     = virt_to_phys(_end - 1);
11
12 for (i = 0; i < mi->nr_banks; i++) {
13  if (mi->bank[i].size == 0)
14   continue;
15
16  res = alloc_bootmem_low(sizeof(*res));
17  res->name  = "System RAM";
18  res->start = mi->bank[i].start;
19  res->end   = mi->bank[i].start + mi->bank[i].size - 1;
20  res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
21
22  request_resource(&iomem_resource, res);
23
24  if (kernel_code.start >= res->start &&
25      kernel_code.end <= res->end)
26   request_resource(res, &kernel_code);
27  if (kernel_data.start >= res->start &&
28      kernel_data.end <= res->end)
29   request_resource(res, &kernel_data);
30 }
31
32 if (mdesc->video_start) {
33  video_ram.start = mdesc->video_start;
34  video_ram.end   = mdesc->video_end;
35  request_resource(&iomem_resource, &video_ram);
36 }
37
38 /*
39  * Some machines don't have the possibility of ever
40  * possessing lp0, lp1 or lp2
41  */
42 if (mdesc->reserve_lp0)
43  request_resource(&ioport_resource, &lp0);
44 if (mdesc->reserve_lp1)
45  request_resource(&ioport_resource, &lp1);
46 if (mdesc->reserve_lp2)
47  request_resource(&ioport_resource, &lp2);
48}

第4行定义一个指向资源结构体(struct resource)的指针
第7行kernel的代码段的物理开始地址
第8行kernel的代码段的物理结束地址
第9行kernel的数据段的物理开始地址
第10行kernel的数据段的物理结束地址
第16行使用alloc分配器分配空间
第17-20行对资源进行初始化
第22行调用request_resource,把这段"System RAM"资源登记到iomem_resource资源树中;这里只是登记地址范围的占用情况,并不建立内存映射
第24-26行如果内核代码段的物理地址范围落在该bank之内,就把kernel_code作为子资源登记到这个"System RAM"资源之下
第27-29行如果内核数据段的物理地址范围落在该bank之内,就把kernel_data作为子资源登记到这个"System RAM"资源之下

01void cpu_init(void)
02{
03 unsigned int cpu = smp_processor_id();
04 struct stack *stk = &stacks[cpu];
05
06 if (cpu >= NR_CPUS) {
07  printk(KERN_CRIT "CPU%u: bad primary CPU number\n", cpu);
08  BUG();
09 }
10
11 /*
12  * setup stacks for re-entrant exception handlers
13  */
14 __asm__ (
15 "msr cpsr_c, %1\n\t"
16 "add sp, %0, %2\n\t"
17 "msr cpsr_c, %3\n\t"
18 "add sp, %0, %4\n\t"
19 "msr cpsr_c, %5\n\t"
20 "add sp, %0, %6\n\t"
21 "msr cpsr_c, %7"
22     :
23     : "r" (stk),
24       "I" (PSR_F_BIT | PSR_I_BIT | IRQ_MODE),
25       "I" (offsetof(struct stack, irq[0])),
26       "I" (PSR_F_BIT | PSR_I_BIT | ABT_MODE),
27       "I" (offsetof(struct stack, abt[0])),
28       "I" (PSR_F_BIT | PSR_I_BIT | UND_MODE),
29       "I" (offsetof(struct stack, und[0])),
30       "I" (PSR_F_BIT | PSR_I_BIT | SVC_MODE)
31     : "r14");
32}

主要对CPU的堆栈进行初始化:依次切换到IRQ、ABT、UND三种异常模式并设置各自的堆栈指针,最后切回SVC模式。

01void __init early_trap_init(void)
02{
03 unsigned long vectors = CONFIG_VECTORS_BASE;
04 extern char __stubs_start[], __stubs_end[];
05 extern char __vectors_start[], __vectors_end[];
06 extern char __kuser_helper_start[], __kuser_helper_end[];
07 int kuser_sz = __kuser_helper_end - __kuser_helper_start;
08
09 /*
10  * Copy the vectors, stubs and kuser helpers (in entry-armv.S)
11  * into the vector page, mapped at 0xffff0000, and ensure these
12  * are visible to the instruction stream.
13  */
14 memcpy((void *)vectors, __vectors_start, __vectors_end - __vectors_start);
15 memcpy((void *)vectors + 0x200, __stubs_start, __stubs_end - __stubs_start);
16 memcpy((void *)vectors + 0x1000 - kuser_sz, __kuser_helper_start, kuser_sz);
17
18 /*
19  * Copy signal return handlers into the vector page, and
20  * set sigreturn to be a pointer to these.
21  */
22 memcpy((void *)KERN_SIGRETURN_CODE, sigreturn_codes,
23        sizeof(sigreturn_codes));
24
25 flush_icache_range(vectors, vectors + PAGE_SIZE);
26 modify_domain(DOMAIN_USER, DOMAIN_CLIENT);
27}

这个函数使用memcpy把向量表、stubs和kuser helper搬移到向量页(映射在0xffff0000)上,并把信号返回代码也拷贝到该页中。


                                                   

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值