Linux内核被bootloader加载到内存后,CPU最初执行的内核代码是arch/x86/boot/header.S汇编文件中的_start例程,该文件设置好启动头部(header),其中包括大量的bootloader参数。接着执行同一文件中的start_of_setup例程,这个例程在做了一些准备工作后,通过call main跳转到arch/x86/boot/main.c中的main()函数处执行,这就是众所周知的x86下的main函数;以上代码都工作在实模式下。在这个main()函数中,我们第一次看到与内存管理相关的代码:它调用detect_memory()函数探测系统物理内存。detect_memory()会依次尝试e820、e801和88三种BIOS探测方式,其中detect_memory_e820()通过int 0x15中断逐项读取内存段信息,并保存到boot_params.e820_map中。相关代码如下:
- void main(void) /* C entry point of the real-mode setup code: probes the machine, then switches to protected mode */
- {
- /* First, copy the boot header into the "zeropage" */
- copy_boot_params(); /* copy the header parameters into the boot_params variable */
- /* End of heap check */
- init_heap();
- /* Make sure we have all the proper CPU support */
- if (validate_cpu()) {
- puts("Unable to boot - please use a kernel appropriate "
- "for your CPU.\n");
- die();
- }
- /* Tell the BIOS what CPU mode we intend to run in. */
- set_bios_mode();
- /* Detect memory layout */
- detect_memory(); /* memory probing routine; its return value is not checked here */
- /* Set keyboard repeat rate (why?) */
- keyboard_set_repeat();
- /* Query MCA information */
- query_mca();
- /* Query Intel SpeedStep (IST) information */
- query_ist();
- /* Query APM information */
- #if defined(CONFIG_APM) || defined(CONFIG_APM_MODULE)
- query_apm_bios();
- #endif
- /* Query EDD information */
- #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
- query_edd();
- #endif
- /* Set the video mode */
- set_video();
- /* Parse command line for 'quiet' and pass it to decompressor. */
- if (cmdline_find_option_bool("quiet"))
- boot_params.hdr.loadflags |= QUIET_FLAG;
- /* Do the last things and invoke protected mode */
- go_to_protected_mode();
- }
- /*
-  * Run the three BIOS memory probes (e820, e801, 88) in that order.
-  * Every probe is invoked regardless of how the earlier ones fared.
-  * Returns 0 when at least one probe reports success, -1 otherwise.
-  */
- int detect_memory(void)
- {
-     /* e820 reports success with a positive count; the other two with zero. */
-     int e820_ok = detect_memory_e820() > 0;
-     int e801_ok = !detect_memory_e801();
-     int int88_ok = !detect_memory_88();
-     return (e820_ok || e801_ok || int88_ok) ? 0 : -1;
- }
- #define SMAP 0x534d4150 /* ASCII "SMAP" */
- static int detect_memory_e820(void) /* fills boot_params.e820_map via int 0x15/0xe820; returns the number of entries stored */
- {
- int count = 0; /* number of memory ranges recorded so far */
- struct biosregs ireg, oreg;
- struct e820entry *desc = boot_params.e820_map;
- static struct e820entry buf; /* static so it is zeroed */
- initregs(&ireg); /* initialise the registers held in ireg */
- ireg.ax = 0xe820;
- ireg.cx = sizeof buf; /* size of one e820entry structure */
- ireg.edx = SMAP; /* signature */
- ireg.di = (size_t)&buf; /* where the int 0x15 result is stored */
- /*
- * Note: at least one BIOS is known which assumes that the
- * buffer pointed to by one e820 call is the same one as
- * the previous call, and only changes modified fields. Therefore,
- * we use a temporary buffer and copy the results entry by entry.
- *
- * This routine deliberately does not try to account for
- * ACPI 3+ extended attributes. This is because there are
- * BIOSes in the field which report zero for the valid bit for
- * all ranges, and we don't currently make any use of the
- * other attribute bits. Revisit this if we see the extended
- * attribute bits deployed in a meaningful way in the future.
- */
- do {
- /* Registers handed to the BIOS through ireg for this call:
- ax = 0xe820
- edx = the SMAP signature
- di = address of buf
- ebx = value carried over from the previous call's oreg.ebx
- cx = sizeof buf
- Values read back from oreg after the call:
- eflags (carry flag tested below)
- eax (compared against SMAP below)
- ebx (copied into ireg.ebx for the next call)
- The entry itself is read from buf, which the interrupt 0x15
- call is expected to have filled in.
- */
- intcall(0x15, &ireg, &oreg);
- ireg.ebx = oreg.ebx; /* select the next entry */
- /* BIOSes which terminate the chain with CF = 1 as opposed
- to %ebx = 0 don't always report the SMAP signature on
- the final, failing, probe. */
- if (oreg.eflags & X86_EFLAGS_CF)
- break;
- /* Some BIOSes stop returning SMAP in the middle of
- the search loop. We don't know exactly how the BIOS
- screwed up the map at that point, we might have a
- partial map, the full map, or complete garbage, so
- just return failure. */
- if (oreg.eax != SMAP) {
- count = 0;
- break;
- }
- *desc++ = buf; /* copy buf into the current map slot, then advance desc */
- count++; /* one more entry recorded */
- } while (ireg.ebx && count < ARRAY_SIZE(boot_params.e820_map));
- /* store the entry count in boot_params.e820_entries and return it */
- return boot_params.e820_entries = count;
- }
这里存放中断返回值的e820entry结构,以及表示内存图的e820map结构均位于arch/x86/include/asm/e820.h中,如下:
- struct e820entry {
- __u64 addr; /* start of the memory segment */
- __u64 size; /* size of the memory segment */
- __u32 type; /* type of the memory segment */
- } __attribute__((packed));
- struct e820map {
- __u32 nr_map; /* number of entries of map[] currently in use */
- struct e820entry map[E820_X_MAX];
- };
对于32位的系统,通过调用链arch/x86/boot/main.c:main()--->arch/x86/boot/pm.c:go_to_protected_mode()--->arch/x86/boot/pmjump.S:protected_mode_jump()--->arch/x86/boot/compressed/head_32.S:startup_32()--->arch/x86/kernel/head_32.S:startup_32()--->arch/x86/kernel/head32.c:i386_start_kernel()--->init/main.c:start_kernel(),到达众所周知的Linux内核启动函数start_kernel()。start_kernel()会调用setup_arch()完成与体系结构相关的一系列初始化工作,其中就包括各种内存的初始化工作,如内存图的建立、管理区的初始化等等。对于x86体系结构,setup_arch()函数位于arch/x86/kernel/setup.c中,如下:
- void __init setup_arch(char **cmdline_p) /* architecture-specific setup; this excerpt keeps only the memory-related calls */
- {
- /* ...... */
- x86_init.oem.arch_setup();
- setup_memory_map(); /* build the memory map */
- parse_setup_data();
- /* update the e820_saved too */
- e820_reserve_setup_data();
- /* ...... */
- /*
- * partially used pages are not usable - thus
- * we are rounding upwards:
- */
- max_pfn = e820_end_of_ram_pfn(); /* find the highest usable page frame number */
- /* preallocate 4k for mptable mpc */
- early_reserve_e820_mpc_new();
- /* update e820 for memory not covered by WB MTRRs */
- mtrr_bp_init();
- if (mtrr_trim_uncached_memory(max_pfn))
- max_pfn = e820_end_of_ram_pfn();
- #ifdef CONFIG_X86_32
- /* max_low_pfn is updated here */
- find_low_pfn_range(); /* find the highest page frame number of low memory */
- #else
- num_physpages = max_pfn;
- /* ...... NOTE(review): the #endif matching the CONFIG_X86_32 conditional above is not shown in this excerpt */
- /* max_pfn_mapped is updated here */
- /* initialise the memory mapping */
- max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);
- max_pfn_mapped = max_low_pfn_mapped;
- #ifdef CONFIG_X86_64
- if (max_pfn > max_low_pfn) {
- max_pfn_mapped = init_memory_mapping(1UL<<32,
- max_pfn<<PAGE_SHIFT);
- /* can we preserve max_low_pfn ?*/
- max_low_pfn = max_pfn;
- }
- #endif
- /* ...... */
- initmem_init(0, max_pfn); /* bring up the memory allocator */
- /* ...... */
- x86_init.paging.pagetable_setup_start(swapper_pg_dir);
- paging_init(); /* build the complete page tables */
- x86_init.paging.pagetable_setup_done(swapper_pg_dir);
- /* ...... */
- }
从上面的代码可以看出,setup_arch()中与内存管理相关的主要步骤如下:
(1)建立内存图:setup_memory_map();
(2)调用e820_end_of_ram_pfn()找出最大可用页帧号max_pfn,调用find_low_pfn_range()找出低端内存区的最大可用页帧号max_low_pfn;
(3)初始化内存映射机制:init_memory_mapping();
(4)初始化内存分配器:initmem_init();
(5)建立完整的页表:paging_init()。
2、建立内存图
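内存探测完之后,就要建立描述各内存块情况的全局内存图结构了。对应的函数为setup_arch()--->arch/x86/kernel/e820.c:setup_memory_map()。它通过x86_init.resources.memory_setup回调完成实际工作,在x86下该回调默认指向default_machine_specific_memory_setup()。相关代码如下: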
- /*
-  * Build the e820 memory map through the x86_init memory_setup hook,
-  * keep a copy of it in e820_saved, and print it.
-  */
- void __init setup_memory_map(void)
- {
-     /* The hook returns a string naming where the map came from. */
-     char *origin = x86_init.resources.memory_setup();
-     /* Snapshot the map that was just built. */
-     memcpy(&e820_saved, &e820, sizeof(struct e820map));
-     printk(KERN_INFO "BIOS-provided physical RAM map:\n");
-     e820_print_map(origin);
- }
- /*
-  * Try to copy the BIOS-supplied E820 map into the global e820 map.
-  * If that fails, fake a two-region map: one region from 0 of
-  * LOWMEMSIZE() bytes, and one starting at HIGH_MEMORY sized from the
-  * larger of the two legacy BIOS values (alt_mem_k / ext_mem_k).
-  * Returns a string naming the source that was used.
-  */
- char *__init default_machine_specific_memory_setup(void)
- {
-     u32 nr_entries = boot_params.e820_entries;
-     u64 ext_kb;
-     char *source;
-     /* Remove overlaps; nr_entries receives the resulting entry count. */
-     sanitize_e820_map(boot_params.e820_map,
-                       ARRAY_SIZE(boot_params.e820_map),
-                       &nr_entries);
-     boot_params.e820_entries = nr_entries;
-     /* Normal case: the sanitized BIOS map was copied into e820. */
-     if (append_e820_map(boot_params.e820_map, boot_params.e820_entries) >= 0)
-         return "BIOS-e820";
-     /* compare results from other methods and take the greater */
-     if (boot_params.alt_mem_k < boot_params.screen_info.ext_mem_k) {
-         ext_kb = boot_params.screen_info.ext_mem_k;
-         source = "BIOS-88";
-     } else {
-         ext_kb = boot_params.alt_mem_k;
-         source = "BIOS-e801";
-     }
-     /* Discard whatever was appended before the failure and start over. */
-     e820.nr_map = 0;
-     e820_add_region(0, LOWMEMSIZE(), E820_RAM);
-     e820_add_region(HIGH_MEMORY, ext_kb << 10, E820_RAM);
-     /* In case someone cares... */
-     return source;
- }
- /*
-  * Copy the BIOS e820 map into a safe place, with overflow checking
-  * done by __append_e820_map(). On a modern system the setup code
-  * hands us a usable memory map to build on; otherwise the caller
-  * fakes one.
-  *
-  * A map with fewer than two entries (including a negative count) is
-  * rejected with -1 without being copied.
-  */
- static int __init append_e820_map(struct e820entry *biosmap, int nr_map)
- {
-     return (nr_map < 2) ? -1 : __append_e820_map(biosmap, nr_map);
- }
- /*
-  * Add nr_map entries, starting at biosmap, to the global e820 map.
-  * Returns 0 on success, or -1 as soon as an entry's start + size
-  * wraps around 64 bits; entries before it have already been added.
-  */
- static int __init __append_e820_map(struct e820entry *biosmap, int nr_map)
- {
-     struct e820entry *entry;
-     for (entry = biosmap; nr_map; entry++, nr_map--) {
-         u64 base = entry->addr;
-         u64 len = entry->size;
-         /* Overflow in 64 bits? Ignore the memory map. */
-         if (base + len < base)
-             return -1;
-         e820_add_region(base, len, entry->type);
-     }
-     return 0;
- }
- void __init e820_add_region(u64 start, u64 size, int type) /* add one region to the global e820 map */
- {
- __e820_add_region(&e820, start, size, type);
- }
- /*
-  * Append one memory region (start, size, type) to the map e820x.
-  * If the map has no free slot left, log an error and leave the map
-  * unchanged.
-  */
- static void __init __e820_add_region(struct e820map *e820x, u64 start, u64 size,
-                                      int type)
- {
-     int idx = e820x->nr_map;
-     struct e820entry *slot;
-     if (idx >= ARRAY_SIZE(e820x->map)) {
-         printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
-         return;
-     }
-     /* Fill the first unused slot, then count it. */
-     slot = &e820x->map[idx];
-     slot->addr = start;
-     slot->size = size;
-     slot->type = type;
-     e820x->nr_map++;
- }