start_kernel()中调用了一系列初始化函数,以完成kernel本身的设置。这些动作有的是公共的,有的则需要经过配置才会执行。
在start_kernel()函数中,依次完成以下工作:
输出Linux版本信息(printk(linux_banner))
设置与体系结构相关的环境(setup_arch())
页表结构初始化(paging_init())
使用"arch/alpha/kernel/entry.S"中的入口点设置系统自陷入口(trap_init())
使用alpha_mv结构和entry.S入口初始化系统IRQ(init_IRQ())
核心进程调度器初始化(包括初始化几个缺省的Bottom-half,sched_init())
时间、定时器初始化(包括读取CMOS时钟、估测主频、初始化定时器中断等,time_init())
提取并分析核心启动参数(从环境变量中读取参数,设置相应标志位等待处理,parse_options())
控制台初始化(为输出信息而先于PCI初始化,console_init())
剖析器数据结构初始化(prof_buffer和prof_len变量)
核心Cache初始化(描述Cache信息的Cache,kmem_cache_init())
延迟校准(获得时钟jiffies与CPU主频ticks的延迟,calibrate_delay())
内存初始化(设置内存上下界和页表项初始值,mem_init())
创建和设置内部及通用cache("slab_cache",kmem_cache_sizes_init())
创建uid taskcount SLAB cache("uid_cache",uidcache_init())
创建文件cache("files_cache",filescache_init())
创建目录cache("dentry_cache",dcache_init())
创建与虚存相关的cache("vm_area_struct","mm_struct",vma_init())
块设备读写缓冲区初始化(同时创建"buffer_head"cache用于加速访问,buffer_init())
创建页cache(内存页hash表初始化,page_cache_init())
创建信号队列cache("signal_queue",signals_init())
初始化内存inode表(inode_init())
创建内存文件描述符表("filp_cache",file_table_init())
检查体系结构漏洞(对于alpha,此函数为空,check_bugs())
SMP机器其余CPU(除当前引导CPU)初始化(对于没有配置SMP的内核,此函数为空,smp_init())
启动init过程(创建第一个核心线程,调用init()函数,原执行序列调用cpu_idle() 等待调度,init())
至此start_kernel()结束,基本的核心环境已经建立起来了。
- asmlinkage void __init start_kernel(void)
- {
- char * command_line;
- unsigned long mempages;
- extern char saved_command_line[];
- /*
- * Interrupts are still disabled. Do necessary setups, then
- * enable them
- */
- /*锁内核*/
- lock_kernel();
- /*打印内核的版本和编译的信息*/
- printk(linux_banner);
- /*解析内核的命令行中与内存相关的信息和内存分布信息*/
- setup_arch(&command_line);
- /*打印命令行信息*/
- printk("Kernel command line: %s/n", saved_command_line);
- /*解析传递给内核的命令行中的0号进程的程序名和环境变量*/
- parse_options(command_line);
- /*常用中断,异常的初始化*/
- trap_init();
- /*非常用的中断初始化*/
- init_IRQ();
- /*调度相关的计时器和底半部的初始化*/
- sched_init();
- /*时钟初始化*/
- time_init();
- /*软中断tasklet初始化*/
- softirq_init();
- /*
- * HACK ALERT! This is early. We're enabling the console before
- * we've done PCI setups etc, and console_init() must be aware of
- * this. But we do want output early, in case something goes wrong.
- */
- /*终端初始化*/
- console_init();
- #ifdef CONFIG_MODULES
- /*初始化模块symbol表大小*/
- init_modules();
- #endif
- if (prof_shift) {
- unsigned int size;
- /* only text is profiled */
- prof_len = (unsigned long) &_etext - (unsigned long) &_stext;
- prof_len >>= prof_shift;
- size = prof_len * sizeof(unsigned int) + PAGE_SIZE-1;
- prof_buffer = (unsigned int *) alloc_bootmem(size);
- }
- /*初始化slab分配器*/
- kmem_cache_init();
- sti();
- calibrate_delay();
- #ifdef CONFIG_BLK_DEV_INITRD
- if (initrd_start && !initrd_below_start_ok &&
- initrd_start < min_low_pfn << PAGE_SHIFT) {
- printk(KERN_CRIT "initrd overwritten (0x%08lx < 0x%08lx) - "
- "disabling it./n",initrd_start,min_low_pfn << PAGE_SHIFT);
- initrd_start = 0;
- }
- #endif
- /*设置高端内存和内存的标志位*/
- mem_init();
- /*初始化内部和一般的slab分配器*/
- kmem_cache_sizes_init();
- #ifdef CONFIG_3215_CONSOLE
- con3215_activate();
- #endif
- #ifdef CONFIG_PROC_FS
- /*建立proc文件系统的目录*/
- proc_root_init();
- #endif
- mempages = num_physpages;
- /*初始化最大线程数*/
- fork_init(mempages);
- /*创建一些常用的slab分配器的数据结构*/
- proc_caches_init();
- vfs_caches_init(mempages);
- /*初始化buffer数据结构*/
- buffer_init(mempages);
- /*初始化页表的缓冲结构*/
- page_cache_init(mempages);
- kiobuf_setup();
- /*创建signal的slab数据结构*/
- signals_init();
- bdev_init();
- /*初始化文件系统的inode结构*/
- inode_init(mempages);
- #if defined(CONFIG_SYSVIPC)
- /*初始化sysv的信号量,消息,共享内存*/
- ipc_init();
- #endif
- #if defined(CONFIG_QUOTA)
- dquot_init_hash();
- #endif
- check_bugs();
- printk("POSIX conformance testing by UNIFIX/n");
- /*
- * We count on the initial thread going ok
- * Like idlers init is an unlocked kernel thread, which will
- * make syscalls (and thus be locked).
- */
- /*初始化SMP,主要是APIC的初始化*/
- smp_init();
- /*创建init进程*/
- kernel_thread(init, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGNAL);
- unlock_kernel();
- current->need_resched = 1;
- /*运行idle进程,进行调度*/
- cpu_idle();
- }
- void __init setup_arch(char **cmdline_p)
- {
- unsigned long bootmap_size;
- unsigned long start_pfn, max_pfn, max_low_pfn;
- int i;
- #ifdef CONFIG_VISWS
- visws_get_board_type_and_rev();
- #endif
- /*将rootfs转化成kdev的表示形式,这里跟传统的表示没有不同*/
- ROOT_DEV = to_kdev_t(ORIG_ROOT_DEV);
- drive_info = DRIVE_INFO;
- screen_info = SCREEN_INFO;
- apm_info.bios = APM_BIOS_INFO;
- /*将系统的描述信息写入全局变量中*/
- if( SYS_DESC_TABLE.length != 0 ) {
- MCA_bus = SYS_DESC_TABLE.table[3] &0x2;
- machine_id = SYS_DESC_TABLE.table[0];
- machine_submodel_id = SYS_DESC_TABLE.table[1];
- BIOS_revision = SYS_DESC_TABLE.table[2];
- }
- aux_device_present = AUX_DEVICE_INFO;
- #ifdef CONFIG_BLK_DEV_RAM
- /*设置RAMDISK的标志*/
- rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
- rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
- rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
- #endif
- setup_memory_region();
- if (!MOUNT_ROOT_RDONLY)
- root_mountflags &= ~MS_RDONLY;
- /*在init_mm存放数据段,代码段和堆栈段的起始地址,结束地址*/
- init_mm.start_code = (unsigned long) &_text;
- init_mm.end_code = (unsigned long) &_etext;
- init_mm.end_data = (unsigned long) &_edata;
- init_mm.brk = (unsigned long) &_end;
- /*存放内核和数据段的起始和结束地址,这里是转化为物理地址存放的*/
- code_resource.start = virt_to_bus(&_text);
- code_resource.end = virt_to_bus(&_etext)-1;
- data_resource.start = virt_to_bus(&_etext);
- data_resource.end = virt_to_bus(&_edata)-1;
- /*解析命令行中的"mem="参数*/
- parse_mem_cmdline(cmdline_p);
- #define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
- #define PFN_DOWN(x) ((x) >> PAGE_SHIFT)
- #define PFN_PHYS(x) ((x) << PAGE_SHIFT)
- /*
- * 128MB for vmalloc and initrd
- */
- #define VMALLOC_RESERVE (unsigned long)(128 << 20)
- #define MAXMEM (unsigned long)(-PAGE_OFFSET-VMALLOC_RESERVE)
- #define MAXMEM_PFN PFN_DOWN(MAXMEM)
- #define MAX_NONPAE_PFN (1 << 20)
- /*
- * partially used pages are not usable - thus
- * we are rounding upwards:
- */
- /*找到起始物理页号*/
- start_pfn = PFN_UP(__pa(&_end));
- /*
- * Find the highest page frame number we have available
- */
- /*从E820中找到最高物理页号*/
- max_pfn = 0;
- for (i = 0; i < e820.nr_map; i++) {
- unsigned long start, end;
- /* RAM? */
- if (e820.map[i].type != E820_RAM)
- continue;
- start = PFN_UP(e820.map[i].addr);
- end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
- if (start >= end)
- continue;
- if (end > max_pfn)
- max_pfn = end;
- }
- /*
- * Determine low and high memory ranges:
- */
- /*找到最高的低端物理页号,既896M对应的物理页号*/
- max_low_pfn = max_pfn;
- if (max_low_pfn > MAXMEM_PFN) {
- max_low_pfn = MAXMEM_PFN;
- #ifndef CONFIG_HIGHMEM
- /* Maximum memory usable is what is directly addressable */
- printk(KERN_WARNING "Warning only %ldMB will be used./n",
- MAXMEM>>20);
- if (max_pfn > MAX_NONPAE_PFN)
- printk(KERN_WARNING "Use a PAE enabled kernel./n");
- else
- printk(KERN_WARNING "Use a HIGHMEM enabled kernel./n");
- #else /* !CONFIG_HIGHMEM */
- #ifndef CONFIG_X86_PAE
- if (max_pfn > MAX_NONPAE_PFN) {
- max_pfn = MAX_NONPAE_PFN;
- printk(KERN_WARNING "Warning only 4GB will be used./n");
- printk(KERN_WARNING "Use a PAE enabled kernel./n");
- }
- #endif /* !CONFIG_X86_PAE */
- #endif /* !CONFIG_HIGHMEM */
- }
- /*设置高端内存的起始和结束地址*/
- #ifdef CONFIG_HIGHMEM
- highstart_pfn = highend_pfn = max_pfn;
- if (max_pfn > MAXMEM_PFN) {
- highstart_pfn = MAXMEM_PFN;
- printk(KERN_NOTICE "%ldMB HIGHMEM available./n",
- pages_to_mb(highend_pfn - highstart_pfn));
- }
- #endif
- /*
- * Initialize the boot-time allocator (with low memory only):
- */
- /*初始化896M以下的boot内存*/
- bootmap_size = init_bootmem(start_pfn, max_low_pfn);
- /*
- * Register fully available low RAM pages with the bootmem allocator.
- */
- /*将896M以下的内存设为可用状态*/
- for (i = 0; i < e820.nr_map; i++) {
- unsigned long curr_pfn, last_pfn, size;
- /*
- * Reserve usable low memory
- */
- if (e820.map[i].type != E820_RAM)
- continue;
- /*
- * We are rounding up the start address of usable memory:
- */
- curr_pfn = PFN_UP(e820.map[i].addr);
- if (curr_pfn >= max_low_pfn)
- continue;
- /*
- * ... and at the end of the usable range downwards:
- */
- last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
- if (last_pfn > max_low_pfn)
- last_pfn = max_low_pfn;
- /*
- * .. finally, did all the rounding and playing
- * around just make the area go away?
- */
- if (last_pfn <= curr_pfn)
- continue;
- size = last_pfn - curr_pfn;
- free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size));
- }
- /*
- * Reserve the bootmem bitmap itself as well. We do this in two
- * steps (first step was init_bootmem()) because this catches
- * the (very unlikely) case of us accidentally initializing the
- * bootmem allocator with an invalid RAM area.
- */
- /*保留bootmem自己的内存*/
- reserve_bootmem(HIGH_MEMORY, (PFN_PHYS(start_pfn) +
- bootmap_size + PAGE_SIZE-1) - (HIGH_MEMORY));
- /*
- * reserve physical page 0 - it's a special BIOS page on many boxes,
- * enabling clean reboots, SMP operation, laptop functions.
- */
- /*保留第一个物理页*/
- reserve_bootmem(0, PAGE_SIZE);
- #ifdef CONFIG_SMP
- /*
- * But first pinch a few for the stack/trampoline stuff
- * FIXME: Don't need the extra page at 4K, but need to fix
- * trampoline before removing it. (see the GDT stuff)
- */
- reserve_bootmem(PAGE_SIZE, PAGE_SIZE);
- smp_alloc_memory(); /* AP processor realmode stacks in low memory*/
- #endif
- #ifdef CONFIG_X86_IO_APIC
- /*
- * Find and reserve possible boot-time SMP configuration:
- */
- find_smp_config();
- #endif
- /*初始化构建页表*/
- paging_init();
- #ifdef CONFIG_X86_IO_APIC
- /*
- * get boot-time SMP configuration:
- */
- if (smp_found_config)
- get_smp_config();
- #endif
- #ifdef CONFIG_X86_LOCAL_APIC
- init_apic_mappings();
- #endif
- #ifdef CONFIG_BLK_DEV_INITRD
- /*将RAMDISK的空间保留下来*/
- if (LOADER_TYPE && INITRD_START) {
- if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) {
- reserve_bootmem(INITRD_START, INITRD_SIZE);
- initrd_start =
- INITRD_START ? INITRD_START + PAGE_OFFSET : 0;
- initrd_end = initrd_start+INITRD_SIZE;
- }
- else {
- printk("initrd extends beyond end of memory "
- "(0x%08lx > 0x%08lx)/ndisabling initrd/n",
- INITRD_START + INITRD_SIZE,
- max_low_pfn << PAGE_SHIFT);
- initrd_start = 0;
- }
- }
- #endif
- /*
- * Request address space for all standard RAM and ROM resources
- * and also for regions reported as reserved by the e820.
- */
- /*将ROM加入资源列表中*/
- probe_roms();
- /*将RAM等各种资源加入列表中*/
- for (i = 0; i < e820.nr_map; i++) {
- struct resource *res;
- if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL)
- continue;
- res = alloc_bootmem_low(sizeof(struct resource));
- switch (e820.map[i].type) {
- case E820_RAM: res->name = "System RAM"; break;
- case E820_ACPI: res->name = "ACPI Tables"; break;
- case E820_NVS: res->name = "ACPI Non-volatile Storage"; break;
- default: res->name = "reserved";
- }
- res->start = e820.map[i].addr;
- res->end = res->start + e820.map[i].size - 1;
- res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
- request_resource(&iomem_resource, res);
- if (e820.map[i].type == E820_RAM) {
- /*
- * We dont't know which RAM region contains kernel data,
- * so we try it repeatedly and let the resource manager
- * test it.
- */
- request_resource(res, &code_resource);
- request_resource(res, &data_resource);
- }
- }
- request_resource(&iomem_resource, &vram_resource);
- /* request I/O space for devices used on all i[345]86 PCs */
- for (i = 0; i < STANDARD_IO_RESOURCES; i++)
- request_resource(&ioport_resource, standard_io_resources+i);
- #ifdef CONFIG_VT
- #if defined(CONFIG_VGA_CONSOLE)
- conswitchp = &vga_con;
- #elif defined(CONFIG_DUMMY_CONSOLE)
- conswitchp = &dummy_con;
- #endif
- #endif
- }