Linux 内核启动过程分析----zImage自解压

linchuanzhi_886

已于 2024-05-08 08:55:30 修改

阅读量2.1k

点赞数

CC 4.0 BY-SA版权

分类专栏： kernel linux 文章标签：内核-解压-启动

于 2015-04-04 12:01:52 首次发布

本文链接：https://blog.youkuaiyun.com/linchuanzhi_886/article/details/44872013

linux 同时被 2 个专栏收录

11 篇文章

订阅专栏

kernel

10 篇文章

订阅专栏

本文详细解析了uImage加载后如何跳转到vmlinux的过程，包括设置处理器模式、禁用中断、开启高速缓存、内存位置修正、判断自解压条件、清除未初始化的数据段(BSS)等关键步骤。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

要想移植内核，肯定是要知道内核的启动过程的，包括协处理器的操作。这对我们理解ARM工作方式、MMU配置，中断是很有帮助的。
上一篇分析过了，uboot引导的uImage，最先执行的函数的是由arch/arm/boot/compressed下的vmlinx.lds文件决定的。它就是_start函数，这个函数定义在arch/arm/boot/compressed/head.S中，那么就先分析arch/arm/boot/compressed/head.S吧。
这个head.S开头定义了一些跟调试相关的宏，先不考虑。

start:
		.type	start,#function
		.rept	7
		mov	r0, r0
		.endr
   ARM(		mov	r0, r0		)
   ARM(		b	1f		)
 THUMB(		adr	r12, BSYM(1f)	)
 THUMB(		bx	r12		)

		.word	0x016f2818		@ Magic numbers to help the loader
		.word	start			@ absolute load/run zImage address
		.word	_edata			@ zImage end address
 THUMB(		.thumb			)
1:		mov	r7, r1			@ save architecture ID
		mov	r8, r2			@ save atags pointer
//
/*arm/Makefile:117:KBUILD_AFLAGS	+=$(CFLAGS_ABI) $(AFLAGS_THUMB2) $(arch-y) $(tune-y) -include asm/unified.h -msoft-float
在unified.h中定义了
由于我们没有定义CONFIG_THUMB2_KERNEL，因此
#define ARM(x...)	x
#define THUMB(x...)
#define W(instr)	instr
#define BSYM(sym)	sym
*/

在start的开始处，使用rept 7，空出了个32位，现在还不明白他的目的，可能是为了留出异常向量表的位置。这里先不下决定。
//mov r7, r1 @ save architecture ID
// mov r8, r2 @ save atags pointer
这两句是关键，就是这两行保存了uboot传进来的参数。
uboot的最后执行的是thekernel(0,archid,bd->bi_bootatas);
而这个thekernel正好是跳转到了start这个位置。其中的参数不足4个，分别用r0/r1/r2寄存,其中r0=0;r1=archid,r2=atags参数的首地址。
这是第一步：将uboot传进来的arch id 和 atags指针保存在r7、r8中。

#ifndef __ARM_ARCH_2__
		/*
		 * Booting from Angel - need to enter SVC mode and disable
		 * FIQs/IRQs (numeric definitions from angel arm.h source).
		 * We only do this if we were in user mode on entry.
		 */
		mrs	r2, cpsr		@ get current mode
		tst	r2, #3			@ not user?在uboot阶段已经处于SVC模式
		bne	not_angel  //此处条件为真，跳转到not_angel
		mov	r0, #0x17		@ angel_SWIreason_EnterSVC
 ARM(		swi	0x123456	)	@ angel_SWI_ARM
 THUMB(		svc	0xab		)	@ angel_SWI_THUMB
not_angel:
		mrs	r2, cpsr		@ turn off interrupts to
		orr	r2, r2, #0xc0		@ prevent angel from running
		msr	cpsr_c, r2 
		//将CPSR的bit6bit7设置为1.对应的是I位和F位：当I=1时禁止IRQ中断，当F=1时禁止FIQ中断，因此作用就是关闭中断和快速中断
#else
		teqp	pc, #0x0c000003		@ turn off interrupts
#endif
	ldr	r4, =zreladdr //定义在arch/arm/cpu/mach-xxx/Makefile.boot，我这的值为		/*
			1 zreladdr-y   := 0x80008000
			2 params_phys-y   := 0x00000100
			3 initrd_phys-y   := 0x00800000
		*/
	bl	cache_on   //开启cache。

cache_on操作这一部分的代码如下：

cache_on:	mov	r3, #8			@ cache_on function
		b	call_cache_fn
call_cache_fn:	adr	r12, proc_types
#ifdef CONFIG_CPU_CP15
		mrc	p15, 0, r9, c0, c0	@ get processor ID
#else
		ldr	r9, =CONFIG_PROCESSOR_ID
#endif
1:		ldr	r1, [r12, #0]		@ get value
		ldr	r2, [r12, #4]		@ get mask
		eor	r1, r1, r9		@ (real ^ match)//显然如果r1==r9,那么r1=0
		tst	r1, r2			@       & mask //r1&r2=0？
 ARM(		addeq	pc, r12, r3		) @ call cache function
 THUMB(		addeq	r12, r3			)
 THUMB(		moveq	pc, r12			) @ call cache function
		add	r12, r12, #PROC_ENTRY_SIZE 
/*	compressed/head.S:613:#define PROC_ENTRY_SIZE (4*5)
因为proc_types开始处每隔5条arm指令就是下一个processer.
*/
		b	1b

/*
 * Table for cache operations.  This is basically:
 *   - CPU ID match
 *   - CPU ID mask
 *   - 'cache on' method instruction
 *   - 'cache off' method instruction
 *   - 'cache flush' method instruction
 *
 * We match an entry using: ((real_id ^ match) & mask) == 0
 *
 * Writethrough caches generally only need 'on' and 'off'
 * methods.  Writeback caches _must_ have the flush method
 * defined.
 */
		.align	2
		.type	proc_types,#object
proc_types:
		.word	0x41560600		@ ARM6/610
		.word	0xffffffe0
		W(b)	__arm6_mmu_cache_off	@ works, but slow
		W(b)	__arm6_mmu_cache_off
		mov	pc, lr
#if !defined(CONFIG_CPU_V7)
		/* This collides with some V7 IDs, preventing correct detection */
		.word	0x00000000		@ old ARM ID
		.word	0x0000f000
		mov	pc, lr
		mov	pc, lr
		mov	pc, lr
#endif

		.word	0x41007000		@ ARM7/710
		.word	0xfff8fe00
		W(b)	__arm7_mmu_cache_off
		W(b)	__arm7_mmu_cache_off
		mov	pc, lr

		.word	0x41807200		@ ARM720T (writethrough)
		.word	0xffffff00
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		mov	pc, lr

	..../*此处定义了多种支持的processor*/.......
	
		.word	0x000f0000		@ new CPU Id
		.word	0x000f0000
		W(b)	__armv7_mmu_cache_on
		W(b)	__armv7_mmu_cache_off
		W(b)	__armv7_mmu_cache_flush
		.word	0			@ unrecognised type
		.word	0
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)
		.size	proc_types, . - proc_types

上述条件如果匹配成功了，那么就会执行procss_type+8个字节(r3指定)处的指令，对于armv7处，执行的就是__armv7_mmu_cache_on。

		.word	0x000f0000		@ new CPU Id
		.word	0x000f0000
		W(b)	__armv7_mmu_cache_on
		W(b)	__armv7_mmu_cache_off
		W(b)	__armv7_mmu_cache_flush
		
__armv7_mmu_cache_on:
		mov	r12, lr         //保存返回地址到r12.
#ifdef CONFIG_MMU
		mrc	p15, 0, r11, c0, c1, 4	@ read ID_MMFR0
		tst	r11, #0xf		@ VMSA
		blne	__setup_mmu
		mov	r0, #0
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		tst	r11, #0xf		@ VMSA
		mcrne	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
#endif
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
		bic	r0, r0, #1 << 28	@ clear SCTLR.TRE
		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
		orr	r0, r0, #0x003c		@ write buffer
#ifdef CONFIG_MMU
#ifdef CONFIG_CPU_ENDIAN_BE8
		orr	r0, r0, #1 << 25	@ big-endian page tables
#endif
		orrne	r0, r0, #1		@ MMU enabled
		movne	r1, #-1
		mcrne	p15, 0, r3, c2, c0, 0	@ load page table pointer
		mcrne	p15, 0, r1, c3, c0, 0	@ load domain access control
#endif
		mcr	p15, 0, r0, c7, c5, 4	@ ISB
		mcr	p15, 0, r0, c1, c0, 0	@ load control register
		mrc	p15, 0, r0, c1, c0, 0	@ and read it back
		mov	r0, #0
		mcr	p15, 0, r0, c7, c5, 4	@ ISB
		mov	pc, r12    //这里返回，返回到bl  	cache_on的下一条指令。

开启cache的过程中会设置MMU，完成chache_on的工作后，返回到了bl	cache_on的下一条指令。

bl	cache_on
		/*
			1 zreladdr-y   := 0x80008000
			2 params_phys-y   := 0x00000100
			3 initrd_phys-y   := 0x00800000
		*/

restart:
		adr	r0, LC0
		ldmia	r0, {r1, r2, r3, r6, r10, r11, r12}
		ldr	sp, [r0, #28]

		/*
		 * We might be running at a different address.  We need
		 * to fix up various pointers.
		 */
		sub	r0, r0, r1		@ calculate the delta offset
		add	r6, r6, r0		@ _edata
		add	r10, r10, r0		@ inflated kernel size location
		/*r6存放了运行时_edata的地址，r10存放了运行时候input_data_end-4 的地址。*/
		/*
		 * The kernel build system appends the size of the
		 * decompressed kernel at the end of the compressed data
		 * in little-endian form.
		 */
		ldrb	r9, [r10, #0]
		ldrb	lr, [r10, #1]
		orr	r9, r9, lr, lsl #8
		ldrb	lr, [r10, #2]
		ldrb	r10, [r10, #3]
		orr	r9, r9, lr, lsl #16
		orr	r9, r9, r10, lsl #24
/*r9存放的就是r10中的值，经过小端模式转换后的。其实还是运行时的input_data_end -4*/
#ifndef CONFIG_ZBOOT_ROM 
		/*here is */
		/* malloc space is above the relocated stack (64k max) */
		add	sp, sp, r0
		add	r10, sp, #0x10000
#else
		/*
		 * With ZBOOT_ROM the bss/stack is non relocatable,
		 * but someone could still run this code from RAM,
		 * in which case our reference is _edata.
		 */
		mov	r10, r6
#endif

		mov	r5, #0			@ init dtb size to 0

/*
 * Check to see if we will overwrite ourselves.
 *   r4  = final kernel address
 *   r9  = size of decompressed image
 *   r10 = end of this image, including  bss/stack/malloc space if non XIP
 * We basically want:
 *   r4 - 16k page directory >= r10 -> OK
 *   r4 + image length <= address of wont_overwrite -> OK
 */
		add	r10, r10, #16384	/*16K : r10= _edat + 16K=0x82000000+_edat相对start的偏移,可以认为是image的大小，显然这个值一定是大于0x82000000*/
		cmp	r4, r10             /*此处r4=0x80008000*/
		bhs	wont_overwrite	//显然条件不成立。
		add	r10, r4, r9 //r10=0x80008000+size of decompressed image
		adr	r9, wont_overwrite
		cmp	r10, r9 /*因为我们加载内核的地址是0x8200_0000，因此此处wont_overwrite处于0x82000000以后的位置*/
		bls	wont_overwrite /*一般内核镜像不会太大，这里一般条件是成立的。r10<r9.*/

/*
 * Relocate ourselves past the end of the decompressed kernel.
 *   r6  = _edata
 *   r10 = end of the decompressed kernel
 * Because we always copy ahead, we need to do it from the end and go
 * backward in case the source and destination overlap.
 */
		/*
		 * Bump to the next 256-byte boundary with the size of
		 * the relocation code added. This avoids overwriting
		 * ourself when the offset is small.
		 */
		add	r10, r10, #((reloc_code_end - restart + 256) & ~255)
		bic	r10, r10, #255

		/* Get start of code we want to copy and align it down. */
		adr	r5, restart
		bic	r5, r5, #31

		sub	r9, r6, r5		@ size to copy
		add	r9, r9, #31		@ rounded up to a multiple
		bic	r9, r9, #31		@ ... of 32 bytes
		add	r6, r9, r5
		add	r9, r9, r10

1:		ldmdb	r6!, {r0 - r3, r10 - r12, lr}
		cmp	r6, r5
		stmdb	r9!, {r0 - r3, r10 - r12, lr}
		bhi	1b

		/* Preserve offset to relocated code. */
		sub	r6, r9, r6

#ifndef CONFIG_ZBOOT_ROM
		/* cache_clean_flush may use the stack, so relocate it */
		add	sp, sp, r6
#endif

		bl	cache_clean_flush

		adr	r0, BSYM(restart)
		add	r0, r0, r6
		mov	pc, r0

wont_overwrite:
/*
 * If delta is zero, we are running at the address we were linked at.
 *   r0  = delta
 *   r2  = BSS start
 *   r3  = BSS end
 *   r4  = kernel execution address
 *   r5  = appended dtb size (0 if not present)
 *   r7  = architecture ID
 *   r8  = atags pointer
 *   r11 = GOT start
 *   r12 = GOT end
 *   sp  = stack pointer
 */
		orrs	r1, r0, r5
		beq	not_relocated

		add	r11, r11, r0
		add	r12, r12, r0

#ifndef CONFIG_ZBOOT_ROM
		/*
		 * If we're running fully PIC === CONFIG_ZBOOT_ROM = n,
		 * we need to fix up pointers into the BSS region.
		 * Note that the stack pointer has already been fixed up.
		 */
		add	r2, r2, r0
		add	r3, r3, r0

		/*
		 * Relocate all entries in the GOT table.
		 * Bump bss entries to _edata + dtb size
		 */
1:		ldr	r1, [r11, #0]		@ relocate entries in the GOT
		add	r1, r1, r0		@ This fixes up C references
		cmp	r1, r2			@ if entry >= bss_start &&
		cmphs	r3, r1			@       bss_end > entry
		addhi	r1, r1, r5		@    entry += dtb size
		str	r1, [r11], #4		@ next entry
		cmp	r11, r12
		blo	1b

		/* bump our bss pointers too */
		add	r2, r2, r5
		add	r3, r3, r5

#else

		/*
		 * Relocate entries in the GOT table.  We only relocate
		 * the entries that are outside the (relocated) BSS region.
		 */
1:		ldr	r1, [r11, #0]		@ relocate entries in the GOT
		cmp	r1, r2			@ entry < bss_start ||
		cmphs	r3, r1			@ _end < entry
		addlo	r1, r1, r0		@ table.  This fixes up the
		str	r1, [r11], #4		@ C references.
		cmp	r11, r12
		blo	1b
#endif

not_relocated:	mov	r0, #0
1:		str	r0, [r2], #4		@ clear bss
		str	r0, [r2], #4
		str	r0, [r2], #4
		str	r0, [r2], #4
		cmp	r2, r3
		blo	1b

/*
 * The C runtime environment should now be setup sufficiently.
 * Set up some pointers, and start decompressing.
 *   r4  = kernel execution address
 *   r7  = architecture ID
 *   r8  = atags pointer
 */
		mov	r0, r4	//0x80008000
		mov	r1, sp			@ malloc space above stack
		add	r2, sp, #0x10000	@ 64k max
		mov	r3, r7	//archID
		bl	decompress_kernel
		bl	cache_clean_flush
		bl	cache_off
		mov	r0, #0			@ must be zero
		mov	r1, r7			@ restore architecture number
		mov	r2, r8			@ restore atags pointer
 ARM(		mov	pc, r4	)		@ call kernel
 THUMB(		bx	r4	)		@ entry point is always ARM

		.align	2
		.type	LC0, #object
LC0:		.word	LC0			@ r1
		.word	__bss_start		@ r2
		.word	_end			@ r3
		.word	_edata			@ r6
		.word	input_data_end - 4	@ r10 (inflated size location,sizeof arch/arm/boot/compressed/piggy.gzip)
		.word	_got_start		@ r11
		.word	_got_end		@ ip
		.word	.L_user_stack_end	@ sp
		.size	LC0, . - LC0

上述代码的功能
1：就是修正运行地址与链接地址之间的差异，包括got表。
.word __bss_start @ r2
.word _end @ r3
.word _edata @ r6
.word input_data_end - 4 @ r10 (inflated size location,sizeof arch/arm/boot/compressed/piggy.gzip)
.word _got_start @ r11
.word _got_end @ ip
.word .L_user_stack_end @ sp
这样才能使得我们的程序正确地运行。比如说我的_bss_start链接地址是0xC0012345.此处有个变量val=10;但是我们在运行uImage的时候，这个bss_start可能是跟链接地址是不同的，如运行的时候处于0x82012345.那么显然，如果我们要想得到val=10，就需要从0x82012345这里获得，而不是0xC0012345，因为0xC0012345的地方现在来说内容还是未知的。修正完了这些地址后，程序才能正常地运行。
功能2：
代码拷贝，拷贝范围是[restart,_edata]当然是32位对齐的。
从restart+sizeof copy 的地方拷贝到 size+ end of the decompressed kernel的地方。

not_relocated:	mov	r0, #0
1:		str	r0, [r2], #4		@ clear bss
		str	r0, [r2], #4
		str	r0, [r2], #4
		str	r0, [r2], #4
		cmp	r2, r3
		blo	1b

/*
 * The C runtime environment should now be setup sufficiently.
 * Set up some pointers, and start decompressing.
 *   r4  = kernel execution address
 *   r7  = architecture ID
 *   r8  = atags pointer
 */
		mov	r0, r4	//0x80008000
		mov	r1, sp			@ malloc space above stack
		add	r2, sp, #0x10000	@ 64k max
		mov	r3, r7	//archID
		bl	decompress_kernel
		bl	cache_clean_flush
		bl	cache_off
		mov	r0, #0			@ must be zero
		mov	r1, r7			@ restore architecture number
		mov	r2, r8			@ restore atags pointer
 ARM(		mov	pc, r4	)		@ call kernel
 THUMB(		bx	r4	)		@ entry point is always ARM

在执行上述代码过程中，寄存器的变化：
r10：运行时的input_data_end
r9: 运行时的input_data_end
r6: 运行时的_edata

r10:运行时的_edata
r10=运行时的_edata + 16K,0x82000000+
r4=0x80008000
if(r10<r4) wont_overwrite

r10=0x80008000+运行时的input_data_end
r9=wont_overwrite,0x8200_0000+
if(r10<r9) wont_overwrite 这个条件是真是假呢？
在我的板子上，因为input_data_end的位置比较靠前(如下的代码)，在vmlinux.lds可以看到，他在got表之前，所以这个条件一般都是成立的，跳转到了wont_overwrite 。然后又跳到了not_relocated，接着准备好参数，执行了decompress_kernel。

  1         .section .piggydata,#alloc
  2         .globl  input_data
  3 input_data:
  4         .incbin "arch/arm/boot/compressed/piggy.gzip"
  5         .globl  input_data_end
  6 input_data_end:
~                                                                               
~                                  
   . = TEXT_START;
 27   _text = .;
 28 
 29   .text : {
 30     _start = .;
 31     *(.start)
 32     *(.text)
 33     *(.text.*)
 34     *(.fixup)
 35     *(.gnu.warning)
 36     *(.glue_7t)
 37     *(.glue_7)
 38   }
 39   .rodata : {
 40     *(.rodata)
 41     *(.rodata.*)
 42   }
 43   .piggydata : {
 44     *(.piggydata)
 45   }
 46

如果不需要映射

则清楚bss段，并且执行了decompress_kernel函数解压内核。

void
decompress_kernel(unsigned long output_start, unsigned long free_mem_ptr_p,
		unsigned long free_mem_ptr_end_p,
		int arch_id)
{
	int ret;

	output_data		= (unsigned char *)output_start;
	free_mem_ptr		= free_mem_ptr_p;
	free_mem_end_ptr	= free_mem_ptr_end_p;
	__machine_arch_type	= arch_id;

	arch_decomp_setup();

	putstr("Uncompressing Linux...");
	ret = do_decompress(input_data, input_data_end - input_data,
			    output_data, error);
	if (ret)
		error("decompressor returned an error");
	else
		putstr(" done, booting the kernel.\n");
}

完成内核解压后，又关闭cache，且继续保存uboot传进来的参数


		bl	decompress_kernel
		bl	cache_clean_flush
		bl	cache_off
		mov	r0, #0			@ must be zero
		mov	r1, r7			@ restore architecture number
		mov	r2, r8			@ restore atags pointer
		 ARM(		mov	pc, r4	)		@ call kernel

在以上整个过程中，MMU一直都处于关闭状态，打开cache可能是为了提高解压速度。在执行decompress_kernel的时候，由mov r0,r4 传入参数0，告诉解压函数的输出地址为0x80008000，最后又由 ARM( mov pc, r4 )，成功跳转到了解压后的内核vmlinux。
由上篇的分析可以知道，uImage是zImage加上64字节的头信息得到的，而zImage又是compressed下的vmlinux经过objcopy得到的，compressed下的vmlinux是由vmlinux.lds、 head.S 和 piggy.gzip.S misc.c编译而成的，其实就是在piggy.gzip中添加了解压代码。piggy.gzip是Image经过gzip -n -f -9得到的，Image是源码目录下的vmlinux经过objcopy后得到的。因此如果zImage进行自解压，解压后的指令序列跟源码目录下的vmlinux的指令序列就应该是一样的。所以，zImage进行自解压后,最后一句ARM( mov pc, r4 )就跳转到了源码根目录下的vmlinux中。其中decompress_kernel准确无误地将vmlinux的内容解压到了r4寄存器指定的地址。
总结一下：
1.判断是否为管理模式(条件当然是成立的,在uboot已经设置过了)
2.设置CPSR寄存器禁止IRQ和FIRQ
3.开启cache(要经过processor查找匹配)
4.fixup 段位置
5.判断是否满足内核自解压内存的环境。检查会不会自我覆盖，如果不会发生自我覆盖则走第6步。我们这里运行时的wont_overwrite指针大于0x80008000+piggy.gzip的大小。因此条件是成立的。直接走6.
6.清除BSS段。
7.设置栈，准备好r0-r3的参数，执行decompress_kernel
8.关闭cache
9.跳转至zaddr处。在arch/arm/mach-xxx/Makefile.boot中定义
执行vmlinux。

上面分析的就是从uImage到vmlinux的跳转过程了。下篇在介绍vmlinux。