在arm64 head.S中用汇编实现打印64位寄存器和字符的宏

环境:板子rp3399, 基于arm64 v8a, linux kernel版本4.4

尝试在head.S中添加两个宏:打印字符,打印一个64位寄存器

思路:

参考代码

/*
 * Preserve the arguments passed by the bootloader in x0 .. x3
 *
 * The four entry registers are written to boot_args, and the original x0 is
 * additionally kept in x21. On the way out x0 holds the address of boot_args
 * and x1 holds boot_args + 0x20; control then transfers to
 * __inval_cache_range (presumably taking x0/x1 as the start/end of the range;
 * that routine is not shown here -- confirm against its definition).
 */
preserve_boot_args:
	mov	x21, x0				// x21=FDT (x0 is reused as a pointer below)

	adr_l	x0, boot_args			// record the contents of
	stp	x21, x1, [x0]			// x0 .. x3 at kernel entry
	stp	x2, x3, [x0, #16]		// second pair lands at boot_args + 16

	dmb	sy				// needed before dc ivac with
						// MMU off

	add	x1, x0, #0x20			// x1 = boot_args + 4 x 8 bytes
	b	__inval_cache_range		// tail call: branch without link, so lr is untouched
ENDPROC(preserve_boot_args)

boot_args是在arch/arm64/kernel/setup.c中定义,用来保存寄存器x0-x3

这些打印宏需要占用若干寄存器作为临时变量,为了不破坏调用处的寄存器上下文,同样在arch/arm64/kernel/setup.c中定义数组regs_save[11]:进入宏时把要用到的寄存器保存到该数组,退出宏时再从中恢复。

/*
 * Scratch buffer for the debug print macros added to head.S (test only).
 * Layout as used by those macros (byte offset = index * 8):
 *   - caller registers are saved starting at offset 0
 *   - print_char parks its two temporaries at offset 32 (regs_save[4..5])
 *   - print_reg64 keeps the value being printed at offset 72 (regs_save[9])
 */
u64 __cacheline_aligned regs_save[11];

打印字符

print_char: 打印一个字符,这里rp3399使用了uart2,基地址是0xff1a0000,向UART_THR寄存器写入值,即可打印,其他板子需要换基地址

print_char代码如下:

/*
 * print_char - send one character to the debug UART (physical address, for
 * use before the MMU is enabled). by xiawei for debug
 *
 *  tmp0:   scratch register; clobbered, holds the address of regs_save on exit
 *  tmp1:   scratch register for the UART base address; preserved
 *  tmp2:   scratch register for the character; preserved
 *  value:  character to send: an immediate, or a register other than
 *          tmp0/tmp1 (both are overwritten before value is read)
 *
 * tmp1/tmp2 are parked in regs_save[4]/regs_save[5] (byte offset 32) for the
 * duration of the macro. Local label 9900 is defined.
 *
 * The macro waits for the transmitter to drain before writing the character;
 * writing THR back-to-back without this check drops characters once the
 * transmit FIFO is full.
 * NOTE(review): the LSR offset/bit below assume an 8250-compatible UART with
 * 4-byte register stride, configured by the bootloader -- confirm for the
 * target board. Other boards also need a different base address.
 * NOTE(review): the data write is a 64-bit store because the macro only
 * receives X register names; it therefore also touches the register at
 * offset 4. Confirm this is acceptable for the target UART.
 */
#define DBG_UART_PHYS_BASE	0xff1a0000	/* rp3399 uart2, THR at offset 0 */
#define DBG_UART_LSR		0x14		/* line status register */
#define DBG_UART_LSR_THRE_BIT	5		/* transmit holding register empty */

    .macro  print_char, tmp0, tmp1, tmp2, value
    adr_l	\tmp0, regs_save
    stp	\tmp1, \tmp2, [\tmp0, #32]		// park temporaries in regs_save[4..5]
    dmb	sy					// order the save before the device accesses

    mov	\tmp1, #DBG_UART_PHYS_BASE
    mov	\tmp2, \value
9900:
    ldrsw	\tmp0, [\tmp1, #DBG_UART_LSR]	// 32-bit read of the status register
    tbz	\tmp0, #DBG_UART_LSR_THRE_BIT, 9900b	// spin until the transmitter is empty
    str	\tmp2, [\tmp1]				// write the character to THR

    adr_l	\tmp0, regs_save		// tmp0 was reused for polling; reload it
    ldp	\tmp1, \tmp2, [\tmp0, #32]		// restore temporaries
    dmb	sy
    .endm

倒序打印寄存器

print_reg64:打印一个64位寄存器值,比如0x12345678aabbccdd会打印成DDCCBBAA87654321

打印寄存器值的流程是:

这里先不用宏print_reg64,方便理解,后面整理成宏,贴出代码如下:

/*
 * Setup the initial page tables. We only setup the barest amount which is
 * required to get the kernel running. The following sections are required:
 *   - identity mapping to enable the MMU (low address, TTBR0)
 *   - first few MB of the kernel linear mapping to jump to once the MMU has
 *     been enabled
 */
__create_page_tables:
    print_char x0, x1, x2, #0x42		// 'B'
    print_char x0, x1, x2, #0x3a		// ':'

    /*
     * Open-coded register dump (the logic that later becomes print_reg64).
     * Prints the test constant lowest nibble first: DDCCBBAA87654321.
     *
     * regs_save layout used here:
     *   [0..3]  saved x1-x4 (the only registers this sequence overwrites)
     *   [4..5]  owned by print_char for its own temporaries
     *   [9]     the value still to be printed
     * Saved registers must stay out of slots 4, 5 and 9: print_char and the
     * working value overwrite them, and the restore would reload wrong data.
     * x0 is clobbered (left pointing at regs_save).
     */
    adr_l	x0, regs_save
    stp	x1, x2, [x0]
    stp	x3, x4, [x0, #16]
    dmb	sy

    ldr	x4, =0x12345678aabbccdd		// test pattern
    str	x4, [x0, #72]			// working copy in regs_save[9]
    dmb	sy
    mov	x3, #0				// x3 = number of bits printed so far
111:
    ldr	x1, [x0, #72]
    ubfx	x1, x1, #0, #4			// x1 = lowest remaining nibble

    mov	x2, #9
    cmp	x1, x2
    b.le	222f
    add	x1, x1, #0x37			// 10..15 -> 'A'..'F' (0x41 - 10)
    b	223f
222:
    add	x1, x1, #0x30			// 0..9 -> '0'..'9'
223:
    // print_char reloads x0 with regs_save and preserves x2/x3 itself, so the
    // pointer and the loop counter survive the call.
    print_char x0, x2, x3, x1

    ldr	x1, [x0, #72]
    lsr	x1, x1, #4			// drop the nibble just printed
    str	x1, [x0, #72]
    add	x3, x3, #4
    cmp	x3, #64
    b.lt	111b				// 16 nibbles in total

    ldp	x1, x2, [x0]			// restore the caller's registers
    ldp	x3, x4, [x0, #16]
    dmb	sy

测试结果

由此可见,测试可行,然后整理成宏print_reg64,代码如下

/*
 * print_reg64 - print a 64-bit value as 16 upper-case hex digits, least
 * significant digit first (0x12345678aabbccdd prints as DDCCBBAA87654321).
 * by xiawei for debug
 *
 *  reg64:            register holding the value to print; any register
 *                    except the one passed as regs_save_addr
 *  regs_save_addr:   scratch register; loaded with the address of regs_save
 *                    and left that way on exit. Must not be x1-x3.
 *
 * x1-x3 are used internally and preserved through regs_save[0..2]. The value
 * being printed is kept in regs_save[9]; print_char uses regs_save[4..5] for
 * its own temporaries, so no caller register may be saved in those slots.
 * The condition flags are clobbered. Local labels 111, 222 and 223 are
 * defined by every expansion.
 */
    .macro  print_reg64, reg64, regs_save_addr
    adr_l	\regs_save_addr, regs_save
    stp	x1, x2, [\regs_save_addr]		// regs_save[0..1]
    str	x3, [\regs_save_addr, #16]		// regs_save[2]
    str	\reg64, [\regs_save_addr, #72]		// working copy, taken before x3 is reused
    dmb	sy

    mov	x3, #0					// x3 = number of bits printed so far
111:
    ldr	x1, [\regs_save_addr, #72]
    ubfx	x1, x1, #0, #4			// x1 = lowest remaining nibble

    mov	x2, #9
    cmp	x1, x2
    b.le	222f
    add	x1, x1, #0x37			// 10..15 -> 'A'..'F' (0x41 - 10)
    b	223f
222:
    add	x1, x1, #0x30			// 0..9 -> '0'..'9'
223:
    // print_char saves/restores x2 and x3 itself and leaves its first
    // argument pointing at regs_save, so the counter and pointer survive.
    print_char \regs_save_addr, x2, x3, x1

    ldr	x1, [\regs_save_addr, #72]
    lsr	x1, x1, #4			// drop the nibble just printed
    str	x1, [\regs_save_addr, #72]
    add	x3, x3, #4
    cmp	x3, #64
    b.lt	111b				// 16 nibbles in total

    ldp	x1, x2, [\regs_save_addr]		// restore the caller's registers
    ldr	x3, [\regs_save_addr, #16]
    dmb	sy
    .endm

测试代码,在head.S中__create_page_tables添加测试代码print_char和print_reg64

/*
 * Setup the initial page tables. We only setup the barest amount which is
 * required to get the kernel running. The following sections are required:
 *   - identity mapping to enable the MMU (low address, TTBR0)
 *   - first few MB of the kernel linear mapping to jump to once the MMU has
 *     been enabled
 */
__create_page_tables:
    // test by xiawei: print_char smoke test, emits "B:"
    print_char x0, x1,x2,#0x42     //'B'
    print_char x0, x1,x2,#0x3a    // ':'
    // test by xiawei: print_reg64 smoke test; x4 and x0 are overwritten here
    ldr x4,=0x12345678aabbccdd   // expected output DDCCBBAA87654321: this print_reg64 version emits the lowest nibble first
    print_reg64 x4,x0

	adrp	x25, idmap_pg_dir
	adrp	x26, swapper_pg_dir
	mov	x28, lr

	/*
	 * Invalidate the idmap and swapper page tables to avoid potential
	 * dirty cache lines being evicted.
	 */
	mov	x0, x25
	add	x1, x26, #SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE
	bl	__inval_cache_range

测试结果同上

使用print_reg64打印出_text(kernel起始地址), 为280000(打印代码参考下面的打印__idmap_text_end,这里省略。打印的结果为280000的倒序),对应加载地址kernel_addr_r,同样可知打印宏正确。

查看system.map

  

可知_text链接地址为ffffff8008080000, 偏差ffffff8008080000-280000=ffffff8007e00000

测试变量地址__idmap_text_end

所以打印结果应该为ffffff8008a17448 - ffffff8007e00000 = c17448 

在head.S中__create_page_tables添加测试代码

打印结果

84471C0000000000倒序即是c17448,打印正确 ,测试再次OK

正常打印寄存器

修改代码,正常打印寄存器

把 ubfx x1, x1, #0, #4 改为 ubfx x1, x1, #60, #4,每次取bit[63:60],即先打印最高的4位;相应地,循环中的右移(lsr)要改为左移(lsl),每打印完一位就把数据左移4位,这样就按正常顺序(高位在前)输出。

添加优化代码,减少宏里使用的寄存器个数:print_reg64的参数reg64可以是任意寄存器(但不能与regs_save_addr是同一个寄存器),参数regs_save_addr不能是x1、x2、x3(宏内部要使用),其他寄存器都可以;注意宏退出后regs_save_addr中保存的是regs_save的地址,原值不会被还原。修改后的代码如下

/*
 * print_reg64 - dump a 64-bit value as 16 upper-case hex digits, most
 * significant digit first (0x12345678aabbccdd prints as 12345678AABBCCDD).
 * by xiawei for debug
 *
 *  reg64:            register holding the value to print
 *  regs_save_addr:   scratch register that receives the address of regs_save
 *                    and still holds it on exit; must not be x1-x3
 *
 * x1-x3 are working registers, saved to regs_save[0..2] on entry and
 * reloaded on exit. The value being printed is kept in regs_save[9], and
 * print_char uses regs_save[4..5]. Condition flags are clobbered.
 */
    .macro  print_reg64, reg64, regs_save_addr
    adr_l	\regs_save_addr, regs_save
    str	x3, [\regs_save_addr, #16]		// regs_save[2]
    stp	x1, x2, [\regs_save_addr]		// regs_save[0..1]
    dmb	sy

    str	\reg64, [\regs_save_addr, #72]		// working copy in regs_save[9]
    dmb	sy
    mov	x3, #64					// x3 = bits still to print
111:
    ldr	x1, [\regs_save_addr, #72]
    lsr	x1, x1, #60				// x1 = current top nibble

    mov	x2, #9
    cmp	x1, x2
    b.gt	222f
    add	x1, x1, #0x30				// 0..9 -> '0'..'9'
    b	223f
222:
    add	x1, x1, #0x37				// 10..15 -> 'A'..'F' (0x41 - 10)
223:
    // x2/x3 can double as print_char temporaries: print_char saves them on
    // entry and reloads them on exit, and it leaves \regs_save_addr pointing
    // at regs_save.
    print_char \regs_save_addr, x2, x3, x1

    ldr	x1, [\regs_save_addr, #72]
    lsl	x1, x1, #4				// bring the next nibble to the top
    str	x1, [\regs_save_addr, #72]
    subs	x3, x3, #4
    b.gt	111b					// loop until all 64 bits are done

    ldr	x3, [\regs_save_addr, #16]		// give the caller its registers back
    ldp	x1, x2, [\regs_save_addr]
    dmb	sy
    .endm

测试代码

/*
 * Setup the initial page tables. We only setup the barest amount which is
 * required to get the kernel running. The following sections are required:
 *   - identity mapping to enable the MMU (low address, TTBR0)
 *   - first few MB of the kernel linear mapping to jump to once the MMU has
 *     been enabled
 */
__create_page_tables:
    // test by xiawei: print_char smoke test, emits "B:"
    print_char x0, x1,x2,#0x42     //'B'
    print_char x0, x1,x2,#0x3a    // ':'
    // test by xiawei: print_reg64 with a known constant (x4 and x0 are overwritten)
    ldr x4,=0x12345678aabbccdd   // expected output: 12345678AABBCCDD
    print_reg64 x4,x0
     print_char x0, x1,x2,#0x3a    // ':' separator
    adr_l x1, _text                   // PC-relative address of _text
    print_reg64 x1,x6                 // should be 0x280000; x6 is overwritten with the regs_save address
    print_char x0, x1,x2,#0x3a    // ':' separator
    adr_l x1, __idmap_text_end        // PC-relative address of __idmap_text_end
    print_reg64 x1,x6

	adrp	x25, idmap_pg_dir
	adrp	x26, swapper_pg_dir
	mov	x28, lr

	/*
	 * Invalidate the idmap and swapper page tables to avoid potential
	 * dirty cache lines being evicted.
	 */
	mov	x0, x25
	add	x1, x26, #SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE
	bl	__inval_cache_range

测试结果如下

system.map中,_text地址

实际物理地址0x280000,对应加载地址kernel_addr_r

算出偏移ffffff8008080000-280000=ffffff8007e00000

system.map中,__idmap_text_end地址 

ffffff8008a17448减去偏移ffffff8007e00000,得出地址c17448

和上面打印出的地址一致,测试结果OK 

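注:经过多次测试发现实际最多能连续打印3个寄存器,第4个会打印不全,即代码中连续调用print_reg64的次数应小于4,使用时注意。推测原因是print_char在写UART_THR之前没有查询UART_LSR的THRE位来等待发送FIFO腾空,连续输出的字符数超过FIFO深度后,后面的字符被丢弃;如需连续打印更多内容,可在写THR之前先轮询该状态位。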

开启mmu后的打印

在调试到__enable_mmu中使能mmu后该函数卡死,添加的打印代码

 测试结果显示,开启了MMU后的打印没有显示出来

 分析原因应该是之前的串口发送地址0xff1a0000是物理地址,开启mmu后没有建立地址转换关系导致。

建立虚拟地址到物理地址(串口地址)的转换表:0xffffff80ff1a0000-->0xff1a0000, 转换原理参考之前文章head.S中__idmap_text_start 至__idmap_text_end映射

在__create_page_tables函数Map the kernel image之后添加,代码如下

/*
 * test by xiawei: set up a mapping for the UART THR address so that printing
 * keeps working after the MMU is enabled:
 *     VA 0xffffff80ff1a0000 --> PA 0xff1a0000
 *   VA bits [38:30] = 3  -> level 1 table index
 *   VA bits [29:21]      -> level 2 table index
 * Uses x26 as the level 1 table base and x0 as the level 2 table base
 * (swapper_pg_dir + 4k per the original note). Overwrites x3, x4 and x6.
 */
      /* level 1: entry at index VA[38:30] points at the level 2 table in x0 */
      ldr	x3, =0xffffff80ff1a0000		// virtual address
      ubfx	x3, x3, #30, #9			// level 1 table index
      orr	x4, x0, #3			// level 2 table address with low bits 0b11 set
      str	x4, [x26, x3, lsl #3]		// store the entry

      /* level 2: block entry at index VA[29:21] for the UART physical address */
      ldr	x3, =0xffffff80ff1a0000		// virtual address
      ubfx	x3, x3, #21, #9			// level 2 table index
      ldr	x4, =0xff1a0000			// physical address
      lsr	x4, x4, #21			// block number
      mov	x6, #0xF01			// entry attribute bits (device memory, nGnRnE per the original note)
      orr	x4, x6, x4, lsl #SWAPPER_BLOCK_SHIFT	// table entry
      str	x4, [x0, x3, lsl #3]		// store the entry

/*   test by xiawei
            print_char x8, x1,x2,#0x3a    // ':'
            print_reg64 x26,x8                 //address of the level 1 table
            ldr x9, [x26, #24]                         //content of the (levwl 1 table + index3)
            print_char x8, x1,x2,#0x3a    // ':'
            print_reg64 x9,x8                //print the content of the (levwl 1 table + index3)
            lsr x9, x9, #12                          //level2 table address[47:12]
            lsl  x9,x9,#12                           //level2 table address

            ldr x10, [x9, x3, lsl #3]                         //content of the (level 2 table + index ) 
            print_char x8, x1,x2,#0x3a    // ':'
            print_reg64 x10,x8                 //print the content of the (level 2 table + index )
*/

复用level 1 table, 但index变为3。为了省一张level 2 table表,让index 3对应的表项地址为之前kernel image建立使用的level 2 table。即level 1 table的index 0和index 3指向同一个level 2 table。

level 2 table 添加新的index,该index对应entry的物理地址仍然为0xff1a0000。映射关系如下

 kernel img的映射关系参考之前文章head.S kernel image映射

然后添加新的打印宏print_char_mmu,print_reg64_mmu,宏里串口地址使用了虚拟地址0xffffff80ff1a0000,并且使用ldr指令替换adr指令取地址

/*
 * print_char_mmu - send one character to the debug UART through its virtual
 * address, for use after the MMU is on. by xiawei for debug
 *
 *  tmp0:   scratch register; clobbered, holds the address of regs_save on exit
 *  tmp1:   scratch register for the UART base address; preserved
 *  tmp2:   scratch register for the character; preserved
 *  value:  character to send: an immediate, or a register other than
 *          tmp0/tmp1 (both are overwritten before value is read)
 *
 * Addresses are loaded with "ldr =" (literal pool) rather than adr_l, so the
 * link-time virtual addresses are used. tmp1/tmp2 are parked in
 * regs_save[4]/regs_save[5] (byte offset 32). Local label 9900 is defined.
 *
 * The macro waits for the transmitter to drain before writing the character;
 * writing THR back-to-back without this check drops characters once the
 * transmit FIFO is full.
 * NOTE(review): the LSR offset/bit below assume an 8250-compatible UART with
 * 4-byte register stride, and that the mapping covering the UART base also
 * covers offset 0x14 -- confirm for the target board.
 * NOTE(review): the data write is a 64-bit store because the macro only
 * receives X register names; it therefore also touches the register at
 * offset 4. Confirm this is acceptable for the target UART.
 */
#define DBG_UART_VIRT_BASE	0xffffff80ff1a0000	/* VA mapped to uart2 THR */
#define DBG_UART_LSR		0x14		/* line status register */
#define DBG_UART_LSR_THRE_BIT	5		/* transmit holding register empty */

    .macro  print_char_mmu, tmp0, tmp1, tmp2, value
    ldr	\tmp0, =regs_save
    stp	\tmp1, \tmp2, [\tmp0, #32]		// park temporaries in regs_save[4..5]
    dmb	sy					// order the save before the device accesses

    ldr	\tmp1, =DBG_UART_VIRT_BASE
    mov	\tmp2, \value
9900:
    ldrsw	\tmp0, [\tmp1, #DBG_UART_LSR]	// 32-bit read of the status register
    tbz	\tmp0, #DBG_UART_LSR_THRE_BIT, 9900b	// spin until the transmitter is empty
    str	\tmp2, [\tmp1]				// write the character to THR

    ldr	\tmp0, =regs_save		// tmp0 was reused for polling; reload it
    ldp	\tmp1, \tmp2, [\tmp0, #32]		// restore temporaries
    dmb	sy
    .endm
    
/*
 * print_reg64_mmu - dump a 64-bit value as 16 upper-case hex digits, most
 * significant digit first, using print_char_mmu (for use after the MMU is
 * on). by xiawei for debug
 *
 *  reg64:            register holding the value to print
 *  regs_save_addr:   scratch register that receives the address of regs_save
 *                    and still holds it on exit; must not be x1-x3
 *
 * x1-x3 are working registers, saved to regs_save[0..2] on entry and
 * reloaded on exit. The value being printed is kept in regs_save[9], and
 * print_char_mmu uses regs_save[4..5]. Condition flags are clobbered.
 */
    .macro  print_reg64_mmu, reg64, regs_save_addr
    ldr	\regs_save_addr, =regs_save
    str	x3, [\regs_save_addr, #16]		// regs_save[2]
    stp	x1, x2, [\regs_save_addr]		// regs_save[0..1]
    dmb	sy

    str	\reg64, [\regs_save_addr, #72]		// working copy in regs_save[9]
    dmb	sy
    mov	x3, #64					// x3 = bits still to print
111:
    ldr	x1, [\regs_save_addr, #72]
    lsr	x1, x1, #60				// x1 = current top nibble

    mov	x2, #9
    cmp	x1, x2
    b.gt	222f
    add	x1, x1, #0x30				// 0..9 -> '0'..'9'
    b	223f
222:
    add	x1, x1, #0x37				// 10..15 -> 'A'..'F' (0x41 - 10)
223:
    // x2/x3 can double as print_char_mmu temporaries: that macro saves them
    // on entry and reloads them on exit, and it leaves \regs_save_addr
    // pointing at regs_save.
    print_char_mmu \regs_save_addr, x2, x3, x1

    ldr	x1, [\regs_save_addr, #72]
    lsl	x1, x1, #4				// bring the next nibble to the top
    str	x1, [\regs_save_addr, #72]
    subs	x3, x3, #4
    b.gt	111b					// loop until all 64 bits are done

    ldr	x3, [\regs_save_addr, #16]		// give the caller its registers back
    ldp	x1, x2, [\regs_save_addr]
    dmb	sy
    .endm
    

测试代码

打印结果

可以发现:开启mmu之前_text地址为0x280000;开启mmu之后,用adr_l指令取到的地址仍为0x280000(adr_l是PC相对寻址,此时PC仍运行在物理地址的恒等映射上),而用ldr指令(ldr x8, =_text)取到的则是链接地址0xFFFFFF8008080000。

那么什么时候adr_l x8, _text的地址变为0xFFFFFF8008080000呢,是在__primary_switch:函数

 此时__primary_switched为虚拟地址,br之后进入__primary_switched函数,此时adr_l x8, _text的地址变为0xFFFFFF8008080000。

在__primary_switched函数入口添加测试代码(先将之前的测试代码注释掉)

 

 打印结果如下

结论

a.在开启mmu之前以及之后打印寄存器宏都可以正常执行。

b.验证了地址转换表建立的规则

c. ldr    x8, =__primary_switched                
    br    x8 

    这两步之后,adr_l取出的地址才是虚拟地址

后记:

实测发现print_char print_reg64宏同样适用于linux kernel 5.10中。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值