环境:板子rp3399, 基于arm64 v8a, linux kernel版本4.4
尝试在head.S中添加两个宏:打印字符,打印一个64位寄存器
思路:
参考代码
/*
 * Preserve the arguments passed by the bootloader in x0 .. x3
 *
 * The four entry-time values are written to boot_args, after which the
 * routine tail-calls __inval_cache_range on that 32-byte record.
 * This code itself overwrites x0, x1 and x21 (x21 keeps the original x0).
 */
preserve_boot_args:
mov x21, x0 // x21 = FDT pointer handed over in x0
adr_l x0, boot_args // x0 = address of boot_args
stp x21, x1, [x0] // boot_args[0..1] = x0, x1 as seen at kernel entry
stp x2, x3, [x0, #16] // boot_args[2..3] = x2, x3 as seen at kernel entry
dmb sy // needed before dc ivac with
// MMU off
add x1, x0, #0x20 // x1 = x0 + 4 x 8 bytes, i.e. just past the record
b __inval_cache_range // tail call; presumably takes x0 = start, x1 = end (confirm)
ENDPROC(preserve_boot_args)
boot_args是在arch/arm64/kernel/setup.c中定义,用来保存寄存器x0-x3
为了避免宏内部使用寄存器时破坏调用处的寄存器上下文,仿照boot_args的做法,同样在arch/arm64/kernel/setup.c中定义数组regs_save[11]:进入宏时把宏里要用到的寄存器保存到该数组中,退出宏时再从中恢复。
/*
 * Scratch area for the debug print macros in head.S (test code): registers
 * are spilled here when a macro starts and reloaded when it finishes.
 * Intended layout per the original note: slots for x1-x10, with
 * regs_save[10] kept for other use.
 */
u64 __cacheline_aligned regs_save[11];
打印字符
print_char: 打印一个字符,这里rp3399使用了uart2,基地址是0xff1a0000,向UART_THR寄存器写入值,即可打印,其他板子需要换基地址
print_char代码如下:
/*
 * print_char - send one character to the debug UART while the MMU is off.
 * By xiawei, for debugging.
 *
 * tmp0:  X register, clobbered (holds the address of regs_save on exit)
 * tmp1:  X register, used for the UART base address; preserved
 * tmp2:  X register, used for the character; preserved
 * value: character to send, either an immediate (#0x42) or an X register
 *        other than tmp0 and tmp1
 *
 * tmp1/tmp2 are spilled to regs_save[4] and regs_save[5], so a caller must
 * not keep its own live data in those two slots. The condition flags are
 * left untouched. Register arguments must be spelled as lower-case
 * x0..x30: the 32-bit view is formed as "w" + argument through the wx<n>
 * aliases defined below.
 *
 * Changes compared with the first version:
 *  - UART_LSR is polled until the transmitter is empty before UART_THR is
 *    written; without this, characters are dropped once the TX FIFO is
 *    full and long dumps come out truncated.
 *  - UART_THR is written with a 32-bit store; the former 64-bit store also
 *    overwrote the register at offset 4.
 *
 * The base address is board specific (rk3399 uart2); other boards need a
 * different DBG_UART2_PHYS. The LSR offset assumes a 16550-style UART with
 * a 4-byte register stride - confirm against the SoC manual when porting.
 */
#ifndef DBG_UART_LSR
#define DBG_UART_LSR		0x14	/* UART_LSR: register 5, 4-byte stride */
#define DBG_UART_LSR_TEMT_BIT	6	/* UART_LSR_TEMT: FIFO and shifter empty */
#endif
#ifndef DBG_UART2_PHYS
#define DBG_UART2_PHYS		0xff1a0000	/* rk3399 uart2, UART_THR at offset 0 */
#endif

	/* wx<n> is the 32-bit view of x<n>, usable on an X-register macro argument */
	.irp	n,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30
wx\n	.req	w\n
	.endr

	.macro	print_char, tmp0, tmp1, tmp2, value
	adr_l	\tmp0, regs_save
	stp	\tmp1, \tmp2, [\tmp0, #32]	// spill to regs_save[4], regs_save[5]
	dmb	sy				// spill completes before the UART accesses
	mov	\tmp1, #DBG_UART2_PHYS
	mov	\tmp2, \value
9981:	ldr	w\tmp0, [\tmp1, #DBG_UART_LSR]	// tmp0 is reloaded below, free as scratch
	tbz	w\tmp0, #DBG_UART_LSR_TEMT_BIT, 9981b	// wait for the transmitter to drain
	str	w\tmp2, [\tmp1]			// 32-bit write to UART_THR
	adr_l	\tmp0, regs_save
	ldp	\tmp1, \tmp2, [\tmp0, #32]	// reload tmp1, tmp2
	dmb	sy
	.endm
倒序打印寄存器
print_reg64:打印一个64位寄存器值,比如0x12345678aabbccdd会打印成DDCCBBAA87654321
打印寄存器值的流程是:
这里先不用宏print_reg64,方便理解,后面整理成宏,贴出代码如下:
/*
 * Setup the initial page tables. We only setup the barest amount which is
 * required to get the kernel running. The following sections are required:
 * - identity mapping to enable the MMU (low address, TTBR0)
 * - first few MB of the kernel linear mapping to jump to once the MMU has
 * been enabled
 */
__create_page_tables:
	print_char x0, x1, x2, #0x42		// 'B'
	print_char x0, x1, x2, #0x3a		// ':'

	/*
	 * Open-coded register dump (later folded into the print_reg64 macro).
	 * Prints a 64-bit value as 16 hex digits, least-significant digit
	 * first.
	 *
	 * regs_save usage:
	 *   [0..3]  x1-x4 of the surrounding code, reloaded at the end
	 *   [4..5]  owned by print_char (its tmp1/tmp2 spill slots)
	 *   [9]     working copy of the value, shifted right 4 bits per digit
	 *
	 * Only x1-x4 are modified here, so only they are saved. The earlier
	 * variant also parked x5/x6 in slots 4/5 and x10 in slot 9; those slots
	 * are overwritten by print_char and by the working copy, so x5, x6 and
	 * x10 came back corrupted. x0 is clobbered (address of regs_save).
	 */
	adr_l	x0, regs_save
	stp	x1, x2, [x0]			// regs_save[0], regs_save[1]
	stp	x3, x4, [x0, #16]		// regs_save[2], regs_save[3]
	dmb	sy
	mov	x3, #0				// x3 = number of bits printed so far
	ldr	x4, =0x12345678aabbccdd		// test pattern, prints DDCCBBAA87654321
	str	x4, [x0, #72]			// working copy in regs_save[9]
	dmb	sy
111:
	ldr	x1, [x0, #72]
	ubfx	x1, x1, #0, #4			// x1 = lowest remaining hex digit
	mov	x2, #9
	cmp	x1, x2
	b.le	222f
	add	x1, x1, #0x37			// 10..15 -> 'A'..'F' (0x41 - 10)
	b	223f
222:
	add	x1, x1, #0x30			// 0..9 -> '0'..'9'
223:
	print_char x0, x2, x4, x1		// x2/x4 are preserved by print_char
	ldr	x1, [x0, #72]
	lsr	x1, x1, #4			// drop the digit just printed
	str	x1, [x0, #72]
	add	x3, x3, #4
	cmp	x3, #64
	b.lt	111b				// 16 digits in total
	adr_l	x0, regs_save
	ldp	x1, x2, [x0]
	ldp	x3, x4, [x0, #16]
	dmb	sy
测试结果
由此可见,测试可行,然后整理成宏print_reg64,代码如下
/*
 * print_reg64 - print a 64-bit register (or data) as 16 upper-case hex
 * digits, least-significant digit first, e.g. 0x12345678aabbccdd is
 * printed as DDCCBBAA87654321. By xiawei, for debugging.
 *
 * reg64:          register holding the value to print; must differ from
 *                 regs_save_addr
 * regs_save_addr: register that receives the address of regs_save; it is
 *                 clobbered and must not be one of x1-x4 (x0 is a typical
 *                 choice when it is free)
 *
 * regs_save usage:
 *   [0..3]  x1-x4 of the caller, reloaded before the macro ends
 *   [4..5]  owned by print_char (its tmp1/tmp2 spill slots)
 *   [9]     working copy of the value, shifted right 4 bits per digit
 *
 * The condition flags are modified.
 *
 * Changes compared with the first version:
 *  - x5-x8 are no longer saved/restored. They are not modified any more,
 *    and parking x5/x6 in slots 4/5 collided with the print_char spill
 *    slots, so x5/x6 were restored with wrong values.
 *  - reg64 is stored before x3 is zeroed, so x3 can be printed as well.
 */
	.macro	print_reg64, reg64, regs_save_addr
	adr_l	\regs_save_addr, regs_save
	stp	x1, x2, [\regs_save_addr]		// regs_save[0], regs_save[1]
	stp	x3, x4, [\regs_save_addr, #16]		// regs_save[2], regs_save[3]
	dmb	sy
	str	\reg64, [\regs_save_addr, #72]		// working copy in regs_save[9]
	dmb	sy
	mov	x3, #0					// x3 = number of bits printed so far
111:
	ldr	x1, [\regs_save_addr, #72]
	ubfx	x1, x1, #0, #4				// x1 = lowest remaining hex digit
	mov	x2, #9
	cmp	x1, x2
	b.le	222f
	add	x1, x1, #0x37				// 10..15 -> 'A'..'F' (0x41 - 10)
	b	223f
222:
	add	x1, x1, #0x30				// 0..9 -> '0'..'9'
223:
	print_char \regs_save_addr, x2, x4, x1		// x2/x4 are preserved by print_char
	ldr	x1, [\regs_save_addr, #72]
	lsr	x1, x1, #4				// drop the digit just printed
	str	x1, [\regs_save_addr, #72]
	add	x3, x3, #4
	cmp	x3, #64
	b.lt	111b					// 16 digits in total
	adr_l	\regs_save_addr, regs_save
	ldp	x1, x2, [\regs_save_addr]
	ldp	x3, x4, [\regs_save_addr, #16]
	dmb	sy
	.endm
测试代码,在head.S中__create_page_tables添加测试代码print_char和print_reg64
/*
 * Setup the initial page tables. We only setup the barest amount which is
 * required to get the kernel running. The following sections are required:
 * - identity mapping to enable the MMU (low address, TTBR0)
 * - first few MB of the kernel linear mapping to jump to once the MMU has
 * been enabled
 *
 * Debug additions at the top of the function: print "B:" and then a 64-bit
 * test pattern through the print_char / print_reg64 macros.
 */
__create_page_tables:
// test by xiawei: print single characters
print_char x0, x1,x2,#0x42 // 'B'
print_char x0, x1,x2,#0x3a // ':'
// test by xiawei: print a register; x4 and x0 are overwritten by this test
ldr x4,=0x12345678aabbccdd // test pattern; expected output is DDCCBBAA87654321 because this print_reg64 emits the lowest hex digit first (nibble order, not memory endianness)
print_reg64 x4,x0
adrp x25, idmap_pg_dir
adrp x26, swapper_pg_dir
mov x28, lr // keep the return address; the bl below overwrites lr
/*
 * Invalidate the idmap and swapper page tables to avoid potential
 * dirty cache lines being evicted.
 */
mov x0, x25
add x1, x26, #SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE
bl __inval_cache_range
测试结果同上
使用print_reg64打印出_text(kernel起始地址), 为280000(打印代码参考下面的打印__idmap_text_end,这里省略。打印的结果为280000的倒序),对应加载地址kernel_addr_r,同样可知打印宏正确。
查看system.map
可知_text链接地址为ffffff8008080000, 偏差ffffff8008080000-280000=ffffff8007e00000
测试变量地址__idmap_text_end
所以打印结果应该为ffffff8008a17448 - ffffff8007e00000 = c17448
在head.S中__create_page_tables添加测试代码
打印结果
84471C0000000000倒序即是c17448,打印正确 ,测试再次OK
正常打印寄存器
修改代码,正常打印寄存器
ubfx x1, x1, #60, #4 取bit[60:63],即可打印高4位,
添加优化代码,减少宏里使用的寄存器个数,print_reg64参数reg64可以是任意reg, 参数regs_save_addr不能是x1,x2,x3,其他都可以,修改后的代码如下
/*
 * print_reg64 - dump a 64-bit register (or data) on the debug UART as 16
 * upper-case hex digits, most-significant digit first. By xiawei, for
 * debugging.
 *
 * reg64:          register holding the value to print; any register except
 *                 the one passed as regs_save_addr (that one is overwritten
 *                 before the value is read)
 * regs_save_addr: register that receives the address of regs_save; it is
 *                 clobbered and must not be x1, x2 or x3
 *
 * regs_save usage:
 *   [0..2]  x1-x3 of the caller, reloaded before the macro ends
 *   [4..5]  spill slots used inside print_char
 *   [9]     digits still to be printed, shifted left 4 bits per round
 *
 * The condition flags are modified.
 */
	.macro	print_reg64, reg64, regs_save_addr
	adr_l	\regs_save_addr, regs_save
	stp	x1, x2, [\regs_save_addr]		// park x1, x2 in regs_save[0..1]
	str	x3, [\regs_save_addr, #16]		// park x3 in regs_save[2]
	dmb	sy
	str	\reg64, [\regs_save_addr, #72]		// pending digits live in regs_save[9]
	dmb	sy
	mov	x3, #64					// x3 = bits still to print
111:
	ldr	x1, [\regs_save_addr, #72]
	lsr	x1, x1, #60				// x1 = topmost pending hex digit
	cmp	x1, #9
	add	x2, x1, #0x37				// letter form: 10..15 -> 'A'..'F'
	add	x1, x1, #0x30				// digit form:  0..9  -> '0'..'9'
	csel	x1, x2, x1, gt				// pick the letter form for 10..15
	print_char \regs_save_addr, x2, x3, x1		// x2/x3 are saved and reloaded by print_char
	ldr	x1, [\regs_save_addr, #72]
	lsl	x1, x1, #4				// move the next digit to the top
	str	x1, [\regs_save_addr, #72]
	subs	x3, x3, #4
	b.gt	111b					// 16 rounds in total
	adr_l	\regs_save_addr, regs_save
	ldp	x1, x2, [\regs_save_addr]
	ldr	x3, [\regs_save_addr, #16]
	dmb	sy
	.endm
测试代码
/*
 * Setup the initial page tables. We only setup the barest amount which is
 * required to get the kernel running. The following sections are required:
 * - identity mapping to enable the MMU (low address, TTBR0)
 * - first few MB of the kernel linear mapping to jump to once the MMU has
 * been enabled
 *
 * Debug additions at the top of the function: print "B:", a test pattern,
 * then the run-time addresses of _text and __idmap_text_end, each separated
 * by ':'. The test overwrites x0, x1, x4 and x6.
 */
__create_page_tables:
// test by xiawei: print single characters
print_char x0, x1,x2,#0x42 // 'B'
print_char x0, x1,x2,#0x3a // ':'
// test by xiawei: print registers
ldr x4,=0x12345678aabbccdd // test pattern; expected output is 12345678AABBCCDD
print_reg64 x4,x0
print_char x0, x1,x2,#0x3a // ':'
adr_l x1, _text // PC-relative address of _text
print_reg64 x1,x6 // expected 0x280000 (the load address) on the author's board
print_char x0, x1,x2,#0x3a // ':'
adr_l x1, __idmap_text_end // PC-relative address of __idmap_text_end
print_reg64 x1,x6
adrp x25, idmap_pg_dir
adrp x26, swapper_pg_dir
mov x28, lr // keep the return address; the bl below overwrites lr
/*
 * Invalidate the idmap and swapper page tables to avoid potential
 * dirty cache lines being evicted.
 */
mov x0, x25
add x1, x26, #SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE
bl __inval_cache_range
测试结果如下
system.map中,_text地址
实际物理地址0x280000,对应加载地址kernel_addr_r
算出偏移ffffff8008080000-280000=ffffff8007e00000
system.map中,__idmap_text_end地址
ffffff8008a17448减去偏移ffffff8007e00000,得出地址c17448
和上面打印出的地址一致,测试结果OK
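注:经过多次测试发现,连续调用print_reg64时最多只有前3个寄存器能完整打印,第4个会打印不全,因此使用时连续调用print_reg64的次数应小于4次。如果print_char在写UART_THR之前不查询UART_LSR等待发送完成,连续输出的字符数超过串口发送FIFO深度后,多出的字符就会被丢弃,这很可能就是打印不全的原因。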
开启mmu后的打印
在调试到__enable_mmu中使能mmu后该函数卡死,添加的打印代码
测试结果显示,开启了MMU后的打印没有显示出来
分析原因应该是之前的串口发送地址0xff1a0000是物理地址,开启mmu后没有建立地址转换关系导致。
建立虚拟地址到物理地址(串口地址)的转换表:0xffffff80ff1a0000-->0xff1a0000, 转换原理参考之前文章head.S中__idmap_text_start 至__idmap_text_end映射
在__create_page_tables函数Map the kernel image之后添加,代码如下
/*
 * test by xiawei: set up a mapping for the UART_THR address so that printing
 * still works after the MMU is enabled:
 *     0xffffff80ff1a0000 (virtual) --> 0xff1a0000 (physical)
 * For virtual address 0xffffff80ff1a0000, bits [38:30] = 3, so the level 1
 * table index is 3; bits [29:21] give the level 2 table index.
 *
 * Expects (from the surrounding code, not shown here): x26 = swapper_pg_dir
 * (level 1 table) and, per the author, x0 = swapper_pg_dir + 4K (the level 2
 * table already used for the kernel image). Overwrites x3, x4 and x6.
 */
ldr x3, =0xffffff80ff1a0000 // virtual address of the UART
mov x4,x0 // physical address of the level 2 table (x0 = swapper_pg_dir + 4K per the author)
/* step 1: level 1 table, index 3: make the entry point to swapper_pg_dir + 4K */
lsr x3,x3,#30
and x3,x3, #512-1 // level 1 table index (9 bits)
orr x4, x4, #(3<<0) // set descriptor bits [1:0] = 0b11 (table descriptor, valid)
str x4, [x26, x3, lsl #3] // store the 8-byte entry at index x3
/* step 2: level 2 table: add the entry that maps the UART */
ldr x3, =0xffffff80ff1a0000 // virtual address of the UART
ldr x4, =0xff1a0000 // physical address of the UART
lsr x4, x4, #21 // physical block number
lsr x3, x3, #21
and x3, x3, #512 - 1 // level 2 table index (9 bits)
mov x6,#0xF01 // entry attributes; device memory, nGnRnE per the author (bit-level meaning to be confirmed)
orr x4, x6, x4, lsl #SWAPPER_BLOCK_SHIFT // entry = block address | attributes
str x4, [x0, x3, lsl #3] // store the 8-byte entry at index x3
/* test by xiawei (disabled): dump the two new entries
print_char x8, x1,x2,#0x3a // ':'
print_reg64 x26,x8 //address of the level 1 table
ldr x9, [x26, #24] //content of (level 1 table + index 3)
print_char x8, x1,x2,#0x3a // ':'
print_reg64 x9,x8 //print the content of (level 1 table + index 3)
lsr x9, x9, #12 //level 2 table address[47:12]
lsl x9,x9,#12 //level 2 table address
ldr x10, [x9, x3, lsl #3] //content of (level 2 table + index)
print_char x8, x1,x2,#0x3a // ':'
print_reg64 x10,x8 //print the content of (level 2 table + index)
*/
复用level 1 table, 但index变为3。为了省一张level 2 table表,让index 3对应的表项地址为之前kernel image建立使用的level 2 table。即level 1 table的index 0和index 3指向同一个level 2 table。
level 2 table 添加新的index,该index对应entry的物理地址仍然为0xff1a0000。映射关系如下
kernel img的映射关系参考之前文章head.S kernel image映射
然后添加新的打印宏print_char_mmu,print_reg64_mmu,宏里串口地址使用了虚拟地址0xffffff80ff1a0000,并且使用ldr指令替换adr指令取地址
/*
 * print_char_mmu - send one character to the debug UART after the MMU has
 * been enabled. By xiawei, for debugging.
 *
 * tmp0:  X register, clobbered (holds the address of regs_save on exit)
 * tmp1:  X register, used for the UART base address; preserved
 * tmp2:  X register, used for the character; preserved
 * value: character to send, either an immediate (#0x3a) or an X register
 *        other than tmp0 and tmp1
 *
 * Addresses are taken with "ldr =" (link-time virtual addresses) and the
 * UART is reached through its virtual address, so a mapping
 * 0xffffff80ff1a0000 --> 0xff1a0000 must exist before this macro is used.
 * tmp1/tmp2 are spilled to regs_save[4] and regs_save[5]. The condition
 * flags are left untouched. Register arguments must be spelled as
 * lower-case x0..x30: the 32-bit view is formed as "w" + argument through
 * the wx<n> aliases defined below.
 *
 * Changes compared with the first version:
 *  - UART_LSR is polled until the transmitter is empty before UART_THR is
 *    written; without this, characters are dropped once the TX FIFO is
 *    full.
 *  - UART_THR is written with a 32-bit store; the former 64-bit store also
 *    overwrote the register at offset 4.
 *
 * The LSR offset assumes a 16550-style UART with a 4-byte register stride -
 * confirm against the SoC manual when porting.
 */
#ifndef DBG_UART_LSR
#define DBG_UART_LSR		0x14	/* UART_LSR: register 5, 4-byte stride */
#define DBG_UART_LSR_TEMT_BIT	6	/* UART_LSR_TEMT: FIFO and shifter empty */
#endif
#ifndef DBG_UART2_VIRT
#define DBG_UART2_VIRT		0xffffff80ff1a0000	/* virtual alias of rk3399 uart2 */
#endif

	/* wx<n> is the 32-bit view of x<n>, usable on an X-register macro argument */
	.irp	n,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30
wx\n	.req	w\n
	.endr

	.macro	print_char_mmu, tmp0, tmp1, tmp2, value
	ldr	\tmp0, =regs_save
	stp	\tmp1, \tmp2, [\tmp0, #32]	// spill to regs_save[4], regs_save[5]
	dmb	sy				// spill completes before the UART accesses
	ldr	\tmp1, =DBG_UART2_VIRT
	mov	\tmp2, \value
9982:	ldr	w\tmp0, [\tmp1, #DBG_UART_LSR]	// tmp0 is reloaded below, free as scratch
	tbz	w\tmp0, #DBG_UART_LSR_TEMT_BIT, 9982b	// wait for the transmitter to drain
	str	w\tmp2, [\tmp1]			// 32-bit write to UART_THR
	ldr	\tmp0, =regs_save
	ldp	\tmp1, \tmp2, [\tmp0, #32]	// reload tmp1, tmp2
	dmb	sy
	.endm
/*
 * print_reg64_mmu - dump a 64-bit register (or data) on the debug UART as
 * 16 upper-case hex digits, most-significant digit first, for use after the
 * MMU has been enabled. By xiawei, for debugging.
 *
 * reg64:          register holding the value to print; any register except
 *                 the one passed as regs_save_addr (that one is overwritten
 *                 before the value is read)
 * regs_save_addr: register that receives the address of regs_save; it is
 *                 clobbered and must not be x1, x2 or x3
 *
 * regs_save is addressed through "ldr =", i.e. by its link-time address.
 * regs_save usage:
 *   [0..2]  x1-x3 of the caller, reloaded before the macro ends
 *   [4..5]  spill slots used inside print_char_mmu
 *   [9]     digits still to be printed, shifted left 4 bits per round
 *
 * The condition flags are modified.
 */
	.macro	print_reg64_mmu, reg64, regs_save_addr
	ldr	\regs_save_addr, =regs_save
	stp	x1, x2, [\regs_save_addr]		// park x1, x2 in regs_save[0..1]
	str	x3, [\regs_save_addr, #16]		// park x3 in regs_save[2]
	dmb	sy
	str	\reg64, [\regs_save_addr, #72]		// pending digits live in regs_save[9]
	dmb	sy
	mov	x3, #64					// x3 = bits still to print
111:
	ldr	x1, [\regs_save_addr, #72]
	lsr	x1, x1, #60				// x1 = topmost pending hex digit
	cmp	x1, #9
	add	x2, x1, #0x37				// letter form: 10..15 -> 'A'..'F'
	add	x1, x1, #0x30				// digit form:  0..9  -> '0'..'9'
	csel	x1, x2, x1, gt				// pick the letter form for 10..15
	print_char_mmu \regs_save_addr, x2, x3, x1	// x2/x3 are saved and reloaded by print_char_mmu
	ldr	x1, [\regs_save_addr, #72]
	lsl	x1, x1, #4				// move the next digit to the top
	str	x1, [\regs_save_addr, #72]
	subs	x3, x3, #4
	b.gt	111b					// 16 rounds in total
	ldr	\regs_save_addr, =regs_save
	ldp	x1, x2, [\regs_save_addr]
	ldr	x3, [\regs_save_addr, #16]
	dmb	sy
	.endm
测试代码
打印结果
可以发现:开启mmu之前,_text的地址为0x280000;开启mmu之后,使用adr_l指令取到的地址仍为0x280000,而使用ldr指令取到的地址则为0xFFFFFF8008080000。
那么什么时候adr_l x8, _text的地址变为0xFFFFFF8008080000呢,是在__primary_switch:函数
此时__primary_switched为虚拟地址,br之后进入__primary_switched函数,此时adr_l x8, _text的地址变为0xFFFFFF8008080000。
在__primary_switched函数入口添加测试代码(先将之前的测试代码注释掉)
打印结果如下
结论:
a.在开启mmu之前以及之后打印寄存器宏都可以正常执行。
b.验证了地址转换表建立的规则
c. ldr x8, =__primary_switched
br x8
这两步之后,adr_l取出的地址才是虚拟地址
后记:
实测发现print_char print_reg64宏同样适用于linux kernel 5.10中。