分析
1.如果add x9, x9, x10 ---> x9 = 0x0000003dd2519e20+0xffffffc0ed9c3000 = 0xfffffffebfedce20+0x60([0x60] u64 nfct_slow_path) ----> 0xfffffffebfedce80 ---> 地址是map的。
如果add x9, x9, x10 ---> x9 = 0x00+0xffffffc0ed9c3000 = 0xffffffc0ed9c3000+0x60([0x60] u64 nfct_slow_path) ----> ffffffc0ed9c3060 ---> 地址是未map的,并且与Oops里的出错地址一致。
2.需要看下ldr x9, [x10, #96]:这条指令读取的是imsbr_stats,dump出来的DDR里该值是0x0000003dd2519e20,而按出错地址反推CPU当时读到的值是0x0;但是x9随后被后面的add指令覆盖,无法从寄存器直接确认了。
3.从0xffffffc009f22714 --> 0xffffffc009f22734 没有发现跳转。
4.推测怀疑ims_bridge.ko还没有modprobe完成,mc_dad_timer就创建了,调用了hook函数。DDR里的数据是0x0000003dd2519e20,但是从计算结果来看,当时cache/寄存器里的数据却是0。
--- 这里推测到为什么当时数据为0,死机后数据却正常了。imsbr_stats位于.bss,在imsbr_core_init里alloc_percpu赋值以前是0;此时发生死机,打出来的寄存器和cache里的值就是0。
然后KO继续加载(日志里Oops发生在3.104026,而init_module在3.106959才返回),把分配到的地址写进了DDR,所以dump出来的DDR数据与当时寄存器/cache里的数据不一致。
co-work
转入network模块,代码优化
[ 3.100180] modprobe: Loading module /vendor/lib/modules/ims_bridge.ko with args ''
[ 3.103979] calling init_module.cfi_jt+0x0/0x4 [ims_bridge] @ 344
[ 3.104026] Unable to handle kernel paging request at virtual address ffffffc0ed9c3060
[ 3.104028] Mem abort info:
[ 3.104029] ESR = 0x96000005
[ 3.104030] EC = 0x25: DABT (current EL), IL = 32 bits
[ 3.104032] SET = 0, FnV = 0
[ 3.104032] EA = 0, S1PTW = 0
[ 3.104033] Data abort info:
[ 3.104033] ISV = 0, ISS = 0x00000005
[ 3.104034] CM = 0, WnR = 0
[ 3.104036] swapper pgtable: 4k pages, 39-bit VAs, pgdp=0000000081c0a000
[ 3.104037] [ffffffc0ed9c3060] pgd=0000000000000000, pud=0000000000000000
[ 3.104041] Internal error: Oops: 96000005 [#1] PREEMPT SMP
[ 3.106959] initcall init_module.cfi_jt+0x0/0x4 [ims_bridge] returned 0 after 2879 usecs
[ 3.107136] modprobe: Loaded kernel module /vendor/lib/modules/ims_bridge.ko
[ 3.237077] CPU: 7 PID: 0 Comm: swapper/7 Tainted: G S C O 5.4.210-android12-9-g9b0a2e0ae539 #2
[ 3.237084] pstate: 40400005 (nZcv daif +PAN -UAO)
[ 3.237118] pc : nf_imsbr_ipv6_frag_output+0x320/0x440 [ims_bridge]
[ 3.237131] lr : nf_hook_slow+0x84/0x120
[ 3.248320] sp : ffffffc01003bb10
[ 3.248323] x29: ffffffc01003bb60 x28: 0000003dd2612e40
[ 3.248327] x27: ffffff80eab94810 x26: ffffff80eab94840
[ 3.248331] x25: ffffffc01106ad48 x24: ffffff80f1168630
[ 3.248334] x23: ffffffc009f1f674 x22: 0000000000000000
[ 3.248338] x21: 0000000000000010 x20: ffffff80eab94800
[ 3.337973] x19: ffffff80f1684000 x18: ffffffc01003d068
[ 3.337977] x17: 0000000000000048 x16: 0000000000000000
[ 3.337980] x15: 0000000000000004 x14: 0000000000000000
[ 3.337983] x13: 0000000000000000 x12: 0000000000000000
[ 3.337986] x11: 00000000000000e9 x10: 0000000000000001
[ 3.337989] x9 : ffffffc0ed9c3060 x8 : ffffff80fa9bbb00
[ 3.337992] x7 : 0000000000000000 x6 : 000000000000003f
[ 3.337995] x5 : 0000000000000000 x4 : 0000000000000000
[ 3.338001] x3 : 0000000000000000 x2 : ffffffc01003bc00
[ 3.355195] x1 : ffffff80f1684000 x0 : ffffffc011e68100
#10 [ffffffc01003bb60] nf_imsbr_ipv6_frag_output$a60cb5c403db4b770e1e4ecd74d6cfee at ffffffc009f22730 [ims_bridge]
#11 [ffffffc01003bba0] nf_hook_slow at ffffffc010d54ee0
#12 [ffffffc01003bc90] mld_sendpack at ffffffc010e64fe0
#13 [ffffffc01003bcf0] mld_send_initial_cr at ffffffc010e63394
#14 [ffffffc01003bd30] mld_dad_timer_expire$dc6d60b8b58e2bbf650fb3a957f129e5 at ffffffc010e63db4
#15 [ffffffc01003bd70] call_timer_fn at ffffffc010332098
#16 [ffffffc01003bdc0] expire_timers at ffffffc010331f04
#17 [ffffffc01003be80] __run_timers at ffffffc010331e34
#18 [ffffffc01003bee0] run_timer_softirq$7c4e04c20f69fcaefa4dc2baf7ce27f0 at ffffffc010331360
#19 [ffffffc01003bf20] __softirqentry_text_start at ffffffc010081c44
#20 [ffffffc01003bf80] irq_exit at ffffffc01028c594
#21 [ffffffc01003bfa0] __handle_domain_irq at ffffffc010308338
#22 [ffffffc01003bfe0] gic_handle_irq$c78e2e35ec3ce21507d905ddd4bba366 at ffffffc010081a8c
crash_arm64> bt 18 -- UN 态
last_update_time = 4753165312,
last_arrival = 4803767250,
PID: 18 TASK: ffffff80faa20000 CPU: 1 COMMAND: "kworker/1:0"
#0 [ffffffc012533bc0] __switch_to at ffffffc01021eed4
#1 [ffffffc012533c10] __schedule at ffffffc01103f0dc
#2 [ffffffc012533c70] schedule at ffffffc01103f6f8
#3 [ffffffc012533ce0] schedule_timeout at ffffffc011044028
#4 [ffffffc012533d10] msleep at ffffffc0103313b0
#5 [ffffffc012533d30] imsbr_sipc_query_to_register$0864b5e6b918ca40c765c00633cfe2ae at ffffffc009f2365c [ims_bridge]
#6 [ffffffc012533d70] imsbr_sipc_init_work$0864b5e6b918ca40c765c00633cfe2ae at ffffffc009f237a0 [ims_bridge]
#7 [ffffffc012533d90] process_one_work at ffffffc0102acedc
#8 [ffffffc012533e00] worker_thread$f31e2447a3fdcb60f4b193f95acd647c at ffffffc0102ad4b0
#9 [ffffffc012533e60] kthread$bdfae7274f17a094019e62b74615f335 at ffffffc0102b38a4
#10 [ffffffc012533ec0] ret_from_fork at ffffffc0100861ac
/home/android/bsp/kernel5.4/kernel5.4/include/linux/netfilter.h: 136
0xffffffc010d54ee0 <nf_hook_slow+0x80>: blr x23
crash_arm64_v8.0.1_unisoc>
/home/android/bsp/kernel5.4/kernel5.4/net/ims_bridge/imsbr_hooks.c: 133
0xffffffc009f22714 <nf_imsbr_ipv6_frag_output$a60cb5c403db4b770e1e4ecd74d6cfee+0x300>: adrp x10, 0xffffffc009f30000 <esphs>
/home/android/bsp/kernel5.4/kernel5.4/arch/arm64/include/asm/preempt.h: 47
0xffffffc009f22718 <nf_imsbr_ipv6_frag_output$a60cb5c403db4b770e1e4ecd74d6cfee+0x304>: add w9, w9, #0x1
/home/android/bsp/kernel5.4/kernel5.4/include/linux/compiler.h: 294
0xffffffc009f2271c <nf_imsbr_ipv6_frag_output$a60cb5c403db4b770e1e4ecd74d6cfee+0x308>: str w9, [x8, #24]
/home/android/bsp/kernel5.4/kernel5.4/net/ims_bridge/imsbr_hooks.c: 133
0xffffffc009f22720 <nf_imsbr_ipv6_frag_output$a60cb5c403db4b770e1e4ecd74d6cfee+0x30c>: ldr x9, [x10, #96] // x9 = 0x0000003dd2519e20 // 0000000000000060 <imsbr_stats>:
/home/android/bsp/kernel5.4/kernel5.4/arch/arm64/include/asm/percpu.h: 30
0xffffffc009f22724 <nf_imsbr_ipv6_frag_output$a60cb5c403db4b770e1e4ecd74d6cfee+0x310>: mrs x10, tpidr_el1 // x10 = 0xffffffc0ed9c3000
/home/android/bsp/kernel5.4/kernel5.4/net/ims_bridge/imsbr_hooks.c: 133
0xffffffc009f22728 <nf_imsbr_ipv6_frag_output$a60cb5c403db4b770e1e4ecd74d6cfee+0x314>: add x9, x9, x10 // 这里的x9为什么没有加成功?但是应该是0x0000003dd2519e20
0xffffffc009f2272c <nf_imsbr_ipv6_frag_output$a60cb5c403db4b770e1e4ecd74d6cfee+0x318>: add x9, x9, #0x60 // 错误地址 = 0xffffffc0ed9c3000+0x60 = ffffffc0ed9c3060
0xffffffc009f22730 <nf_imsbr_ipv6_frag_output$a60cb5c403db4b770e1e4ecd74d6cfee+0x31c>: mov w10, #0x1 // #1
/home/android/bsp/kernel5.4/kernel5.4/arch/arm64/include/asm/percpu.h: 112
0xffffffc009f22734 <nf_imsbr_ipv6_frag_output$a60cb5c403db4b770e1e4ecd74d6cfee+0x320>: stadd x10, [x9] // <------- crash
crash_arm64> lsmod | grep ims_bridge
ffffffc009f30cc0 ims_bridge ffffffc009f1e000 122880 ./../ko_symbols/ims_bridge.ko
KO反汇编分析
Disassembly of section .bss:
0000000000000000 <esphs>:
...
0000000000000050 <__init_completion.__key>:
...
0000000000000058 <imsbr_flow_bucket>:
...
0000000000000060 <imsbr_stats>:
...
0000000000000068 <imsbr_flow_pool>:
...
0000000000000070 <cur_lp_state>:
...
0000000000000078 <imsbr_flow_cache>:
struct imsbr_stat { <---- x9
[0x60] u64 nfct_slow_path;
crash_arm64> p imsbr_stats -x
imsbr_stats = $1 = (struct imsbr_stat *) 0x3dd2519e20
代码分析
2577 void ipv6_mc_init_dev(struct inet6_dev *idev)
2578 {
2579 write_lock_bh(&idev->lock);
2580 spin_lock_init(&idev->mc_lock);
2581 idev->mc_gq_running = 0;
2582 timer_setup(&idev->mc_gq_timer, mld_gq_timer_expire, 0);
2583 idev->mc_tomb = NULL;
2584 idev->mc_ifc_count = 0;
2585 timer_setup(&idev->mc_ifc_timer, mld_ifc_timer_expire, 0);
2586 timer_setup(&idev->mc_dad_timer, mld_dad_timer_expire, 0); // create timer。
2587 ipv6_mc_reset(idev);
2588 write_unlock_bh(&idev->lock);
2589 }
506 int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
507 const struct nf_hook_entries *e, unsigned int s)
508 {
509 unsigned int verdict;
510 int ret;
511
512 for (; s < e->num_hook_entries; s++) {
513 verdict = nf_hook_entry_hookfn(&e->hooks[s], skb, state);
514 switch (verdict & NF_VERDICT_MASK) {
515 case NF_ACCEPT:
516 break;
517 case NF_DROP:
518 kfree_skb(skb);
519 ret = NF_DROP_GETERR(verdict);
520 if (ret == 0)
521 ret = -EPERM;
522 return ret;
523 case NF_QUEUE:
524 ret = nf_queue(skb, state, s, verdict);
525 if (ret == 1)
526 continue;
527 return ret;
528 default:
529 /* Implicit handling for NF_STOLEN, as well as any other
530 * non conventional verdicts.
531 */
532 return 0;
533 }
534 }
535
536 return 1;
537 }
初始化
struct imsbr_stat __percpu *imsbr_stats;
int __init imsbr_core_init(void)
1034 {
1035 int i;
1036
1037 imsbr_hash_rnd = prandom_u32();
1038 atomic_set(&imsbr_enabled, 0);
1039
1040 for (i = 0; i < IMSBR_SIMCARD_NUM; i++) {
1041 imsbr_simcards[i].init_call = IMSBR_CALLS_END;
1042 imsbr_simcards[i].curr_call = IMSBR_CALLS_END;
1043
1044 atomic_set(&imsbr_simcards[i].ho_state, IMSBR_HO_FINISH);
1045 }
1046
1047 imsbr_flow_bucket = kmalloc(IMSBR_FLOW_HSIZE *
1048 sizeof(struct hlist_head), GFP_KERNEL);
1049 if (!imsbr_flow_bucket)
1050 goto err_bucket;
1051 for (i = 0; i < IMSBR_FLOW_HSIZE; i++)
1052 INIT_HLIST_HEAD(&imsbr_flow_bucket[i]);
1053
1054 imsbr_stats = alloc_percpu(struct imsbr_stat);
1055 if (!imsbr_stats)
1056 goto err_percpu;
1057 if (imsbr_mempool_init())
1058 goto err_mempool;
1059
1060 if (imsbr_init_proc(&init_net))
1061 goto err_proc;
1062
1063 return 0;
1064
1065 err_proc:
1066 imsbr_mempool_exit();
1067 err_mempool:
1068 free_percpu(imsbr_stats);
1069 err_percpu:
1070 kfree(imsbr_flow_bucket);
1071 err_bucket:
1072 return -ENOMEM;
1073 }
调用死机栈
1090 static unsigned int nf_imsbr_ipv6_frag_output(void *priv,
1091 struct sk_buff *skb,
1092 const struct nf_hook_state *state)
1093 {
1094 struct net *net;
1095 struct nf_conntrack_tuple nft;
1096 struct imsbr_flow *flow;
1097 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
1098 struct udphdr *uh;
1099 struct dst_entry *dst = skb_dst(skb);
1100 struct xfrm_state *x = dst->xfrm;
1101 int pmtu = 1400;
1102 int transport_offset;
1103
1104 if (!xfrm_frag_enable)
1105 return NF_ACCEPT;
1106
1107 if (imsbr_get_tuple(state->net, skb, &nft))
1108 return NF_ACCEPT;
119 static int imsbr_get_tuple(struct net *net, struct sk_buff *skb,
120 struct nf_conntrack_tuple *nft)
121 {
122 enum ip_conntrack_info ctinfo;
123 const struct nf_conn *ct;
124 int dir;
125
126 ct = nf_ct_get(skb, &ctinfo);
127 if (likely(ct)) {
128 dir = CTINFO2DIR(ctinfo);
129 *nft = ct->tuplehash[dir].tuple;
130 return 0;
131 }
132
133 IMSBR_STAT_INC(imsbr_stats->nfct_slow_path); <--------- // [96] u64 nfct_slow_path; 怀疑这里的KO还没有初始化完成。担心cache里数据与DDR不一致。
134 return imsbr_parse_nfttuple(net, skb, nft);
135 }
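文章详细描述了内核模块ims_bridge.ko在加载过程中出现的一次内存访问异常及系统崩溃。问题可能源于模块尚未初始化完成时hook函数就被调用:imsbr_stats当时读到的值为0,而死机后dump出来的DDR数据已是正常值,因此DDR与当时寄存器、缓存中的数据不一致。具体表现为在nf_imsbr_ipv6_frag_output中执行stadd指令时,计算出的per-CPU地址为未映射的0xffffffc0ed9c3060,触发“Unable to handle kernel paging request”。此外,还提到了ims_bridge.ko的初始化过程和nf_hook_slow函数在处理网络数据包时的角色。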
2432

被折叠的 条评论
为什么被折叠?



