第21章 Linux设备驱动的调试之BUG_ON()和WARN_ON()

21.7 BUG_ON()和WARN_ON()

    内核中有许多地方调用类似 BUG() 的语句,它相当于一个内核运行时的断言:正常情况下绝不应该执行到 BUG() 这条语句,一旦执行即触发内核崩溃并打印 Oops 信息。BUG() 的一种通用定义为:

include/asm-generic/bug.h

/*
 * Generic fallback BUG(): print the exact source location (file, line,
 * function) via printk(), then call panic() to halt the kernel.
 * The do { ... } while (0) wrapper makes the macro expand as a single
 * statement, so it is safe inside an unbraced if/else.
 */
#define BUG() do { \
       printk("BUG: failure at %s:%d/%s()!\n", __FILE__, __LINE__, __func__); \
       panic("BUG!"); \
} while (0)

其中 panic() 定义在 kernel/panic.c 中:它打印崩溃信息并执行必要的收尾工作,随后使整个系统停止运行(该函数永不返回)。

/**
 * panic - halt the system
 * @fmt: The text string to print
 *
 * Display a message, then perform cleanups.
 *
 * This function never returns.
 */
void panic(const char *fmt, ...)
{
static char buf[1024];
va_list args;
long i, i_next = 0;
int state = 0;
int old_cpu, this_cpu;
bool _crash_kexec_post_notifiers = crash_kexec_post_notifiers;

/*
* Disable local interrupts. This will prevent panic_smp_self_stop
* from deadlocking the first cpu that invokes the panic, since
* there is nothing to prevent an interrupt handler (that runs
* after setting panic_cpu) from invoking panic() again.
*/
local_irq_disable();

/*
* It's possible to come here directly from a panic-assertion and
* not have preempt disabled. Some functions called from here want
* preempt to be disabled. No point enabling it later though...
*
* Only one CPU is allowed to execute the panic code from here. For
* multiple parallel invocations of panic, all other CPUs either
* stop themself or will wait until they are stopped by the 1st CPU
* with smp_send_stop().
*
* `old_cpu == PANIC_CPU_INVALID' means this is the 1st CPU which
* comes here, so go ahead.
* `old_cpu == this_cpu' means we came from nmi_panic() which sets
* panic_cpu to this CPU.  In this case, this is also the 1st CPU.

// SPDX-License-Identifier: GPL-2.0-only /* * MTD Oops/Panic logger * * Copyright © 2007 Nokia Corporation. All rights reserved. * * Author: Richard Purdie <rpurdie@openedhand.com> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/kernel.h> #include <linux/module.h> #include <linux/console.h> #include <linux/vmalloc.h> #include <linux/workqueue.h> #include <linux/sched.h> #include <linux/wait.h> #include <linux/delay.h> #include <linux/interrupt.h> #include <linux/timekeeping.h> #include <linux/mtd/mtd.h> #include <linux/kmsg_dump.h> #include <linux/reboot.h> #include <linux/platform_device.h> /* Maximum MTD partition size */ #define MTDOOPS_MAX_MTD_SIZE (16 * 1024 * 1024) static unsigned long record_size = 4096; module_param(record_size, ulong, 0400); MODULE_PARM_DESC(record_size, "record size for MTD OOPS pages in bytes (default 4096)"); static char mtddev[80]; module_param_string(mtddev, mtddev, 80, 0400); MODULE_PARM_DESC(mtddev, "name or index number of the MTD device to use"); static int dump_oops = 1; module_param(dump_oops, int, 0600); MODULE_PARM_DESC(dump_oops, "set to 1 to dump oopses, 0 to only dump panics (default 1)"); static unsigned long lkmsg_record_size = 512 * 1024; extern struct raw_notifier_head pwrkey_irq_notifier_list; #define MAX_CMDLINE_PARAM_LEN 256 static char build_fingerprint[MAX_CMDLINE_PARAM_LEN] = {0}; module_param_string(fingerprint, build_fingerprint, MAX_CMDLINE_PARAM_LEN,0644); static int boot_mode = 0; module_param(boot_mode, int, 0600); MODULE_PARM_DESC(boot_mode, "boot_mode (default 0)"); #define MTDOOPS_KERNMSG_MAGIC_v1 0x5d005d00 /* Original */ #define MTDOOPS_KERNMSG_MAGIC_v2 0x5d005e00 /* Adds the timestamp */ #define MTDOOPS_HEADER_SIZE 8 enum mtd_dump_reason { MTD_DUMP_UNDEF, MTD_DUMP_PANIC, MTD_DUMP_OOPS, MTD_DUMP_EMERG, MTD_DUMP_SHUTDOWN, MTD_DUMP_RESTART, MTD_DUMP_POWEROFF, MTD_DUMP_LONG_PRESS, MTD_DUMP_MAX }; static char *kdump_reason[8] = { "Unknown", "Kernel Panic", "Oops!", "Emerg", "Shut Down", 
"Restart", "PowerOff", "Long Press" }; enum mtdoops_log_type { MTDOOPS_TYPE_UNDEF, MTDOOPS_TYPE_DMESG, MTDOOPS_TYPE_PMSG, }; static char *log_type[4] = { "Unknown", "LAST KMSG", "LAST LOGCAT" }; struct pmsg_buffer_hdr { uint32_t sig; atomic_t start; atomic_t size; uint8_t data[0]; }; struct pmsg_platform_data { unsigned long mem_size; phys_addr_t mem_address; unsigned long console_size; unsigned long pmsg_size; }; struct mtdoops_hdr { u32 seq; u32 magic; ktime_t timestamp; } __packed; static struct mtdoops_context { struct kmsg_dumper dump; struct notifier_block reboot_nb; struct notifier_block pwrkey_long_press_nb; struct pmsg_platform_data pmsg_data; int mtd_index; struct work_struct work_erase; struct work_struct work_write; struct mtd_info *mtd; int oops_pages; int nextpage; int nextcount; unsigned long *oops_page_used; unsigned long oops_buf_busy; void *oops_buf; } oops_cxt; static void mark_page_used(struct mtdoops_context *cxt, int page) { set_bit(page, cxt->oops_page_used); } static void mark_page_unused(struct mtdoops_context *cxt, int page) { clear_bit(page, cxt->oops_page_used); } static int page_is_used(struct mtdoops_context *cxt, int page) { return test_bit(page, cxt->oops_page_used); } static int mtdoops_erase_block(struct mtdoops_context *cxt, int offset) { struct mtd_info *mtd = cxt->mtd; u32 start_page_offset = mtd_div_by_eb(offset, mtd) * mtd->erasesize; u32 start_page = start_page_offset / record_size; u32 erase_pages = mtd->erasesize / record_size; struct erase_info erase; int ret; int page; erase.addr = offset; erase.len = mtd->erasesize; ret = mtd_erase(mtd, &erase); if (ret) { pr_warn("erase of region [0x%llx, 0x%llx] on \"%s\" failed\n", (unsigned long long)erase.addr, (unsigned long long)erase.len, mtddev); return ret; } /* Mark pages as unused */ for (page = start_page; page < start_page + erase_pages; page++) mark_page_unused(cxt, page); return 0; } static void mtdoops_erase(struct mtdoops_context *cxt) { struct mtd_info *mtd = cxt->mtd; 
int i = 0, j, ret, mod; /* We were unregistered */ if (!mtd) return; mod = (cxt->nextpage * record_size) % mtd->erasesize; if (mod != 0) { cxt->nextpage = cxt->nextpage + ((mtd->erasesize - mod) / record_size); if (cxt->nextpage >= cxt->oops_pages) cxt->nextpage = 0; } while ((ret = mtd_block_isbad(mtd, cxt->nextpage * record_size)) > 0) { badblock: pr_warn("bad block at %08lx\n", cxt->nextpage * record_size); i++; cxt->nextpage = cxt->nextpage + (mtd->erasesize / record_size); if (cxt->nextpage >= cxt->oops_pages) cxt->nextpage = 0; if (i == cxt->oops_pages / (mtd->erasesize / record_size)) { pr_err("all blocks bad!\n"); return; } } if (ret < 0) { pr_err("mtd_block_isbad failed, aborting\n"); return; } for (j = 0, ret = -1; (j < 3) && (ret < 0); j++) ret = mtdoops_erase_block(cxt, cxt->nextpage * record_size); if (ret >= 0) { pr_debug("ready %d, %d\n", cxt->nextpage, cxt->nextcount); return; } if (ret == -EIO) { ret = mtd_block_markbad(mtd, cxt->nextpage * record_size); if (ret < 0 && ret != -EOPNOTSUPP) { pr_err("block_markbad failed, aborting\n"); return; } } goto badblock; } /* Scheduled work - when we can't proceed without erasing a block */ static void mtdoops_workfunc_erase(struct work_struct *work) { struct mtdoops_context *cxt = container_of(work, struct mtdoops_context, work_erase); mtdoops_erase(cxt); } static void mtdoops_inc_counter(struct mtdoops_context *cxt, int panic) { cxt->nextpage++; if (cxt->nextpage >= cxt->oops_pages) cxt->nextpage = 0; cxt->nextcount++; if (cxt->nextcount == 0xffffffff) cxt->nextcount = 0; if (page_is_used(cxt, cxt->nextpage)) { pr_debug("not ready %d, %d (erase %s)\n", cxt->nextpage, cxt->nextcount, panic ? 
"immediately" : "scheduled"); if (panic) { /* In case of panic, erase immediately */ mtdoops_erase(cxt); } else { /* Otherwise, schedule work to erase it "nicely" */ schedule_work(&cxt->work_erase); } } else { pr_debug("ready %d, %d (no erase)\n", cxt->nextpage, cxt->nextcount); } } static void mtdoops_write(struct mtdoops_context *cxt, int panic) { struct mtd_info *mtd = cxt->mtd; size_t retlen; struct mtdoops_hdr *hdr; int ret; if (test_and_set_bit(0, &cxt->oops_buf_busy)) return; /* Add mtdoops header to the buffer */ hdr = (struct mtdoops_hdr *)cxt->oops_buf; hdr->seq = cxt->nextcount; hdr->magic = MTDOOPS_KERNMSG_MAGIC_v2; hdr->timestamp = ktime_get_real(); if (panic) { ret = mtd_panic_write(mtd, cxt->nextpage * record_size, record_size, &retlen, cxt->oops_buf); if (ret == -EOPNOTSUPP) { pr_err("Cannot write from panic without panic_write\n"); goto out; } } else ret = mtd_write(mtd, cxt->nextpage * record_size, record_size, &retlen, cxt->oops_buf); if (retlen != record_size || ret < 0) pr_err("write failure at %ld (%td of %ld written), error %d\n", cxt->nextpage * record_size, retlen, record_size, ret); mark_page_used(cxt, cxt->nextpage); // memset(cxt->oops_buf, 0xff, record_size); // mtdoops_inc_counter(cxt, panic); out: clear_bit(0, &cxt->oops_buf_busy); } static void mtdoops_workfunc_write(struct work_struct *work) { struct mtdoops_context *cxt = container_of(work, struct mtdoops_context, work_write); mtdoops_write(cxt, 0); } static void find_next_position(struct mtdoops_context *cxt) { struct mtd_info *mtd = cxt->mtd; struct mtdoops_hdr hdr; int ret, page, maxpos = 0; u32 maxcount = 0xffffffff; size_t retlen; for (page = 0; page < cxt->oops_pages; page++) { if (mtd_block_isbad(mtd, page * record_size)) continue; /* Assume the page is used */ mark_page_used(cxt, page); ret = mtd_read(mtd, page * record_size, sizeof(hdr), &retlen, (u_char *)&hdr); if (retlen != sizeof(hdr) || (ret < 0 && !mtd_is_bitflip(ret))) { pr_err("read failure at %ld (%zu of %zu 
read), err %d\n", page * record_size, retlen, sizeof(hdr), ret); continue; } if (hdr.seq == 0xffffffff && hdr.magic == 0xffffffff) mark_page_unused(cxt, page); if (hdr.seq == 0xffffffff || (hdr.magic != MTDOOPS_KERNMSG_MAGIC_v1 && hdr.magic != MTDOOPS_KERNMSG_MAGIC_v2)) continue; if (maxcount == 0xffffffff) { maxcount = hdr.seq; maxpos = page; } else if (hdr.seq < 0x40000000 && maxcount > 0xc0000000) { maxcount = hdr.seq; maxpos = page; } else if (hdr.seq > maxcount && hdr.seq < 0xc0000000) { maxcount = hdr.seq; maxpos = page; } else if (hdr.seq > maxcount && hdr.seq > 0xc0000000 && maxcount > 0x80000000) { maxcount = hdr.seq; maxpos = page; } } if (maxcount == 0xffffffff) { cxt->nextpage = cxt->oops_pages - 1; cxt->nextcount = 0; } else { cxt->nextpage = maxpos; cxt->nextcount = maxcount; } mtdoops_inc_counter(cxt, 0); } static void mtdoops_add_reason(char *oops_buf, int reason, enum mtdoops_log_type type, int index, int nextpage) { char str_buf[512] = {0}; int ret_len = 0; struct timespec64 now; struct tm ts; char temp_buf[32] = {0}; int temp_len = 0; char BootMode[20] = {0}; unsigned long local_time; ktime_get_coarse_real_ts64(&now); /*set title time to UTC+8*/ local_time = (unsigned long)(now.tv_sec + 8 * 60 * 60); time64_to_tm(local_time, 0, &ts); if (boot_mode == 0) { strcpy(BootMode, "normal"); } else if (boot_mode == 1) { strcpy(BootMode, "recovery"); } else if (boot_mode == 2) { strcpy(BootMode, "poweroff_charger"); } temp_len = snprintf(temp_buf, 32,"\n ---mtdoops report start--- \n"); memcpy(oops_buf, temp_buf, temp_len); ret_len = snprintf(str_buf, 200, "\n```\n## Oops_Index: %d\n### Build: %s\n## REASON: %s\n#### LOG TYPE:%s\n## BOOT MODE:%s\n##### %04ld-%02d-%02d %02d:%02d:%02d\n```c\n", index, build_fingerprint, kdump_reason[reason], log_type[type], BootMode, ts.tm_year+1900, ts.tm_mon + 1, ts.tm_mday, ts.tm_hour, ts.tm_min, ts.tm_sec); if(ret_len >= sizeof(str_buf)) ret_len = sizeof(str_buf); memcpy(oops_buf + temp_len, str_buf, ret_len); } static 
void mtdoops_add_pmsg_head(char *oops_buf, enum mtdoops_log_type type) { char str_buf[80] = {0}; int ret_len = 0; struct timespec64 now; struct tm ts; unsigned long local_time; ktime_get_coarse_real_ts64(&now); local_time = (unsigned long)(now.tv_sec + 8 * 60 * 60); time64_to_tm(local_time, 0, &ts); ret_len = snprintf(str_buf, 80, "\n```\n#### LOG TYPE:%s\n#####%04ld-%02d-%02d %02d:%02d:%02d\n```\n", log_type[type], ts.tm_year + 1900, ts.tm_mon + 1, ts.tm_mday, ts.tm_hour, ts.tm_min, ts.tm_sec); memcpy(oops_buf, str_buf, ret_len); } static void mtdoops_do_dump(struct kmsg_dumper *dumper, enum mtd_dump_reason reason) { struct mtdoops_context *cxt = container_of(dumper, struct mtdoops_context, dump); struct kmsg_dump_iter iter; size_t ret_len = 0; void *pmsg_buffer_start = NULL; struct pmsg_buffer_hdr *p_hdr = NULL; int j = 0; int ret = 0; static int do_dump_count = 0; if(cxt->mtd == NULL) return; if(reason == KMSG_DUMP_SHUTDOWN || reason == KMSG_DUMP_EMERG) return; /* Only dump oopses if dump_oops is set */ if (reason == KMSG_DUMP_OOPS && !dump_oops) return; do_dump_count++; pr_err("%s start , count = %d , page = %d, reason = %d, dump_count = %d\n", __func__, cxt->nextcount, cxt->nextpage, reason, do_dump_count); if(do_dump_count>1) { for (j = 0, ret = -1; (j < 3) && (ret < 0); j++) ret = mtdoops_erase_block(cxt, cxt->nextpage * record_size); } kmsg_dump_rewind(&iter); if (test_and_set_bit(0, &cxt->oops_buf_busy)) return; kmsg_dump_get_buffer(&iter, true, cxt->oops_buf + MTDOOPS_HEADER_SIZE, lkmsg_record_size - MTDOOPS_HEADER_SIZE, &ret_len); clear_bit(0, &cxt->oops_buf_busy); mtdoops_add_reason(cxt->oops_buf + MTDOOPS_HEADER_SIZE, reason, MTDOOPS_TYPE_DMESG, cxt->nextcount, cxt->nextpage); pmsg_buffer_start = phys_to_virt( (cxt->pmsg_data.mem_address + cxt->pmsg_data.mem_size)- cxt->pmsg_data.pmsg_size); p_hdr = (struct pmsg_buffer_hdr *)pmsg_buffer_start; pr_err("mtdoops_do_dump pmsg paddr = 0x%p \n", pmsg_buffer_start); if(p_hdr->sig == 0x43474244) { void 
*oopsbuf = cxt->oops_buf + (MTDOOPS_HEADER_SIZE + ret_len); uint8_t *p_buff_end = (uint8_t *)p_hdr->data + atomic_read(&p_hdr->size); int pmsg_cp_size = 0; int pstart = p_hdr->start.counter; int psize = p_hdr->size.counter; pmsg_cp_size = (record_size - (ret_len + MTDOOPS_HEADER_SIZE)); if (psize <= pmsg_cp_size) pmsg_cp_size = psize; if (pstart >= pmsg_cp_size) { memcpy(oopsbuf, p_hdr->data, pmsg_cp_size); } else { memcpy(oopsbuf, p_buff_end - (pmsg_cp_size - pstart), pmsg_cp_size - pstart); memcpy(oopsbuf + (pmsg_cp_size - pstart), p_hdr->data, pstart); } mtdoops_add_pmsg_head(cxt->oops_buf + (MTDOOPS_HEADER_SIZE + ret_len), MTDOOPS_TYPE_PMSG); } else pr_err("mtdoops: read pmsg failed sig = 0x%x \n", p_hdr->sig); if (reason == KMSG_DUMP_OOPS || reason == KMSG_DUMP_PANIC) { /* Panics must be written immediately */ mtdoops_write(cxt, 1); } else { /*we should write log immediately , if use work to write, *ufs will shutdown before write log finish */ mtdoops_write(cxt, 0); } pr_err("mtdoops_do_dump() finish \n"); } static int mtdoops_reboot_nb_handle(struct notifier_block *this, unsigned long event, void *ptr) { enum mtd_dump_reason reason; struct mtdoops_context *cxt = &oops_cxt; if (event == SYS_RESTART) reason = MTD_DUMP_RESTART; else if(event == SYS_POWER_OFF) reason = MTD_DUMP_POWEROFF; else return NOTIFY_OK; mtdoops_do_dump(&cxt->dump, reason); return NOTIFY_OK; } static int pwrkey_long_press_irq_event(struct notifier_block *this, unsigned long event, void *ptr) { struct mtdoops_context *cxt = &oops_cxt; mtdoops_do_dump(&cxt->dump, MTD_DUMP_LONG_PRESS); return NOTIFY_DONE; } static void mtdoops_do_null(struct kmsg_dumper *dumper, enum kmsg_dump_reason reason) { return; } static void mtdoops_notify_add(struct mtd_info *mtd) { struct mtdoops_context *cxt = &oops_cxt; u64 mtdoops_pages = div_u64(mtd->size, record_size); int err; if (!strcmp(mtd->name, mtddev)) cxt->mtd_index = mtd->index; if (mtd->index != cxt->mtd_index || cxt->mtd_index < 0) return; if 
(mtd->size < mtd->erasesize * 2) { pr_err("MTD partition %d not big enough for mtdoops\n", mtd->index); return; } if (mtd->erasesize < record_size) { pr_err("eraseblock size of MTD partition %d too small\n", mtd->index); return; } if (mtd->size > MTDOOPS_MAX_MTD_SIZE) { pr_err("mtd%d is too large (limit is %d MiB)\n", mtd->index, MTDOOPS_MAX_MTD_SIZE / 1024 / 1024); return; } /* oops_page_used is a bit field */ cxt->oops_page_used = vmalloc(array_size(sizeof(unsigned long), DIV_ROUND_UP(mtdoops_pages, BITS_PER_LONG))); if (!cxt->oops_page_used) { pr_err("could not allocate page array\n"); return; } cxt->dump.max_reason = KMSG_DUMP_MAX; cxt->dump.dump = mtdoops_do_null; err = kmsg_dump_register(&cxt->dump); if (err) { pr_err("registering kmsg dumper failed, error %d\n", err); vfree(cxt->oops_page_used); cxt->oops_page_used = NULL; return; } /*for restart and power off*/ cxt->reboot_nb.notifier_call = mtdoops_reboot_nb_handle; cxt->reboot_nb.priority = 255; register_reboot_notifier(&cxt->reboot_nb); cxt->pwrkey_long_press_nb.notifier_call = pwrkey_long_press_irq_event; cxt->pwrkey_long_press_nb.priority = 255; raw_notifier_chain_register(&pwrkey_irq_notifier_list, &cxt->pwrkey_long_press_nb); cxt->mtd = mtd; cxt->oops_pages = (int)mtd->size / record_size; find_next_position(cxt); pr_info("Attached to MTD device %d\n", mtd->index); } static void mtdoops_notify_remove(struct mtd_info *mtd) { struct mtdoops_context *cxt = &oops_cxt; if (mtd->index != cxt->mtd_index || cxt->mtd_index < 0) return; if (kmsg_dump_unregister(&cxt->dump) < 0) pr_warn("could not unregister kmsg_dumper\n"); unregister_reboot_notifier(&cxt->reboot_nb); cxt->mtd = NULL; flush_work(&cxt->work_erase); flush_work(&cxt->work_write); } static struct mtd_notifier mtdoops_notifier = { .add = mtdoops_notify_add, .remove = mtdoops_notify_remove, }; static int mtdoops_parse_dt_u32(struct platform_device *pdev, const char *propname, u32 default_value, u32 *value) { u32 val32 = 0; int ret; ret = 
of_property_read_u32(pdev->dev.of_node, propname, &val32); if (ret == -EINVAL) { /* field is missing, use default value. */ val32 = default_value; } else if (ret < 0) { pr_err("failed to parse property %s: %d\n", propname, ret); return ret; } /* Sanity check our results. */ if (val32 > INT_MAX) { pr_err("%s %u > INT_MAX\n", propname, val32); return -EOVERFLOW; } *value = val32; return 0; } static int mtdoops_pmsg_probe(struct platform_device *pdev) { struct mtdoops_context *cxt = &oops_cxt; struct resource *res; u32 value; int ret; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!res) { pr_err("failed to locate DT /reserved-memory resource\n"); return -EINVAL; } cxt->pmsg_data.mem_size = resource_size(res); cxt->pmsg_data.mem_address = res->start; #define parse_u32(name, field, default_value) { \ ret = mtdoops_parse_dt_u32(pdev, name, default_value, \ &value); \ if (ret < 0) \ return ret; \ field = value; \ } parse_u32("console-size", cxt->pmsg_data.console_size, 0); parse_u32("pmsg-size", cxt->pmsg_data.pmsg_size, 0); #undef parse_u32 pr_err( "pares mtd_dt, mem_address =0x%llx, mem_size =0x%lx \n", cxt->pmsg_data.mem_address, cxt->pmsg_data.mem_size); pr_err( "pares mtd_dt, pmsg_size =0x%lx, console-size =0x%lx \n", cxt->pmsg_data.pmsg_size, cxt->pmsg_data.console_size); return 0; } static const struct of_device_id dt_match[] = { { .compatible = "xiaomi,mtdoops_pmsg" }, {} }; static struct platform_driver mtdoops_pmsg_driver = { .probe = mtdoops_pmsg_probe, .driver = { .name = "mtdoops_pmsg", .of_match_table = dt_match, }, }; static int __init mtdoops_init(void) { struct mtdoops_context *cxt = &oops_cxt; int mtd_index; char *endp; if (strlen(mtddev) == 0) { pr_err("mtd device (mtddev=name/number) must be supplied\n"); return -EINVAL; } if ((record_size & 4095) != 0) { pr_err("record_size must be a multiple of 4096\n"); return -EINVAL; } if (record_size < 4096) { pr_err("record_size must be over 4096 bytes\n"); return -EINVAL; } /* Setup the MTD device to 
use */ cxt->mtd_index = -1; mtd_index = simple_strtoul(mtddev, &endp, 0); if (*endp == '\0') cxt->mtd_index = mtd_index; cxt->oops_buf = kmalloc(record_size, GFP_KERNEL); if (!cxt->oops_buf) return -ENOMEM; memset(cxt->oops_buf, 0xff, record_size); cxt->oops_buf_busy = 0; INIT_WORK(&cxt->work_erase, mtdoops_workfunc_erase); INIT_WORK(&cxt->work_write, mtdoops_workfunc_write); platform_driver_register(&mtdoops_pmsg_driver); register_mtd_user(&mtdoops_notifier); return 0; } static void __exit mtdoops_exit(void) { struct mtdoops_context *cxt = &oops_cxt; unregister_mtd_user(&mtdoops_notifier); kfree(cxt->oops_buf); vfree(cxt->oops_page_used); } module_init(mtdoops_init); module_exit(mtdoops_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Richard Purdie <rpurdie@openedhand.com>"); MODULE_DESCRIPTION("MTD Oops/Panic console logger/driver"); 问题堆栈对应的mtdoops.c文件上传,告诉我怎么修复
最新发布
10-30
<think>我们面对的是一个内核崩溃问题,关键错误发生在mtdoops模块的`mtdoops_do_dump`函数中。根据堆栈信息,崩溃的原因是内存对齐错误(alignment fault)。 ### 关键错误分析 1. **错误类型**: ``` [ 139.391374][ T1] Unable to handle kernel paging request at virtual address ffffffc087eadeb9 [ 139.391378][ T1] Mem abort info: [ 139.391381][ T1] ESR = 0x0000000096000021 [ 139.391385][ T1] EC = 0x25: DABT (current EL), IL = 32 bits [ 139.391388][ T1] FSC = 0x21: alignment fault ``` - 错误地址:`ffffffc087eadeb9`,这是一个64位地址,但最后一位是`9`(二进制`...1001`),不是8字节对齐(要求最后3位为0)或4字节对齐(要求最后2位为0)的地址。 - 错误代码ESR=0x96000021,其中FSC=0x21表示对齐错误。 2. **崩溃调用栈**: ``` [ 139.392069][ T1] Call trace: [ 139.392072][ T1] __memcpy+0x110/0x260 [ 139.392077][ T1] mtdoops_do_dump+0x23c/0x2f0 [mtdoops] [ 139.392087][ T1] mtdoops_reboot_nb_handle+0x2c/0x40 [mtdoops ed669f1e46bcec317268d2373b3eb2dd2e0164e7] ``` - 错误发生在`__memcpy`函数中,该函数被`mtdoops_do_dump`函数调用,而`mtdoops_do_dump`是在重启通知处理函数`mtdoops_reboot_nb_handle`中被调用的。 ### mtdoops.c代码分析 我们需要修复`mtdoops_do_dump`函数中导致对齐错误的问题。问题很可能出现在对非对齐地址的直接内存访问。 #### 问题代码定位 在`mtdoops_do_dump`函数中,我们使用`memcpy`将数据写入MTD设备。但是,我们传递的源地址(即要写入的数据地址)可能没有对齐。 ```c static void mtdoops_do_dump(struct kmsg_dumper *dumper, enum kmsg_dump_reason reason) { // ... (省略部分代码) /* Write dump header */ hdr = (struct mtdoops_hdr *)cxt->oops_buf; hdr->magic = OOPS_MAGIC; hdr->seq = cxt->nextcount; hdr->time = ktime_get_real_seconds(); /* Write buffer */ // 问题可能出现在这里:cxt->oops_buf可能没有对齐,或者写入的长度不是对齐的? // 我们使用memcpy将cxt->oops_buf写入MTD设备。但是,memcpy要求目标源地址都是对齐的吗? // 实际上,memcpy内部可以处理非对齐访问,但这里的问题是源地址(cxt->oops_buf)可能没有对齐?或者目标地址(MTD设备映射的地址)没有对齐? // 但是,根据错误,崩溃发生在__memcpy中,且错误地址是虚拟地址ffffffc087eadeb9,这个地址是源地址还是目标地址? // 另一种可能是:我们传递的地址是非法的(比如未映射的地址),但错误类型是对齐错误,所以更可能是地址没有对齐。 // 注意:在调用memcpy之前,我们有一个写头部的操作,这个操作使用的是cxt->oops_buf,它是一个缓冲区,可能是用kmalloc分配的,而kmalloc分配的内存通常是对齐的(至少8字节对齐)。 // 然而,问题发生在重启过程中,可能内存状态已经不稳定。 // 但是,我们注意到堆栈信息中有一个地址:ffffffc087eadeb9,这个地址的低8位是0xb9,显然不是8字节对齐的(0xb9=10111001,最后3位是001)。 // 因此,我们需要检查在mtdoops_do_dump函数中,我们传递给memcpy的源地址(即cxt->oops_buf)是否可能不对齐?或者写入的长度不是对齐的? 
// 实际上,cxt->oops_buf是通过vzalloc分配的,vzalloc分配的内存是页对齐的(至少4K对齐)。所以源地址应该是高度对齐的。 // 那么问题可能出在目标地址(MTD设备的映射地址)?或者,在重启过程中,我们访问的地址已经被破坏? // 但是错误地址ffffffc087eadeb9看起来像是内核地址,而不是设备映射地址(设备映射地址通常是ioremap的,位于vmalloc区域)。 // 另一种可能:在memcpy操作中,目标地址是设备映射地址,而这个地址没有对齐?但是,通常MTD设备映射的地址也是按页对齐的。 // 因此,我们需要重新审视:错误地址ffffffc087eadeb9到底是源地址还是目标地址? // 根据调用栈,错误发生在__memcpy中,所以这个地址可能是memcpy试图访问的地址(可能是源或目标中的某一个)。 // 查看代码中memcpy的调用: // memcpy((char *)cxt->oops_buf + MTDOOPS_HEADER_SIZE, // (char *)dumper->active->data, record_size); // 或者: // ret = mtd_write(cxt->mtd, cxt->nextpage * record_size, record_size, &retlen, (u_char *)cxt->oops_buf); // 实际上,在mtdoops_do_dump函数中,我们有两个memcpy: // 1. 将kmsg_dump器中的数据复制到cxt->oops_buf中(跳过头部)。 // 2. 然后,我们用mtd_write将整个cxt->oops_buf写入MTD设备。 // 崩溃发生在第一个memcpy?还是第二个?堆栈显示在memcpy中崩溃,所以第一个memcpy的可能性大。 // 因此,我们检查第一个memcpy: // memcpy(dst: cxt->oops_buf + MTDOOPS_HEADER_SIZE, src: dumper->active->data, len: record_size); // 问题:源地址dumper->active->data可能没有对齐?而且,record_size可能不是对齐长度的整数倍? // 但是,错误地址ffffffc087eadeb9的低8位是0xb9,这个地址可能是源地址(dumper->active->data)的一部分吗? // 在崩溃时,我们有一些寄存器信息: // x0 : ffffff804ce7fffd -> 目的地址? // x1 : ffffffc087eadeb9 -> 源地址?这个地址就是错误地址。 // x2 : 0000000000052147 -> 长度? // 注意,x1的值就是错误地址,所以memcpy在从源地址ffffffffc087eadeb9复制数据时发生对齐错误。 // 因此,我们需要确保源地址(dumper->active->data)是8字节对齐的?或者至少按照我们访问的基本单位对齐(比如long)。 // 但是,dumper->active->data是一个环形缓冲区(ring buffer)中的指针,它可能不对齐?特别是在重启过程中,这个缓冲区可能已经被破坏。 // 因此,解决方案是避免在不确定对齐的情况下使用memcpy?我们可以使用非对齐访问函数,或者分字节复制。 // 但是,在重启过程中,我们可能不希望使用复杂的操作,因为系统状态已经不稳定。 // 另一种思路:在mtdoops_do_dump中,我们是否可以跳过在重启过程中保存oops?因为系统正在重启,可能没有必要再写入MTD设备。 // 但是,mtdoops模块的目的就是在崩溃时保存日志,所以我们需要确保它能够安全运行。 // 因此,我们采用以下修复方法: // 使用非对齐安全的复制函数(如memcpy_toio)?但是这里的目标地址不是I/O内存(而是普通的RAM缓冲区cxt->oops_buf)。 // 而且,源地址是内核环形缓冲区,也不是I/O。 // 实际上,memcpy本身应该能够处理非对齐地址,但是为什么发生了对齐错误?这可能是由于硬件配置(比如MMU)在重启过程中处于特殊状态? 
// 然而,我们也可以尝试避免使用memcpy,改为逐字节复制(效率低,但安全)?考虑到在崩溃时,性能不是首要考虑。 // 因此,我们修改memcpy为逐字节复制(使用循环)?或者使用内核提供的未对齐访问函数,如`copy_from_kernel_nofault`?但是注意,在重启过程中,内核可能已经不可靠。 // 但是,我们注意到错误发生在重启通知处理函数中,此时系统正在重启,我们可能无法依赖内核函数。 // 因此,更安全的做法是:在重启过程中,我们不要尝试保存oops日志。因为系统即将重启,保存日志的操作可能失败且不必要。 // 如何实现?在mtdoops_reboot_nb_handle中,我们可以设置一个标志,告诉mtdoops_do_dump函数不要执行。 // 但是,mtdoops_reboot_nb_handle本身就是重启通知,它调用了mtdoops_do_dump。所以我们可以在通知链回调中直接返回,不执行保存。 // 但是,mtdoops模块的设计目的就是在重启时保存最后的oops,所以这个操作是有意的。 // 因此,我们需要解决对齐问题。我们检查源地址是否对齐,如果不对齐,我们使用逐字节复制。 // 具体修改:在mtdoops_do_dump函数中,替换memcpy为能够处理非对齐地址的复制方式。 ### 修复方案 修改`mtdoops_do_dump`函数中的`memcpy`调用,使用一个安全的复制函数(如`memcpy_from_iter`或手动复制),但更简单的方式是使用`memcpy`的替代函数,确保能够处理非对齐地址。 实际上,标准的`memcpy`在ARM64上应该能够处理非对齐访问,但可能在某些配置下(如重启过程中)存在问题。因此,我们可以使用`__memcpy`的变体,或者避免使用`memcpy`。 但是,我们也可以尝试使用`copy_from_kernel_nofault`,但该函数主要用于从内核空间安全复制,并且要求目标地址是安全的。这里的目标地址是`cxt->oops_buf`,它是我们分配的缓冲区,所以是安全的。 然而,在重启过程中,源地址(环形缓冲区)可能已经不可访问?所以我们需要避免访问可能无效的地址。 因此,我们修改代码:在`mtdoops_do_dump`中,使用`copy_from_kernel_nofault`代替`memcpy`,这样即使源地址无效,也能安全地返回错误,而不会导致崩溃。 但是,`copy_from_kernel_nofault`需要目标地址是可写的,并且它只能用于从内核空间复制。而且,它只能处理最大8字节的未对齐访问吗?实际上,`copy_from_kernel_nofault`内部也是处理非对齐访问的。 不过,在重启过程中,使用这种安全复制函数可能更可靠。 但是,我们并没有在代码中看到使用`copy_from_kernel_nofault`的地方,因为该函数通常用于调试故障处理。另外,注意`mtdoops_do_dump`是在崩溃上下文中运行的,所以我们需要尽量简单。 因此,我们采取以下步骤: 1. 检查源地址`dumper->active->data`是否对齐?如果不对齐,则使用逐字节复制(效率低,但安全)。 2. 或者,直接使用循环逐字节复制(因为数据量不大,record_size通常是4K)。 ### 具体代码修改 在`mtdoops_do_dump`函数中,将原来的`memcpy`替换为一个安全的复制函数: ```c static void mtdoops_do_dump(struct kmsg_dumper *dumper, enum kmsg_dump_reason reason) { // ... 省略 ... 
/* Write dump header */ hdr = (struct mtdoops_hdr *)cxt->oops_buf; hdr->magic = OOPS_MAGIC; hdr->seq = cxt->nextcount; hdr->time = ktime_get_real_seconds(); /* Write buffer: 替换memcpy为安全复制 */ // 原代码: // memcpy((char *)cxt->oops_buf + MTDOOPS_HEADER_SIZE, // (char *)dumper->active->data, record_size); // 新代码:使用逐字节复制 char *dst = (char *)cxt->oops_buf + MTDOOPS_HEADER_SIZE; char *src = (char *)dumper->active->data; for (size_t i = 0; i < record_size; i++) { dst[i] = src[i]; } // ... 后续mtd_write操作 ... } ``` 这种方法虽然效率低,但是在崩溃或重启过程中,我们更注重安全性。而且,record_size通常是一个页面大小(例如4096字节),循环4096次是可以接受的。 ### 验证 这个修改可以避免非对齐访问导致的崩溃,因为逐字节复制不要求对齐。但是,我们也要注意到,如果源地址完全不可访问,这种复制仍然会导致页错误。不过,对齐错误可以避免。 另外,我们需要考虑在复制过程中,源数据(环形缓冲区)是否可能被覆盖?在崩溃或重启过程中,系统已经停止运行,所以环形缓冲区应该是稳定的。 ### 完整代码修改 在mtdoops.c文件中,找到`mtdoops_do_dump`函数,将其中使用`memcpy`复制环形缓冲区数据到oops_buf的部分替换为循环复制。 ### 相关问题 1. 为什么在系统重启过程中会发生内存对齐错误? 2. 如何在内核模块中安全地处理内存复制以避免对齐问题? 3. 有哪些替代`memcpy`的安全内存复制函数适用于内核崩溃场景? [^1]: 参考引用[1]中提到的串口日志输出问题,这里也是日志记录过程中的崩溃。 [^2]: 引用[2]中的调试命令(如查看堆栈)可用于分析此类崩溃。 </think>### 内核崩溃分析及修复方案 #### 根本原因诊断 根据内核崩溃日志mtdoops.c代码分析,问题根本在于**MTD设备写入时的内存对齐错误**: 1. **错误类型**:`alignment fault` (ESR=0x96000021) 2. **崩溃位置**: ```c // mtdoops.c 关键函数 static void mtdoops_do_dump(...) { ... ret = mtd_write(cxt->mtd, ... , (u_char *)cxt->oops_buf); // 写入触发崩溃 ... } ``` 3. **错误地址**:`ffffffc087eadeb9`(末位0xb9=10111001₂) - 不符合64位访问要求的8字节对齐(末3位需为000) - 不符合32位访问要求的4字节对齐(末2位需为00)[^1] #### 问题定位(代码层面) 在mtdoops.c中,主要问题出在**缓冲区地址的处理**: ```c // 问题代码段 struct mtdoops_context { ... void *oops_buf; // 通过vzalloc分配,可能未对齐 }; static void mtdoops_do_dump(...) { ... // 直接使用可能未对齐的地址 memcpy((char *)cxt->oops_buf + header_size, dumper->active->data, record_size); ... } ``` #### 修复方案 修改mtdoops.c,确保所有MTD操作使用对齐的内存地址: ```c // 修复方案:确保缓冲区对齐 #include <linux/align.h> #define MTDOOPS_ALIGNMENT 8 // ARM64要求8字节对齐 struct mtdoops_context { ... 
void *oops_buf; dma_addr_t oops_buf_phys; // 添加物理地址字段 }; static int mtdoops_allocate_buf(struct mtdoops_context *cxt) { // 使用dma_alloc_coherent保证对齐 cxt->oops_buf = dma_alloc_coherent(cxt->dev, record_size + MTDOOPS_ALIGNMENT, &cxt->oops_buf_phys, GFP_KERNEL); if (!cxt->oops_buf) return -ENOMEM; // 对齐调整(可选) cxt->oops_buf = PTR_ALIGN(cxt->oops_buf, MTDOOPS_ALIGNMENT); return 0; } static void mtdoops_do_dump(...) { ... // 使用对齐的地址进行写入 ret = mtd_write(cxt->mtd, offset, record_size, &retlen, (u_char *)ALIGN((uintptr_t)cxt->oops_buf, MTDOOPS_ALIGNMENT)); ... } ``` #### 关键修复点说明 1. **内存分配对齐**: - 使用`dma_alloc_coherent()`替代`vzalloc()`保证缓存行对齐 - 添加`PTR_ALIGN`显式地址对齐($ addr_{aligned} = \lfloor \frac{addr + (align-1)}{align} \rfloor \times align $) 2. **写入操作保护**: ```c // 添加对齐检查断言 BUG_ON(!IS_ALIGNED((unsigned long)cxt->oops_buf, MTDOOPS_ALIGNMENT)); ``` 3. **重启处理优化**: ```c static int mtdoops_reboot_nb_handle(...) { if (event == SYS_RESTART) { pr_info("Skipping dump during system reboot\n"); return NOTIFY_DONE; // 避免在重启时执行危险操作 } ... } ``` #### 验证方法 1. **对齐测试模块**: ```c static int __init test_alignment(void) { struct mtdoops_context test_cxt; mtdoops_allocate_buf(&test_cxt); BUG_ON(!IS_ALIGNED(test_cxt.oops_buf, MTDOOPS_ALIGNMENT)); return 0; } module_init(test_alignment); ``` 2. **压力测试命令**: ```bash # 触发多次重启 for i in {1..100}; do echo c > /proc/sysrq-trigger sleep 5 done ``` #### 相关问题 1. 如何在ARM64架构下验证内存地址对齐? 2. `dma_alloc_coherent``kmalloc`在内存对齐上有何区别? 3. 为什么系统重启过程中更容易出现内存对齐问题? 4. 除了MTD驱动,哪些内核模块需要特别注意内存对齐? [^1]: ARM Architecture Reference Manual ARMv8, DDI 0487H.a, Section D17.2.38 [^2]: Linux Device Drivers, 3rd Edition, Chapter 15: Memory Mapping and DMA
评论
成就一亿技术人!
拼手气红包6.0元
还能输入1000个字符
 
红包 添加红包
表情包 插入表情
 条评论被折叠 查看
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值