mtdoops 和 kdump 原理

本文介绍了kdump如何在kernel panic时捕获log信息,并利用mtdoops将这些信息备份到mtd分区。通过工作队列mtdoops_workfunc_write在drivers/mtd/mtdoops.c中实现此功能。

kdump(本文指内核的 kmsg_dump 机制)可以记录 kernel panic 等 log 信息。它的原理是建立工作队列,把 kmsg log 备份到一个 mtd 分区上。

该功能是在 drivers/mtd/mtdoops.c 中实现的。

1. module_init/ module_exit

/* Register mtdoops_init/mtdoops_exit as the module's init and exit functions. */
module_init(mtdoops_init);
module_exit(mtdoops_exit);

/* Module metadata: license, author and description strings. */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Richard Purdie <rpurdie@openedhand.com>");
MODULE_DESCRIPTION("MTD Oops/Panic console logger/driver");

/*
 * Module init function.
 *
 * Validates the mtddev and record_size parameters, turns a purely numeric
 * mtddev into an MTD index, allocates the record_size-byte record buffer
 * (pre-filled with 0xff), initialises the erase and write work items and
 * registers mtdoops_notifier.
 *
 * Returns 0 on success, -EINVAL for a rejected parameter, or -ENOMEM when
 * the record buffer cannot be allocated.
 */
static int __init mtdoops_init(void)
{
        struct mtdoops_context *ctx = &oops_cxt;
        char *tail;
        int idx;

        if (!strlen(mtddev)) {
                printk(KERN_ERR "mtdoops: mtd device (mtddev=name/number) must be supplied\n");
                return -EINVAL;
        }
        if (record_size & 4095) {
                printk(KERN_ERR "mtdoops: record_size must be a multiple of 4096\n");
                return -EINVAL;
        }
        if (record_size < 4096) {
                printk(KERN_ERR "mtdoops: record_size must be over 4096 bytes\n");
                return -EINVAL;
        }

        /*
         * Setup the MTD device to use. Only a string that parses completely
         * as a number selects an index here; otherwise the index stays at -1
         * (mtdoops_notify_add() compares mtddev against device names).
         */
        idx = simple_strtoul(mtddev, &tail, 0);
        ctx->mtd_index = (*tail == '\0') ? idx : -1;
        if (ctx->mtd_index > MAX_MTD_DEVICES) {
                printk(KERN_ERR "mtdoops: invalid mtd device number (%u) given\n",
                                idx);
                return -EINVAL;
        }

        ctx->oops_buf = vmalloc(record_size);
        if (ctx->oops_buf == NULL) {
                printk(KERN_ERR "mtdoops: failed to allocate buffer workspace\n");
                return -ENOMEM;
        }
        memset(ctx->oops_buf, 0xff, record_size);

        /* Two work items: one for erasing, one for writing. */
        INIT_WORK(&ctx->work_erase, mtdoops_workfunc_erase);
        INIT_WORK(&ctx->work_write, mtdoops_workfunc_write);

        /* Register the notifier that supplies the MTD add/remove callbacks. */
        register_mtd_user(&mtdoops_notifier);
        return 0;
}

/*
 * Module exit function: unregisters mtdoops_notifier, then frees the record
 * buffer and the page-usage bitmap held in oops_cxt.
 */
static void __exit mtdoops_exit(void)
{
        unregister_mtd_user(&mtdoops_notifier);

        vfree(oops_cxt.oops_buf);
        vfree(oops_cxt.oops_page_used);
}

2. mtdoops_context 结构体:

/*
 * Driver state. The single static instance, oops_cxt, is defined together
 * with the type.
 */
static struct mtdoops_context {
        /* Dumper passed to kmsg_dump_register(); its callback is set to mtdoops_do_dump */
        struct kmsg_dumper dump;

        /* Index of the selected MTD device; -1 while none has been selected */
        int mtd_index;
        /* Work item that runs mtdoops_workfunc_erase() */
        struct work_struct work_erase;
        /* Work item that runs mtdoops_workfunc_write() */
        struct work_struct work_write;
        /* MTD device in use; set to NULL when the device is removed */
        struct mtd_info *mtd;
        /* Number of record_size-sized pages on the device (mtd->size / record_size) */
        int oops_pages;
        /* Page index at which the next record is written */
        int nextpage;
        /* Counter stored in the first header word of the next record */
        int nextcount;
        /* Bitmap with one bit per page; a bit is set once that page has been written */
        unsigned long *oops_page_used;

        /* record_size-byte buffer holding the record to write; reset to 0xff after each write */
        void *oops_buf;
} oops_cxt;
kmsg_dumper

/**
 * struct kmsg_dumper - kernel crash message dumper structure
 * @dump:       The callback which gets called on crashes. It receives the
 *              dumper itself and the reason for the dump. The buffer is
 *              passed as two sections, where s1 (length l1) contains the
 *              older messages and s2 (length l2) contains the newer.
 * @list:       Entry in the dumper list (private)
 * @registered: Flag that specifies if this is already registered
 */
struct kmsg_dumper {
        void (*dump)(struct kmsg_dumper *dumper, enum kmsg_dump_reason reason,
                        const char *s1, unsigned long l1,
                        const char *s2, unsigned long l2);
        struct list_head list;
        int registered;
};
/* Reason codes passed as the reason argument of the kmsg_dumper dump callback. */
enum kmsg_dump_reason {
        KMSG_DUMP_OOPS,
        KMSG_DUMP_PANIC,
        KMSG_DUMP_KEXEC,
};
3. mtdoops_workfunc_erase --- 擦除

/* Scheduled work - when we can't proceed without erasing a block */
/*
 * Scheduled work - when we can't proceed without erasing a block.
 *
 * Moves cxt->nextpage to the start of an erase block, skips blocks that
 * block_isbad() reports as bad, then tries up to three times to erase the
 * block at nextpage. If every attempt fails, the block is marked bad (only
 * for -EIO and only when the device provides block_markbad()) and control
 * jumps to the badblock label to move on to the next erase block.
 */
static void mtdoops_workfunc_erase(struct work_struct *work)
{
        struct mtdoops_context *cxt =
                        container_of(work, struct mtdoops_context, work_erase);
        struct mtd_info *mtd = cxt->mtd;
        int i = 0, j, ret, mod;

        /* We were unregistered */
        if (!mtd)
                return;

        /*
         * If nextpage does not sit on an erase-block boundary, advance it to
         * the start of the following erase block, wrapping to page 0.
         */
        mod = (cxt->nextpage * record_size) % mtd->erasesize;
        if (mod != 0) {
                cxt->nextpage = cxt->nextpage + ((mtd->erasesize - mod) / record_size);
                if (cxt->nextpage >= cxt->oops_pages)
                        cxt->nextpage = 0;
        }

        /* Skip bad blocks; the loop is not entered if the device has no block_isbad(). */
        while (mtd->block_isbad) {
                ret = mtd->block_isbad(mtd, cxt->nextpage * record_size);
                if (!ret)
                        break;
                if (ret < 0) {
                        printk(KERN_ERR "mtdoops: block_isbad failed, aborting\n");
                        return;
                }
                /* Also entered via goto from the erase-failure path below. */
badblock:
                printk(KERN_WARNING "mtdoops: bad block at %08lx\n",
                       cxt->nextpage * record_size);
                /* i counts the blocks skipped so far. */
                i++;
                /* Step forward by one erase block worth of pages, wrapping to 0. */
                cxt->nextpage = cxt->nextpage + (mtd->erasesize / record_size);
                if (cxt->nextpage >= cxt->oops_pages)
                        cxt->nextpage = 0;
                /* Give up once as many blocks were skipped as the device has erase blocks. */
                if (i == cxt->oops_pages / (mtd->erasesize / record_size)) {
                        printk(KERN_ERR "mtdoops: all blocks bad!\n");
                        return;
                }
        }

        /* Up to three erase attempts; stop at the first one that does not fail. */
        for (j = 0, ret = -1; (j < 3) && (ret < 0); j++)
                ret = mtdoops_erase_block(cxt, cxt->nextpage * record_size);
        if (ret >= 0) {
                printk(KERN_DEBUG "mtdoops: ready %d, %d\n",
                       cxt->nextpage, cxt->nextcount);
                return;
        }

        /* Erase kept failing: mark the block bad on -EIO when the device supports it. */
        if (mtd->block_markbad && ret == -EIO) {
                ret = mtd->block_markbad(mtd, cxt->nextpage * record_size);
                if (ret < 0) {
                        printk(KERN_ERR "mtdoops: block_markbad failed, aborting\n");
                        return;
                }
        }
        /* Re-enter the loop body above to advance past this block and retry. */
        goto badblock;
}

4. mtdoops_workfunc_write(struct work_struct *work)

/*
 * Work handler for work_write: recovers the owning context from the work
 * item and writes the buffered record with panic == 0.
 */
static void mtdoops_workfunc_write(struct work_struct *work)
{
        mtdoops_write(container_of(work, struct mtdoops_context, work_write), 0);
}
static void mtdoops_write(struct mtdoops_context *cxt, int panic)

static void mtdoops_write(struct mtdoops_context *cxt, int panic)
{
        struct mtd_info *mtd = cxt->mtd;
        size_t retlen;
        u32 *hdr;
        int ret;

        /* Add mtdoops header to the buffer */
        hdr = cxt->oops_buf;
        hdr[0] = cxt->nextcount;
        hdr[1] = MTDOOPS_KERNMSG_MAGIC;

        if (panic)
                ret = mtd->panic_write(mtd, cxt->nextpage * record_size,
                                        record_size, &retlen, cxt->oops_buf);
        else
                ret = mtd->write(mtd, cxt->nextpage * record_size,
                                        record_size, &retlen, cxt->oops_buf);

        if (retlen != record_size || ret < 0)
                printk(KERN_ERR "mtdoops: write failure at %ld (%td of %ld written), error %d\n",
                       cxt->nextpage * record_size, retlen, record_size, ret);
        mark_page_used(cxt, cxt->nextpage);
        memset(cxt->oops_buf, 0xff, record_size);

        mtdoops_inc_counter(cxt);
}
static void mark_page_used(struct mtdoops_context *cxt, int page)

/* Set the bit for @page in the context's page-usage bitmap. */
static void mark_page_used(struct mtdoops_context *cxt, int page)
{
        unsigned long *used = cxt->oops_page_used;

        set_bit(page, used);
}
static void mtdoops_inc_counter(struct mtdoops_context *cxt)

/*
 * Advance to the next record slot.
 *
 * nextpage is incremented and wraps to 0 at oops_pages; nextcount is
 * incremented and reset to 0 when it reaches 0xffffffff. If the new page
 * is already marked used, the erase work item is scheduled; otherwise a
 * debug message reports that no erase is needed.
 */
static void mtdoops_inc_counter(struct mtdoops_context *cxt)
{
        if (++cxt->nextpage >= cxt->oops_pages)
                cxt->nextpage = 0;

        if (++cxt->nextcount == 0xffffffff)
                cxt->nextcount = 0;

        if (!page_is_used(cxt, cxt->nextpage)) {
                printk(KERN_DEBUG "mtdoops: ready %d, %d (no erase)\n",
                       cxt->nextpage, cxt->nextcount);
                return;
        }

        schedule_work(&cxt->work_erase);
}
5. mtdoops_notifier

/*
 * MTD notifier passed to register_mtd_user()/unregister_mtd_user(); its
 * add and remove callbacks are mtdoops_notify_add/mtdoops_notify_remove.
 */
static struct mtd_notifier mtdoops_notifier = {
        .add    = mtdoops_notify_add,
        .remove = mtdoops_notify_remove,
};
static void mtdoops_notify_add(struct mtd_info *mtd)

/*
 * .add callback of mtdoops_notifier.
 *
 * @mtd: the MTD device being offered
 *
 * A device whose name equals mtddev selects its own index. Devices that do
 * not match the selected index are ignored. The device is rejected (with an
 * error message) when it is smaller than two erase blocks, when an erase
 * block is smaller than record_size, or when it exceeds
 * MTDOOPS_MAX_MTD_SIZE. Otherwise the page-usage bitmap is allocated, the
 * kmsg dumper is registered and the context is attached to the device.
 */
static void mtdoops_notify_add(struct mtd_info *mtd)
{
        struct mtdoops_context *cxt = &oops_cxt;
        u64 mtdoops_pages = div_u64(mtd->size, record_size);
        int err;

        if (!strcmp(mtd->name, mtddev))
                cxt->mtd_index = mtd->index;

        if (mtd->index != cxt->mtd_index || cxt->mtd_index < 0)
                return;

        if (mtd->size < mtd->erasesize * 2) {
                printk(KERN_ERR "mtdoops: MTD partition %d not big enough for mtdoops\n",
                       mtd->index);
                return;
        }
        if (mtd->erasesize < record_size) {
                printk(KERN_ERR "mtdoops: eraseblock size of MTD partition %d too small\n",
                       mtd->index);
                return;
        }
        if (mtd->size > MTDOOPS_MAX_MTD_SIZE) {
                printk(KERN_ERR "mtdoops: mtd%d is too large (limit is %d MiB)\n",
                       mtd->index, MTDOOPS_MAX_MTD_SIZE / 1024 / 1024);
                return;
        }

        /*
         * oops_page_used is a bit field with one bit per page. vmalloc()
         * takes a size in bytes, so the number of longs needed must be
         * multiplied by sizeof(unsigned long).
         */
        cxt->oops_page_used = vmalloc(DIV_ROUND_UP(mtdoops_pages,
                        BITS_PER_LONG) * sizeof(unsigned long));
        if (!cxt->oops_page_used) {
                printk(KERN_ERR "mtdoops: could not allocate page array\n");
                return;
        }

        cxt->dump.dump = mtdoops_do_dump;
        err = kmsg_dump_register(&cxt->dump);
        if (err) {
                printk(KERN_ERR "mtdoops: registering kmsg dumper failed, error %d\n", err);
                /* Undo the allocation so a later vfree() of this pointer is harmless. */
                vfree(cxt->oops_page_used);
                cxt->oops_page_used = NULL;
                return;
        }

        cxt->mtd = mtd;
        cxt->oops_pages = (int)mtd->size / record_size;
        find_next_position(cxt);
        printk(KERN_INFO "mtdoops: Attached to MTD device %d\n", mtd->index);
}
static void mtdoops_notify_remove(struct mtd_info *mtd)

/*
 * .remove callback of mtdoops_notifier.
 *
 * Ignores devices other than the selected one. For the selected device it
 * unregisters the kmsg dumper (warning on failure), clears cxt->mtd and
 * flushes scheduled work.
 */
static void mtdoops_notify_remove(struct mtd_info *mtd)
{
        struct mtdoops_context *ctx = &oops_cxt;
        int unreg;

        /* Nothing to do unless this is the device selected for mtdoops. */
        if (ctx->mtd_index < 0 || mtd->index != ctx->mtd_index)
                return;

        unreg = kmsg_dump_unregister(&ctx->dump);
        if (unreg < 0)
                printk(KERN_WARNING "mtdoops: could not unregister kmsg_dumper\n");

        ctx->mtd = NULL;
        flush_scheduled_work();
}













评论
成就一亿技术人!
拼手气红包6.0元
还能输入1000个字符
 
红包 添加红包
表情包 插入表情
 条评论被折叠 查看
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值