nvme 驱动详解[转]

本文详细介绍了NVMe驱动的初始化过程,包括Kconfig配置、依赖关系、核心文件、PRP内存池创建、队列分配、中断注册等关键步骤。通过分析nvme_core.c和nvme_scsi.c等文件,揭示了NVMe驱动如何与Block层交互并启动设备。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

nvme 驱动详解 之1
http://blog.youkuaiyun.com/qqqqqq999999/article/details/47732319

首先打开drivers/block下的Kconfig文件,其中定义了config BLK_DEV_NVME,如下。

config BLK_DEV_NVME

     tristate "NVM Express block device"

     depends on PCI

     ---help---

       The NVM Express driver is for solid state drives directly

       connected to the PCI or PCI Express bus.  If you know you

       don't have one of these, it is safe to answer N.



       To compile this driver as a module, choose M here: the

       module will be called nvme.

通过console,输入make menuconfig,搜索BLK_DEV_NVME得到如下依赖关系。

Symbol: BLK_DEV_NVME [=m]

| Type : tristate

| Prompt: NVM Express block device

| Location:

| -> Device Drivers

| (1) -> Block devices (BLK_DEV [=y])

| Defined at drivers/block/Kconfig:313

| Depends on: BLK_DEV [=y] && PCI [=y]

可以看到nvme依赖于BLK_DEV和PCI。

打开drivers/block/Makefile,搜索NVME,可以看到:

obj-$(CONFIG_BLK_DEV_NVME) += nvme.o

nvme-y := nvme-core.o nvme-scsi.o

关于和BLK相关的文件,打开block/Makefile:

obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-tag.o blk-sysfs.o \
			blk-flush.o blk-settings.o blk-ioc.o blk-map.o \
			blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
			blk-iopoll.o blk-lib.o blk-mq.o blk-mq-tag.o \
			blk-mq-sysfs.o blk-mq-cpu.o blk-mq-cpumap.o ioctl.o \
			genhd.o scsi_ioctl.o partition-generic.o ioprio.o \
			partitions/

哇塞,是不是很多?不要担心,NVME也只是用了BLOCK层的一些函数而已,不用把所有与BLOCK相关的文件都看了,除非你有精力去研究。

好了,到目前为止,我们知道了要看哪些文件了,nvme-core.c和nvme-scsi.c是必须的,剩下的就是当我们的driver调用到block层的哪些函数时再去研究。

打开nvme-core.c,查看入口函数,module_init(nvme_init);

/*
 * Module entry point for the NVMe driver.
 *
 * Sets up, in order: the kthread wait queue head, the "nvme" single-threaded
 * workqueue, the "nvme" block device registration and the PCI driver.
 *
 * Returns 0 on success or a negative errno. On failure, every step that had
 * already succeeded is undone in reverse order.
 */
static int __init nvme_init(void)
{
	int ret;

	init_waitqueue_head(&nvme_kthread_wait);

	nvme_workq = create_singlethread_workqueue("nvme");
	if (nvme_workq == NULL)
		return -ENOMEM;

	ret = register_blkdev(nvme_major, "nvme");
	if (ret < 0)
		goto err_destroy_workq;
	/* A positive return value is adopted as the driver's major number. */
	if (ret > 0)
		nvme_major = ret;

	/* Registering the PCI driver is the last step; success ends here. */
	ret = pci_register_driver(&nvme_driver);
	if (ret == 0)
		return 0;

	unregister_blkdev(nvme_major, "nvme");
err_destroy_workq:
	destroy_workqueue(nvme_workq);
	return ret;
}

注册pci driver后,会调用nvme_driver中的probe函数。发现开始总是美好的,函数是如此的简洁,不要高兴的太早,痛苦的经历正在逼近。

static int nvme_probe(struct pci_devpdev, const struct pci_device_id id)

{

     int node, result = -ENOMEM;

     struct nvme_dev *dev;



     node = dev_to_node(&pdev->dev);//获取node节点,与NUMA系统有关。

     if (node == NUMA_NO_NODE)

               set_dev_node(&pdev->dev, 0);



     dev = kzalloc_node(sizeof(*dev), GFP_KERNEL, node);

     if (!dev)

               return -ENOMEM;

     dev->entry = kzalloc_node(num_possible_cpus() *sizeof(*dev->entry),//分配msix-entry

                                                             GFP_KERNEL,node);

     if (!dev->entry)

               goto free;

     dev->queues = kzalloc_node((num_possible_cpus() + 1) *sizeof(void *),//分配queues 资源,

                                                             GFP_KERNEL,node);//这里之所以多1,是因为有admin-queues

     if (!dev->queues)

               goto free;



     INIT_LIST_HEAD(&dev->namespaces);//初始化namespaces链表。

     dev->reset_workfn = nvme_reset_failed_dev;

     INIT_WORK(&dev->reset_work, nvme_reset_workfn);

     dev->pci_dev = pci_dev_get(pdev);

     pci_set_drvdata(pdev, dev);

     result = nvme_set_instance(dev);//设置pci设备的句柄instance,代表该设备。

     if (result)

               goto put_pci;



     result = nvme_setup_prp_pools(dev);//设置dma需要的prp内存池。

     if (result)

               goto release;



     kref_init(&dev->kref);

     result = nvme_dev_start(dev);//创建admin queue、 io queue 、request irq

     if (result)

               goto release_pools;



     if (dev->online_queues > 1)

               result = nvme_dev_add(dev);//初始化mq,并增加一个实际可用的nvme dev,并且admin_queue可以发送cmd。

     if (result)

               goto shutdown;



     scnprintf(dev->name, sizeof(dev->name),"nvme%d", dev->instance);

     dev->miscdev.minor = MISC_DYNAMIC_MINOR;

     dev->miscdev.parent = &pdev->dev;

     dev->miscdev.name = dev->name;

     dev->miscdev.fops = &nvme_dev_fops;

     result = misc_register(&dev->miscdev);//注册一个misc设备

     if (result)

               goto remove;



     nvme_set_irq_hints(dev);



     dev->initialized = 1;

     return 0;

remove:

     nvme_dev_remove(dev);

     nvme_dev_remove_admin(dev);

     nvme_free_namespaces(dev);

shutdown:

     nvme_dev_shutdown(dev);

release_pools:

     nvme_free_queues(dev, 0);

     nvme_release_prp_pools(dev);

release:

     nvme_release_instance(dev);

put_pci:

     pci_dev_put(dev->pci_dev);

free:

     kfree(dev->queues);

     kfree(dev->entry);

     kfree(dev);

     return result;

}

上面每一个主要功能的函数都简单地注释了一下,描述了各自做了哪些工作,下面具体看看这些函数是怎么实现的。

static int nvme_set_instance(structnvme_dev *dev)

{

     int instance, error;



     do {

               if (!ida_pre_get(&nvme_instance_ida,GFP_KERNEL))

                        return -ENODEV;



               spin_lock(&dev_list_lock);

               error = ida_get_new(&nvme_instance_ida,&instance);

               spin_unlock(&dev_list_lock);

     } while (error == -EAGAIN);



     if (error)

               return -ENODEV;



     dev->instance = instance;//该函数获得设备的instance,相当于该设备的id,代表着该设备。

     return 0;

}

nvme_setup_prp_pools用来创建DMA时所用的内存池,prp_page_pool是内核虚拟地址:

static int nvme_setup_prp_pools(structnvme_dev *dev)

{

     struct device *dmadev = &dev->pci_dev->dev;

     dev->prp_page_pool = dma_pool_create("prp listpage", dmadev,

                                                    PAGE_SIZE,PAGE_SIZE, 0);

     if (!dev->prp_page_pool)

               return -ENOMEM;



     /* Optimisation for I/Os between 4k and 128k */

     dev->prp_small_pool = dma_pool_create("prp list256", dmadev,

                                                    256, 256, 0);

     if (!dev->prp_small_pool) {

               dma_pool_destroy(dev->prp_page_pool);

               return -ENOMEM;

     }

     return 0;

}

下面是重量级的函数之一,nvme_dev_start:

static intnvme_dev_start(struct nvme_dev *dev)

{

     int result;

     bool start_thread = false;



     result = nvme_dev_map(dev);

     if (result)

               return result;



     result = nvme_configure_admin_queue(dev);//配置adminsubmit queue 和complete queue,64 depth

     if (result)

               goto unmap;



     spin_lock(&dev_list_lock);

     if (list_empty(&dev_list) &&IS_ERR_OR_NULL(nvme_thread)) {

               start_thread = true;

               nvme_thread = NULL;

     }

     list_add(&dev->node, &dev_list);

    
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值