linux的nvme驱动参数调优

本文介绍了NVMe存储设备的调优方法,包括调整nomerges、rq_affinity和add_random等参数,以及如何合理设置读取预取(readahead)大小和中断绑定策略,以优化NVMe性能。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

nvme的设备,可以调优的参数比较少,相关的代码如下:

blk_sysfs.c

/*
 * sysfs attribute table for /sys/block/<dev>/queue/.
 * Each entry maps an attribute file name to its show (read) and, when the
 * attribute is writable (S_IWUSR), store (write) handler.  Entries with no
 * .store handler are read-only from userspace.
 */

/* nr_requests (RW): depth of the request queue. */
static struct queue_sysfs_entry queue_requests_entry = {
.attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR },
.show = queue_requests_show,
.store = queue_requests_store,
};

/* read_ahead_kb (RW): readahead window size in KB. */
static struct queue_sysfs_entry queue_ra_entry = {
.attr = {.name = "read_ahead_kb", .mode = S_IRUGO | S_IWUSR },
.show = queue_ra_show,
.store = queue_ra_store,
};

/* max_sectors_kb (RW): software cap on I/O size, bounded by max_hw_sectors_kb. */
static struct queue_sysfs_entry queue_max_sectors_entry = {
.attr = {.name = "max_sectors_kb", .mode = S_IRUGO | S_IWUSR },
.show = queue_max_sectors_show,
.store = queue_max_sectors_store,
};

/* max_hw_sectors_kb (RO): hardware limit on I/O size. */
static struct queue_sysfs_entry queue_max_hw_sectors_entry = {
.attr = {.name = "max_hw_sectors_kb", .mode = S_IRUGO },
.show = queue_max_hw_sectors_show,
};

/* max_segments (RO): max scatter/gather segments per request. */
static struct queue_sysfs_entry queue_max_segments_entry = {
.attr = {.name = "max_segments", .mode = S_IRUGO },
.show = queue_max_segments_show,
};

/* max_integrity_segments (RO): max integrity-metadata segments per request. */
static struct queue_sysfs_entry queue_max_integrity_segments_entry = {
.attr = {.name = "max_integrity_segments", .mode = S_IRUGO },
.show = queue_max_integrity_segments_show,
};

/* max_segment_size (RO): max size in bytes of a single segment. */
static struct queue_sysfs_entry queue_max_segment_size_entry = {
.attr = {.name = "max_segment_size", .mode = S_IRUGO },
.show = queue_max_segment_size_show,
};

/* scheduler (RW): active I/O scheduler (elevator). */
static struct queue_sysfs_entry queue_iosched_entry = {
.attr = {.name = "scheduler", .mode = S_IRUGO | S_IWUSR },
.show = elv_iosched_show,
.store = elv_iosched_store,
};

/*
 * hw_sector_size (RO): legacy alias — note it shares
 * queue_logical_block_size_show with logical_block_size below.
 */
static struct queue_sysfs_entry queue_hw_sector_size_entry = {
.attr = {.name = "hw_sector_size", .mode = S_IRUGO },
.show = queue_logical_block_size_show,
};

/* logical_block_size (RO): addressable unit of the device. */
static struct queue_sysfs_entry queue_logical_block_size_entry = {
.attr = {.name = "logical_block_size", .mode = S_IRUGO },
.show = queue_logical_block_size_show,
};

/* physical_block_size (RO): smallest unit the device writes atomically. */
static struct queue_sysfs_entry queue_physical_block_size_entry = {
.attr = {.name = "physical_block_size", .mode = S_IRUGO },
.show = queue_physical_block_size_show,
};

/* minimum_io_size (RO): device's preferred minimum I/O granularity. */
static struct queue_sysfs_entry queue_io_min_entry = {
.attr = {.name = "minimum_io_size", .mode = S_IRUGO },
.show = queue_io_min_show,
};

/* optimal_io_size (RO): device's preferred I/O size for best throughput. */
static struct queue_sysfs_entry queue_io_opt_entry = {
.attr = {.name = "optimal_io_size", .mode = S_IRUGO },
.show = queue_io_opt_show,
};

/* discard_granularity (RO): internal allocation unit for discards. */
static struct queue_sysfs_entry queue_discard_granularity_entry = {
.attr = {.name = "discard_granularity", .mode = S_IRUGO },
.show = queue_discard_granularity_show,
};

/* discard_max_bytes (RO): max bytes a single discard request may cover. */
static struct queue_sysfs_entry queue_discard_max_entry = {
.attr = {.name = "discard_max_bytes", .mode = S_IRUGO },
.show = queue_discard_max_show,
};

/* discard_zeroes_data (RO): whether discarded blocks read back as zeroes. */
static struct queue_sysfs_entry queue_discard_zeroes_data_entry = {
.attr = {.name = "discard_zeroes_data", .mode = S_IRUGO },
.show = queue_discard_zeroes_data_show,
};

/* write_same_max_bytes (RO): max bytes for a WRITE SAME request. */
static struct queue_sysfs_entry queue_write_same_max_entry = {
.attr = {.name = "write_same_max_bytes", .mode = S_IRUGO },
.show = queue_write_same_max_show,
};

/* rotational (RW): 0 for non-rotational media (SSD/NVMe), 1 for spinning disks. */
static struct queue_sysfs_entry queue_nonrot_entry = {
.attr = {.name = "rotational", .mode = S_IRUGO | S_IWUSR },
.show = queue_show_nonrot,
.store = queue_store_nonrot,
};

/* unpriv_sgio (RW): allow unprivileged SG_IO on this queue. */
static struct queue_sysfs_entry queue_unpriv_sgio_entry = {
.attr = {.name = "unpriv_sgio", .mode = S_IRUGO | S_IWUSR },
.show = queue_show_unpriv_sgio,
.store = queue_store_unpriv_sgio,
};

/* nomerges (RW): 0 = all merges, 1 = one-hit merges only, 2 = no merges. */
static struct queue_sysfs_entry queue_nomerges_entry = {
.attr = {.name = "nomerges", .mode = S_IRUGO | S_IWUSR },
.show = queue_nomerges_show,
.store = queue_nomerges_store,
};

/* rq_affinity (RW): 1 = complete on submitting CPU group, 2 = on submitting CPU. */
static struct queue_sysfs_entry queue_rq_affinity_entry = {
.attr = {.name = "rq_affinity", .mode = S_IRUGO | S_IWUSR },
.show = queue_rq_affinity_show,
.store = queue_rq_affinity_store,
};

/* iostats (RW): enable/disable per-queue I/O accounting. */
static struct queue_sysfs_entry queue_iostats_entry = {
.attr = {.name = "iostats", .mode = S_IRUGO | S_IWUSR },
.show = queue_show_iostats,
.store = queue_store_iostats,
};

/* add_random (RW): whether this device contributes to the entropy pool. */
static struct queue_sysfs_entry queue_random_entry = {
.attr = {.name = "add_random", .mode = S_IRUGO | S_IWUSR },
.show = queue_show_random,
.store = queue_store_random,
};

参数列表如下:

[root@localhost queue]# ls -alrt *
-rw-r--r-- 1 root root 4096 Dec 18 19:58 read_ahead_kb
-rw-r--r-- 1 root root 4096 Dec 18 20:01 nomerges
-rw-r--r-- 1 root root 4096 Dec 18 20:34 rq_affinity
-rw-r--r-- 1 root root 4096 Dec 19 08:39 max_sectors_kb
-rw-r--r-- 1 root root 4096 Dec 19 08:47 nr_requests
-rw-r--r-- 1 root root 4096 Dec 19 08:54 iostats
-r--r--r-- 1 root root 4096 Dec 19 08:54 write_same_max_bytes
-rw-r--r-- 1 root root 4096 Dec 19 08:54 unpriv_sgio
-rw-r--r-- 1 root root 4096 Dec 19 08:54 scheduler
-rw-r--r-- 1 root root 4096 Dec 19 08:54 rotational
-r--r--r-- 1 root root 4096 Dec 19 08:54 physical_block_size
-r--r--r-- 1 root root 4096 Dec 19 08:54 optimal_io_size
-r--r--r-- 1 root root 4096 Dec 19 08:54 minimum_io_size
-r--r--r-- 1 root root 4096 Dec 19 08:54 max_segments
-r--r--r-- 1 root root 4096 Dec 19 08:54 max_segment_size
-r--r--r-- 1 root root 4096 Dec 19 08:54 max_integrity_segments
-r--r--r-- 1 root root 4096 Dec 19 08:54 max_hw_sectors_kb
-r--r--r-- 1 root root 4096 Dec 19 08:54 logical_block_size
-r--r--r-- 1 root root 4096 Dec 19 08:54 hw_sector_size
-r--r--r-- 1 root root 4096 Dec 19 08:54 discard_zeroes_data
-r--r--r-- 1 root root 4096 Dec 19 08:54 discard_max_bytes
-r--r--r-- 1 root root 4096 Dec 19 08:54 discard_granularity
-rw-r--r-- 1 root root 4096 Dec 19 08:54 add_random

其中属性为只读的,肯定直接通过/sys/没法修改,有的硬编码的跟驱动相关,可以尝试修改驱动。其余可以尝试调优的参数如下:

1.nomerges (RW)
------------- This enables the user to disable the lookup logic involved with IO merging requests in the block layer. By default (0) all merges are enabled. When set to 1 only simple one-hit merges will be tried. When set to 2 no merge algorithms will be tried (including one-hit or more complex tree/hash lookups).

根据打开 iostats 统计后的观察,iostat 输出中与 merge 相关的前两列(rrqm/s、wrqm/s)一直为 0,所以干脆设置为不合并,这样可以省去块层中判断 queue 的 merge 属性的那段逻辑。

当其值为0,说明  QUEUE_FLAG_NOXMERGES 和 QUEUE_FLAG_NOMERGES 都没有设置。这个设置为2,表示不需要merge,机械盘一般设置为需要merge,相关代码如下:

/*
 * queue_nomerges_store - sysfs store handler for
 * /sys/block/<dev>/queue/nomerges.
 * @q:     request queue whose merge policy is being changed
 * @page:  user-supplied buffer containing the new value
 * @count: number of bytes in @page
 *
 * Accepted values:
 *   0 - all merges enabled (both flags cleared)
 *   1 - only simple one-hit merges (QUEUE_FLAG_NOXMERGES)
 *   2 - no merging at all (QUEUE_FLAG_NOMERGES)
 *
 * Returns the number of bytes consumed, or a negative errno from
 * queue_var_store() on bad input.
 */
static ssize_t queue_nomerges_store(struct request_queue *q, const char *page,
                    size_t count)
{
    unsigned long nm;
    ssize_t ret = queue_var_store(&nm, page, count);

    if (ret < 0)
        return ret;

    spin_lock_irq(q->queue_lock);
    /* Reset to the default "all merges enabled" state first. */
    queue_flag_clear(QUEUE_FLAG_NOMERGES, q);
    queue_flag_clear(QUEUE_FLAG_NOXMERGES, q);
    if (nm == 2)
        queue_flag_set(QUEUE_FLAG_NOMERGES, q); /* 2: disable all merging */
    else if (nm)
        queue_flag_set(QUEUE_FLAG_NOXMERGES, q); /* non-zero: one-hit merges only */
    spin_unlock_irq(q->queue_lock);
    return ret;
}

 

2.rq_affinity (RW)
---------------- If this option is '1', the block layer will migrate request completions to the cpu "group" that originally submitted the request. For some workloads this provides a significant reduction in CPU cycles due to caching effects.
For storage configurations that need to maximize distribution of completion processing setting this option to '2' forces the completion to run on the requesting cpu (bypassing the "group" aggregation logic).

设为 1 时,块层会把完成处理迁移到最初提交请求的 CPU "组",利用 cache 命中减少 CPU 开销;设为 2 则强制完成处理在提交请求的那个 CPU 上运行,对 NVMe 这类多队列设备通常建议设为 2。

 

3.add_random (RW)
---------------- This file allows to turn off the disk entropy contribution. Default value of this file is '1'(on).

这个最好设置为0,可以减少一点点性能消耗。

 

readahead参数,是如何影响nvme的性能的?一开始以为这个对机械盘影响较大,后来根据追踪代码,发现对io的及时性还是有较大的提高。

下面的代码描述了初始化阶段设置的大小,这个如果可以的话,建议设置大一些,比如1M。

 

struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
{
    struct request_queue *q;
    struct queue_limits_aux *limits_aux = NULL;
    int err;

    q = kmem_cache_alloc_node(blk_requestq_cachep,
                gfp_mask | __GFP_ZERO, node_id);
    if (!q)
        return NULL;

    q->id = ida_simple_get(&blk_queue_ida, 0, 0, gfp_mask);
    if (q->id < 0)
        goto fail_q;

    q->backing_dev_info.ra_pages =
            (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;//初始化预读参数,默认为128k

 

4.中断绑核

我们知道,nvme的队列名称,其实是根据核数来编号的,因为admin的队列和io队列的第一个是共享一个中断的,所以他俩的中断数会相对比其他io队列多一些,由于队列默认就是跟随

cpu号而绑定的,所以中断号,最好送到指定的cpu上去,因为中断上下文毕竟是要访问内存的,具体怎么绑,可以参照如下:

查看/proc/interrupt,中断名称是nvme0q0,当然类似的nvme1q0也是,以此类推,这个肯定是admin队列。

io队列就是nvme0q1----nvme0qx,其中x就是cpu的核数。

nvme0q1这个对列,其实默认就是在cpu0上,那么对应的中断,最好也绑在cpu0上。

nvme0q30这个队列,默认在cpu29上,那么对应的中断,最好也绑在cpu29上。以此类推。


转载于:https://www.cnblogs.com/10087622blog/p/8063987.html

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值