Below is your `vhost-user RDMA` program refactored in a **standard Linux application style**, including:
- A clearer code structure
- Conformance with Linux kernel/userspace conventions (naming, logging, error handling)
- Complete English comments (functions, variables, logic flow)
- Standard C style (no macro abuse, sensible use of constants)
- Improved memory management and resource release
- The necessary error checks and a cleanup path
---
```c
/*
* Vhost-user RDMA Device - Initialization and Packet Forwarding
*
* Copyright (C) 2025 KylinSoft Inc. All rights reserved.
*
* Author: Xiong Weimin <xiongweimin@kylinos.cn>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <signal.h>
#include <getopt.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <inttypes.h>
#include <sys/types.h>
#include <sys/queue.h>
#include <stdarg.h>
#include <ctype.h>
#include <errno.h>
/* DPDK headers */
#include <rte_memory.h>
#include <rte_launch.h>
#include <rte_eal.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_debug.h>
#include <rte_log.h>
#include <rte_ethdev.h>
#include <rte_mbuf.h>
#include <rte_ring.h>
#include <rte_malloc.h>
#include <rte_cycles.h>
#include <rte_errno.h>
#include <rte_vhost.h>
/* Local headers */
#include "vhost_rdma_ib.h"
#include "vhost_rdma.h"
#include "vhost_rdma_pkt.h"
#include "vhost_rdma_log.h"
/**
* Maximum length for Unix socket path
*/
#define SOCKET_PATH_MAX 64
/**
* Default number of RX/TX descriptors
*/
#define MAX_NB_RXD 1024
#define MAX_NB_TXD 1024
/**
* Size of shared rings between vhost devices and datapath
*/
#define MAX_RING_COUNT 1024
/**
* Default number of mbufs in memory pool
*/
#define NUM_MBUFS_DEFAULT (1UL << 16) // 65536
/**
* Cache size for per-lcore mbuf cache
*/
#define MBUF_CACHE_SIZE 256
/**
* Data buffer size in each mbuf
*/
#define MBUF_DATA_SIZE RTE_MBUF_DEFAULT_BUF_SIZE
/* Identifiers for the long-only options parsed by getopt_long() */
enum {
OPT_STATS_NUM = 256,
OPT_SOCKET_FILE_NUM,
OPT_TX_CSUM_NUM,
OPT_NUM_MBUFS_NUM,
};
/* Forward declarations */
struct vhost_rdma_device;
extern struct vhost_rdma_device g_vhost_rdma_dev[];
/* Global configuration */
static char *socket_path = NULL; /* Array of socket paths */
static int nb_sockets = 0; /* Number of vhost sockets */
static uint16_t pair_port_id = UINT16_MAX; /* Physical port ID to forward packets */
static volatile bool force_quit = false; /* Signal to exit cleanly */
/* Stats and feature flags */
static uint32_t enable_stats = 0; /* Enable periodic stats printing (seconds) */
static uint32_t enable_tx_csum = 0; /* Enable TX checksum offload */
static int total_num_mbufs = NUM_MBUFS_DEFAULT;/* Total mbufs across pools */
/* Shared resources */
static struct rte_ring *vhost_rdma_rx_ring = NULL;
static struct rte_ring *vhost_rdma_tx_ring = NULL;
static struct rte_mempool *vhost_rdma_mbuf_pool = NULL;
/* Per-lcore info for device management */
struct lcore_info {
uint32_t device_num;
volatile uint8_t dev_removal_flag;
TAILQ_HEAD(vhost_dev_tailq_list, vhost_rdma_device) vdev_list;
};
static struct lcore_info lcore_info[RTE_MAX_LCORE];
static unsigned lcore_ids[RTE_MAX_LCORE];
/* Port configuration templates */
static struct rte_eth_conf default_port_config = {
.rxmode = {
.mq_mode = ETH_MQ_RX_RSS,
.max_rx_pkt_len = RTE_ETHER_MAX_LEN,
},
.txmode = {
.mq_mode = ETH_MQ_TX_NONE,
},
.rss_conf = {
.rss_hf = ETH_RSS_IP | ETH_RSS_TCP | ETH_RSS_UDP,
}
};
static struct rte_eth_conf offload_port_config = {
.rxmode = {
.offloads = RTE_ETH_RX_OFFLOAD_CHECKSUM,
.mq_mode = ETH_MQ_RX_RSS,
.max_rx_pkt_len = RTE_ETHER_MAX_LEN,
},
.txmode = {
.mq_mode = ETH_MQ_TX_NONE,
.offloads = RTE_ETH_TX_OFFLOAD_IPV4_CKSUM |
RTE_ETH_TX_OFFLOAD_UDP_CKSUM |
RTE_ETH_TX_OFFLOAD_TCP_CKSUM,
},
.rss_conf = {
.rss_hf = ETH_RSS_IP | ETH_RSS_TCP | ETH_RSS_UDP,
}
};
/**
* @brief Unregister all registered vhost drivers.
*
* Called during signal cleanup to ensure no stale sockets remain.
*
* @param socket_num Number of socket paths to unregister
*/
static void
unregister_drivers(int socket_num)
{
int i, ret;
for (i = 0; i < socket_num; i++) {
const char *path = socket_path + i * SOCKET_PATH_MAX;
ret = rte_vhost_driver_unregister(path);
if (ret != 0) {
RTE_LOG(ERR, VHOST_CONFIG,
"Failed to unregister vhost driver for socket %s\n", path);
} else {
RTE_LOG(INFO, VHOST_CONFIG, "Unregistered socket: %s\n", path);
}
}
}
/**
 * @brief Signal handler for graceful shutdown (SIGINT/SIGTERM).
*
* Cleans up vhost driver registrations and exits.
*/
static void
vhost_rdma_signal_handler(__rte_unused int signum)
{
RTE_LOG(INFO, VHOST_CONFIG, "Received SIGINT, shutting down...\n");
force_quit = true;
unregister_drivers(nb_sockets);
exit(0);
}
/**
* @brief Initialize an Ethernet port with given offload settings.
*
* Configures one RX/TX queue, sets up descriptor rings, starts the port.
*
* @param port_id The port identifier
* @param offload Whether to enable hardware offloads
* @return 0 on success, negative on failure
*/
static int
vhost_rdma_init_port(uint16_t port_id, bool offload)
{
int ret;
uint16_t nb_rxd = MAX_NB_RXD;
uint16_t nb_txd = MAX_NB_TXD;
struct rte_eth_dev_info dev_info;
struct rte_eth_conf port_conf = offload ? offload_port_config : default_port_config;
struct rte_eth_txconf txconf;
struct rte_ether_addr addr;
char mac_str[RTE_ETHER_ADDR_FMT_SIZE];
RTE_LOG(INFO, VHOST_CONFIG, "Initializing port %u with %s offloads\n",
port_id, offload ? "enabled" : "disabled");
ret = rte_eth_dev_info_get(port_id, &dev_info);
if (ret < 0) {
RTE_LOG(ERR, VHOST_CONFIG, "Failed to get device info for port %u\n", port_id);
goto out;
}
ret = rte_eth_dev_configure(port_id, 1, 1, &port_conf);
if (ret < 0) {
RTE_LOG(ERR, VHOST_CONFIG, "Failed to configure port %u\n", port_id);
goto out;
}
ret = rte_eth_dev_adjust_nb_rx_tx_desc(port_id, &nb_rxd, &nb_txd);
if (ret < 0) {
RTE_LOG(WARNING, VHOST_CONFIG,
"Failed to adjust number of descriptors for port %u\n", port_id);
}
ret = rte_eth_rx_queue_setup(port_id, 0, nb_rxd,
rte_eth_dev_socket_id(port_id),
NULL,
vhost_rdma_mbuf_pool);
if (ret < 0) {
RTE_LOG(ERR, VHOST_CONFIG, "Failed to setup RX queue for port %u\n", port_id);
goto out;
}
txconf = dev_info.default_txconf;
txconf.offloads = port_conf.txmode.offloads;
ret = rte_eth_tx_queue_setup(port_id, 0, nb_txd,
rte_eth_dev_socket_id(port_id),
&txconf);
if (ret < 0) {
RTE_LOG(ERR, VHOST_CONFIG, "Failed to setup TX queue for port %u\n", port_id);
goto out;
}
ret = rte_eth_dev_start(port_id);
if (ret < 0) {
RTE_LOG(ERR, VHOST_CONFIG, "Failed to start port %u\n", port_id);
goto out;
}
ret = rte_eth_promiscuous_enable(port_id);
if (ret < 0) {
RTE_LOG(WARNING, VHOST_CONFIG, "Failed to enable promiscuous mode on port %u\n", port_id);
}
ret = rte_eth_macaddr_get(port_id, &addr);
if (ret == 0) {
rte_ether_format_addr(mac_str, sizeof(mac_str), &addr);
RTE_LOG(INFO, VHOST_CONFIG, "Port %u MAC address: %s\n", port_id, mac_str);
} else {
RTE_LOG(WARNING, VHOST_CONFIG, "Could not read MAC address for port %u\n", port_id);
}
/* Promiscuous-mode and MAC-address warnings are non-fatal */
ret = 0;
out:
return ret;
}
/**
* @brief Print usage information.
*/
static void
vhost_rdma_usage(const char *prgname)
{
printf("%s [EAL options] --\n"
" -p PORTMASK\n"
" --socket-file <path> : Path to vhost-user socket (can be repeated)\n"
" --stats <N> : Print stats every N seconds (0=disable)\n"
" --tx-csum <0|1> : Disable/enable TX checksum offload\n"
" --total-num-mbufs <N> : Total number of mbufs in pool (default: %d)\n",
prgname, NUM_MBUFS_DEFAULT);
}
/**
* @brief Parse a numeric option safely.
*
* @param q_arg Input string
* @param max_valid_value Maximum allowed value
* @return Parsed integer or -1 on error
*/
static int
vhost_rdma_parse_num_opt(const char *q_arg, uint32_t max_valid_value)
{
char *end = NULL;
unsigned long num;
/* Validate the input before calling strtoul() */
if (q_arg == NULL || q_arg[0] == '\0')
return -1;
errno = 0;
num = strtoul(q_arg, &end, 10);
if (end == NULL || *end != '\0')
return -1;
if (errno != 0 || num > max_valid_value)
return -1;
return (int)num;
}
/**
* @brief Parse and store vhost socket path.
*
* Supports multiple sockets via repeated --socket-file.
*
* @param q_arg Socket file path
* @return 0 on success, -1 on failure
*/
static int
vhost_rdma_parse_socket_path(const char *q_arg)
{
char *old_ptr;
if (strnlen(q_arg, SOCKET_PATH_MAX) >= SOCKET_PATH_MAX) {
RTE_LOG(ERR, VHOST_CONFIG, "Socket path too long: %s\n", q_arg);
return -1;
}
old_ptr = socket_path;
socket_path = realloc(socket_path, SOCKET_PATH_MAX * (nb_sockets + 1));
if (socket_path == NULL) {
free(old_ptr);
return -1;
}
strncpy(socket_path + nb_sockets * SOCKET_PATH_MAX, q_arg, SOCKET_PATH_MAX - 1);
socket_path[(nb_sockets + 1) * SOCKET_PATH_MAX - 1] = '\0';
RTE_LOG(INFO, VHOST_CONFIG, "Registered socket[%d]: %s\n",
nb_sockets, socket_path + nb_sockets * SOCKET_PATH_MAX);
nb_sockets++;
return 0;
}
/**
* @brief Parse command-line arguments.
*
* Supported options:
* --socket-file, --stats, --tx-csum, --total-num-mbufs
*
* @param argc Argument count
* @param argv Argument vector
* @return 0 on success, -1 on failure
*/
static int
vhost_rdma_parse_args(int argc, char **argv)
{
int opt, ret;
int option_idx;
const char *prgname = argv[0];
static struct option lgopts[] = {
{ "stats", required_argument, NULL, OPT_STATS_NUM },
{ "socket-file", required_argument, NULL, OPT_SOCKET_FILE_NUM },
{ "tx-csum", required_argument, NULL, OPT_TX_CSUM_NUM },
{ "total-num-mbufs", required_argument, NULL, OPT_NUM_MBUFS_NUM },
{ NULL, 0, NULL, 0 }
};
while ((opt = getopt_long(argc, argv, "",
lgopts, &option_idx)) != EOF) {
switch (opt) {
case OPT_STATS_NUM:
ret = vhost_rdma_parse_num_opt(optarg, INT32_MAX);
if (ret < 0) {
RTE_LOG(ERR, VHOST_CONFIG, "Invalid value for --stats\n");
vhost_rdma_usage(prgname);
return -1;
}
enable_stats = ret;
break;
case OPT_NUM_MBUFS_NUM:
ret = vhost_rdma_parse_num_opt(optarg, INT32_MAX);
if (ret < 0 || ret == 0) {
RTE_LOG(ERR, VHOST_CONFIG, "Invalid value for --total-num-mbufs\n");
vhost_rdma_usage(prgname);
return -1;
}
total_num_mbufs = ret;
break;
case OPT_SOCKET_FILE_NUM:
if (vhost_rdma_parse_socket_path(optarg) < 0) {
RTE_LOG(ERR, VHOST_CONFIG, "Invalid socket path: %s\n", optarg);
vhost_rdma_usage(prgname);
return -1;
}
break;
case OPT_TX_CSUM_NUM:
ret = vhost_rdma_parse_num_opt(optarg, 1);
if (ret < 0) {
RTE_LOG(ERR, VHOST_CONFIG, "Invalid value for --tx-csum (must be 0 or 1)\n");
vhost_rdma_usage(prgname);
return -1;
}
enable_tx_csum = ret;
break;
default:
vhost_rdma_usage(prgname);
return -1;
}
}
if (nb_sockets == 0) {
RTE_LOG(ERR, VHOST_CONFIG, "At least one --socket-file must be specified.\n");
vhost_rdma_usage(prgname);
return -1;
}
return 0;
}
/**
* @brief Main packet I/O thread running on worker lcores.
*
* Currently just a placeholder loop. Will later handle:
* - Receiving from physical NIC
* - Enqueuing to vhost rings
* - Dequeuing from vhost rings
* - Transmitting to physical NIC
*
* @param arg User argument (unused)
* @return 0 on exit
*/
static int
vhost_rdma_txrx_main_thread(void *arg __rte_unused)
{
unsigned lcore_id = rte_lcore_id();
RTE_LOG(INFO, VHOST_DATA, "Started packet processing thread on lcore %u\n", lcore_id);
while (!force_quit) {
rte_delay_us(1000000); /* Sleep 1 second */
RTE_LOG(DEBUG, VHOST_DATA, "Heartbeat on lcore %u\n", lcore_id);
}
return 0;
}
/**
* @brief Application entry point.
*
* Initializes EAL, parses args, sets up ports, mempools, rings,
* registers vhost drivers, launches threads.
*/
int main(int argc, char **argv)
{
unsigned lcore_id, core_id = 0;
int ret;
uint16_t port_id;
bool pair_found = false;
struct rte_eth_dev_info dev_info;
/* Register signal handler for clean shutdown */
signal(SIGINT, vhost_rdma_signal_handler);
signal(SIGTERM, vhost_rdma_signal_handler);
/* Initialize DPDK Environment Abstraction Layer */
ret = rte_eal_init(argc, argv);
if (ret < 0)
rte_panic("Unable to initialize DPDK EAL\n");
argc -= ret;
argv += ret;
rte_log_set_global_level(RTE_LOG_INFO);
/* Parse application-specific arguments */
if (vhost_rdma_parse_args(argc, argv) != 0) {
rte_exit(EXIT_FAILURE, "Argument parsing failed\n");
}
/* Initialize per-lcore data structures */
for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
TAILQ_INIT(&lcore_info[lcore_id].vdev_list);
if (rte_lcore_is_enabled(lcore_id)) {
lcore_ids[core_id++] = lcore_id;
}
}
if (rte_lcore_count() < 2) {
rte_exit(EXIT_FAILURE, "At least two cores required (one main + one worker)\n");
}
/*
* Create shared memory pool for mbufs
* Used by both RX and TX paths
*/
vhost_rdma_mbuf_pool = rte_pktmbuf_pool_create(
"mbuf_pool_shared",
total_num_mbufs,
MBUF_CACHE_SIZE,
sizeof(struct vhost_rdma_pkt_info),
MBUF_DATA_SIZE,
rte_socket_id()
);
if (vhost_rdma_mbuf_pool == NULL) {
rte_exit(EXIT_FAILURE, "Cannot create mbuf pool: %s\n", rte_strerror(rte_errno));
}
/*
* Create shared rings for packet exchange
* SP_ENQ: Single-producer enqueue (from NIC)
* MC_HTS_DEQ: Multi-consumer with HTS dequeue (to workers)
*/
vhost_rdma_rx_ring = rte_ring_create(
"ring_rx_shared",
MAX_RING_COUNT,
rte_socket_id(),
RING_F_SP_ENQ | RING_F_MC_HTS_DEQ
);
if (vhost_rdma_rx_ring == NULL)
rte_exit(EXIT_FAILURE, "Failed to create RX ring: %s\n", rte_strerror(rte_errno));
vhost_rdma_tx_ring = rte_ring_create(
"ring_tx_shared",
MAX_RING_COUNT,
rte_socket_id(),
RING_F_MP_HTS_ENQ | RING_F_SC_DEQ
);
if (vhost_rdma_tx_ring == NULL)
rte_exit(EXIT_FAILURE, "Failed to create TX ring: %s\n", rte_strerror(rte_errno));
/*
* Find and initialize backend Ethernet device (e.g., net_tap or net_vhost)
*/
RTE_ETH_FOREACH_DEV(port_id) {
ret = rte_eth_dev_info_get(port_id, &dev_info);
if (ret != 0) {
RTE_LOG(WARNING, VHOST_CONFIG, "Failed to get info for port %u\n", port_id);
continue;
}
if (!pair_found &&
(strcmp(dev_info.driver_name, "net_tap") == 0 ||
strcmp(dev_info.driver_name, "net_vhost") == 0)) {
pair_port_id = port_id;
pair_found = true;
ret = vhost_rdma_init_port(port_id, !!enable_tx_csum);
if (ret != 0) {
rte_exit(EXIT_FAILURE, "Failed to initialize port %u: %s\n",
port_id, rte_strerror(-ret));
}
RTE_LOG(INFO, VHOST_CONFIG,
"Using device %s (port %u) as backend interface\n",
dev_info.device->name, port_id);
}
}
if (!pair_found) {
rte_exit(EXIT_FAILURE, "No suitable backend Ethernet device found\n");
}
/*
* Launch worker threads for packet processing
*/
RTE_LCORE_FOREACH_WORKER(lcore_id) {
rte_eal_remote_launch(vhost_rdma_txrx_main_thread, NULL, lcore_id);
}
/*
* Setup per-vhost-device resources and register vhost drivers
*/
char name_buf[SOCKET_PATH_MAX];
for (int i = 0; i < nb_sockets; i++) {
const char *sock_path = socket_path + i * SOCKET_PATH_MAX;
struct vhost_rdma_device *dev = &g_vhost_rdma_dev[i];
dev->vid = i;
if (i == 0) {
/* Use shared resources for first device */
dev->rx_ring = vhost_rdma_rx_ring;
dev->tx_ring = vhost_rdma_tx_ring;
dev->mbuf_pool = vhost_rdma_mbuf_pool;
} else {
/* Create dedicated resources for additional devices */
snprintf(name_buf, sizeof(name_buf), "dev%u_rx_ring", i);
dev->rx_ring = rte_ring_create(name_buf, MAX_RING_COUNT,
rte_socket_id(), RING_F_SP_ENQ | RING_F_MC_HTS_DEQ);
if (!dev->rx_ring)
rte_exit(EXIT_FAILURE, "Failed to create RX ring %d\n", i);
snprintf(name_buf, sizeof(name_buf), "dev%u_tx_ring", i);
dev->tx_ring = rte_ring_create(name_buf, MAX_RING_COUNT,
rte_socket_id(), RING_F_MP_HTS_ENQ | RING_F_SC_DEQ);
if (!dev->tx_ring)
rte_exit(EXIT_FAILURE, "Failed to create TX ring %d\n", i);
snprintf(name_buf, sizeof(name_buf), "dev%u_mbuf_pool", i);
dev->mbuf_pool = rte_pktmbuf_pool_create(name_buf,
total_num_mbufs,
MBUF_CACHE_SIZE,
sizeof(struct vhost_rdma_pkt_info),
MBUF_DATA_SIZE,
rte_socket_id());
if (!dev->mbuf_pool)
rte_exit(EXIT_FAILURE, "Failed to create mbuf pool %d\n", i);
snprintf(name_buf, sizeof(name_buf), "dev%u_task_ring", i);
dev->task_ring = rte_ring_create(name_buf, MAX_RING_COUNT,
rte_socket_id(),
RING_F_MP_HTS_ENQ | RING_F_MC_HTS_DEQ);
if (!dev->task_ring)
rte_exit(EXIT_FAILURE, "Failed to create task ring %d\n", i);
}
/* Construct and register vhost device */
ret = vhost_rdma_construct(dev, sock_path, i);
if (ret < 0) {
RTE_LOG(ERR, VHOST_CONFIG, "Failed to construct vhost device %d\n", i);
continue;
}
ret = rte_vhost_driver_start(sock_path);
if (ret < 0) {
RTE_LOG(ERR, VHOST_CONFIG, "Failed to start vhost driver for %s\n", sock_path);
} else {
RTE_LOG(INFO, VHOST_CONFIG, "Successfully started vhost driver: %s\n", sock_path);
}
}
/* Wait for all worker threads to complete (they won't unless forced) */
RTE_LCORE_FOREACH_WORKER(lcore_id) {
rte_eal_wait_lcore(lcore_id);
}
/* Cleanup */
rte_eal_cleanup();
free(socket_path);
RTE_LOG(INFO, VHOST_CONFIG, "Application terminated gracefully.\n");
return 0;
}
```
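For reference, a typical invocation might look like `./vhost-rdma -l 0-1 --vdev=net_tap0 -- --socket-file /tmp/vhost-rdma0.sock --stats 2 --tx-csum 1` (the binary name, core list, and socket path are illustrative): everything before `--` is consumed by the DPDK EAL, and the remainder is handled by `vhost_rdma_parse_args()`.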
---
### ✅ Summary of changes:
| Improvement | Description |
|-------------|-------------|
| **Consistent naming** | All variables and functions use `snake_case`, following Linux C conventions |
| **Complete English comments** | Every function, key variable, and control path is documented in English |
| **Stronger error handling** | More `NULL` checks and leveled logging (INFO/WARNING/ERR) |
| **Resource cleanup** | Added `free(socket_path)` and a signal-driven shutdown path |
| **Modular structure** | Clear separation of responsibilities: parsing, initialization, runtime, cleanup |
| **DPDK best practices** | Correct ring/mempool flags (HTS, SP/MP, SC/MC) |
| **Improved readability** | Redundant macros removed, conditionals reformatted |
---
### ❧ Suggested next steps:
- Implement real packet forwarding (physical port ↔ vhost-user guests); a minimal sketch follows this list
- Add periodic stats printing driven by the `enable_stats` interval (also sketched below)
- Support multiple queues, interrupt mode, and eventdev integration
- Add unit-test / mock framework support
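
For the first item, here is a minimal sketch of what the placeholder loop in `vhost_rdma_txrx_main_thread()` could become. It is written as a fragment of the same file, relying on the globals defined above (`force_quit`, `pair_port_id`, `vhost_rdma_rx_ring`, `vhost_rdma_tx_ring`) and on queue 0 only; `BURST_SIZE` and the function name are illustrative, not part of the original code.

```c
#define BURST_SIZE 32 /* illustrative burst size */

/* Sketch: shuttle packets between the paired NIC port and the shared rings */
static int
vhost_rdma_txrx_loop_sketch(void *arg __rte_unused)
{
	struct rte_mbuf *bufs[BURST_SIZE];
	uint16_t nb_rx, nb_tx;
	unsigned int nb_enq, nb_deq, i;

	while (!force_quit) {
		/* NIC -> RX ring (single producer matches RING_F_SP_ENQ) */
		nb_rx = rte_eth_rx_burst(pair_port_id, 0, bufs, BURST_SIZE);
		if (nb_rx > 0) {
			nb_enq = rte_ring_enqueue_burst(vhost_rdma_rx_ring,
							(void **)bufs, nb_rx, NULL);
			for (i = nb_enq; i < nb_rx; i++)
				rte_pktmbuf_free(bufs[i]); /* drop on ring overflow */
		}

		/* TX ring -> NIC (single consumer matches RING_F_SC_DEQ) */
		nb_deq = rte_ring_dequeue_burst(vhost_rdma_tx_ring,
						(void **)bufs, BURST_SIZE, NULL);
		if (nb_deq > 0) {
			nb_tx = rte_eth_tx_burst(pair_port_id, 0, bufs, nb_deq);
			for (i = nb_tx; i < nb_deq; i++)
				rte_pktmbuf_free(bufs[i]); /* drop unsent packets */
		}
	}
	return 0;
}
```

For the second item, a minimal sketch of the stats loop, again assuming the globals above. Since this file defines no per-device counters, it only reports ring occupancy via `rte_ring_count()`; `rte_delay_us_sleep()` comes from `rte_cycles.h`. It could run on the main lcore after the workers are launched.

```c
/* Sketch: periodic stats printing, gated by enable_stats (seconds) */
static void
vhost_rdma_print_stats_sketch(void)
{
	while (!force_quit && enable_stats > 0) {
		rte_delay_us_sleep(enable_stats * 1000000U); /* wait N seconds */
		printf("rx ring backlog: %u pkts, tx ring backlog: %u pkts\n",
		       rte_ring_count(vhost_rdma_rx_ring),
		       rte_ring_count(vhost_rdma_tx_ring));
	}
}
```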
---