/*
* Vhost-user RDMA Device - Initialization and Packet Forwarding
*
* SPDX-License-Identifier: BSD-3-Clause
* Copyright (C) 2025 KylinSoft Inc. All rights reserved.
*
* Author: Xiong Weimin <xiongweimin@kylinos.cn>
*
*/
#include <signal.h>
#include <getopt.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <inttypes.h>
#include <sys/types.h>
#include <sys/queue.h>
#include <stdarg.h>
#include <ctype.h>
#include <errno.h>
/* DPDK headers */
#include <rte_memory.h>
#include <rte_launch.h>
#include <rte_eal.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_debug.h>
#include <rte_log.h>
#include <rte_ethdev.h>
#include <rte_mbuf.h>
#include <rte_ring.h>
#include <rte_malloc.h>
#include <dev_driver.h>
/* Local headers */
#include "vhost_rdma_ib.h"
#include "vhost_rdma.h"
#include "vhost_rdma_pkt.h"
#include "vhost_rdma_log.h"
/**
 * Size in bytes of one socket path slot, including the terminating NUL.
 * Paths whose length reaches this value are rejected by the option parser.
 */
#define SOCKET_PATH_MAX 64
/**
 * Number of RX/TX descriptors requested for each queue; the values may be
 * adjusted by rte_eth_dev_adjust_nb_rx_tx_desc() during port setup.
 */
#define MAX_NB_RXD 1024
#define MAX_NB_TXD 1024
/**
 * Size of shared rings between vhost devices and datapath
 */
#define MAX_RING_COUNT 1024
/**
 * Default total number of mbufs; used as the initial value of
 * total_num_mbufs and printed in the usage text. Note the constant has
 * type unsigned long.
 */
#define NUM_MBUFS_DEFAULT (1UL << 16) // 65536
/**
 * Cache size for per-lcore mbuf cache
 */
#define MBUF_CACHE_SIZE 256
/**
 * Data buffer size in each mbuf
 */
#define MBUF_DATA_SIZE RTE_MBUF_DEFAULT_BUF_SIZE
/* Device table defined in another translation unit */
extern struct vhost_rdma_device g_vhost_rdma_dev[];
/* Global configuration */
/*
 * Heap buffer holding nb_sockets socket paths back to back: entry i is a
 * NUL-terminated string starting at offset i * SOCKET_PATH_MAX.
 */
static char *socket_path;
/* Number of paths currently stored in socket_path */
static int nb_sockets = 0;
/*
 * Physical port ID to forward packets.
 * NOTE(review): UINT16_MAX presumably means "no port selected" - confirm.
 */
static uint16_t pair_port_id = UINT16_MAX;
/* Set by the signal handler; polled by the main loop to stop spinning */
static volatile bool force_quit;
/* Stats and feature flags */
/* Value of --stats: stats printing period in seconds (0 = disabled) */
static uint32_t enable_stats;
/* Value of --tx-csum: non-zero requests TX checksum offload */
static uint32_t enable_tx_csum;
/* Value of --total-num-mbufs; defaults to NUM_MBUFS_DEFAULT */
static int total_num_mbufs = NUM_MBUFS_DEFAULT;
/* Shared resources */
static struct rte_ring *vhost_rdma_rx_ring;
static struct rte_ring *vhost_rdma_tx_ring;
/* Mempool handed to the RX queue when a port is initialized */
static struct rte_mempool *vhost_rdma_mbuf_pool;
/* Per-lcore info for device management */
struct lcore_info {
/* NOTE(review): presumably the number of entries in vdev_list - confirm */
uint32_t device_num;
/* Tail queue of vhost_rdma_device entries */
TAILQ_HEAD(vhost_dev_tailq_list, vhost_rdma_device) vdev_list;
};
static struct lcore_info lcore_info[RTE_MAX_LCORE];
static unsigned int lcore_ids[RTE_MAX_LCORE];
/* Port configuration templates */
/* Zero-initialized template used when offloads are not requested */
static struct rte_eth_conf default_port_config;
/* Template requesting IPv4, UDP and TCP TX checksum offloads */
static struct rte_eth_conf offload_port_config = {
.txmode = {
.offloads = RTE_ETH_TX_OFFLOAD_IPV4_CKSUM |
RTE_ETH_TX_OFFLOAD_UDP_CKSUM |
RTE_ETH_TX_OFFLOAD_TCP_CKSUM,
},
};
/* Names of the supported long command-line options. */
#define OPT_STATS       "stats"
#define OPT_SOCKET_FILE "socket-file"
#define OPT_TX_CSUM     "tx-csum"
#define OPT_NUM_MBUFS   "total-num-mbufs"

/*
 * Numeric identifiers of the long options, in the same order as the name
 * macros above. Numbering starts at 0 and increases by one per entry.
 */
enum {
	OPT_STATS_NUM = 0,
	OPT_SOCKET_FILE_NUM,
	OPT_TX_CSUM_NUM,
	OPT_NUM_MBUFS_NUM,
};
/**
 * @brief Unregister the vhost drivers of the first @p socket_num sockets.
 *
 * Walks the socket_path buffer one SOCKET_PATH_MAX-sized slot at a time,
 * calls rte_vhost_driver_unregister() on each path and logs the outcome.
 * A failed unregistration is logged and the walk continues.
 *
 * @param socket_num Number of socket path slots to unregister
 */
static void
unregister_drivers(int socket_num)
{
	const char *slot = socket_path;
	int idx = 0;

	while (idx < socket_num) {
		if (rte_vhost_driver_unregister(slot) == 0) {
			RDMA_LOG_INFO("Unregistered socket: %s\n", slot);
		} else {
			RDMA_LOG_ERR("Failed to unregister vhost driver for socket %s\n", slot);
		}
		idx++;
		slot += SOCKET_PATH_MAX;
	}
}
/**
* @brief Signal handler for graceful shutdown (SIGINT).
*
* Cleans up vhost driver registrations and exits.
*/
static void
vhost_rdma_signal_handler(__rte_unused int signum)
{
RDMA_LOG_INFO("Received SIGINT, shutting down...\n");
if((signum == SIGINT) || (signum == SIGTERM))
force_quit = true;
unregister_drivers(nb_sockets);
exit(0);
}
/**
 * @brief Initialize an Ethernet port with given offload settings.
 *
 * Configures the port with one RX and one TX queue, sets up both queues
 * (RX buffers come from vhost_rdma_mbuf_pool), starts the port, then tries
 * to enable promiscuous mode and to log the MAC address.
 *
 * Failures while adjusting the descriptor counts, enabling promiscuous
 * mode or reading the MAC address are only logged as warnings and do not
 * make the function fail.
 *
 * @param port_id The port identifier
 * @param offload Whether to use the TX checksum offload configuration
 * @return 0 on success, negative value from the failing ethdev call otherwise
 */
static int
vhost_rdma_init_port(uint16_t port_id, bool offload)
{
	int ret;
	uint16_t nb_rxd = MAX_NB_RXD;
	uint16_t nb_txd = MAX_NB_TXD;
	struct rte_eth_dev_info dev_info;
	struct rte_eth_conf port_conf = offload ? offload_port_config : default_port_config;
	struct rte_eth_txconf txconf;
	struct rte_ether_addr addr;
	char mac_str[RTE_ETHER_ADDR_FMT_SIZE];

	RDMA_LOG_INFO("Initializing port %u with %s offloads\n", port_id,
		      offload ? "enabled" : "disabled");

	ret = rte_eth_dev_info_get(port_id, &dev_info);
	if (ret < 0) {
		RDMA_LOG_ERR("Failed to get device info for port %u\n", port_id);
		goto out;
	}

	ret = rte_eth_dev_configure(port_id, 1, 1, &port_conf);
	if (ret < 0) {
		RDMA_LOG_ERR("Failed to configure port %u\n", port_id);
		goto out;
	}

	/* Best effort: on failure the requested descriptor counts are used. */
	ret = rte_eth_dev_adjust_nb_rx_tx_desc(port_id, &nb_rxd, &nb_txd);
	if (ret < 0) {
		LOG_WARN("Failed to adjust number of descriptors for port %u\n", port_id);
	}

	ret = rte_eth_rx_queue_setup(port_id, 0, nb_rxd,
				     rte_eth_dev_socket_id(port_id),
				     NULL,
				     vhost_rdma_mbuf_pool);
	if (ret < 0) {
		RDMA_LOG_ERR("Failed to setup RX queue for port %u\n", port_id);
		goto out;
	}

	/* Start from the driver defaults, then apply the selected offloads. */
	txconf = dev_info.default_txconf;
	txconf.offloads = port_conf.txmode.offloads;
	ret = rte_eth_tx_queue_setup(port_id, 0, nb_txd,
				     rte_eth_dev_socket_id(port_id),
				     &txconf);
	if (ret < 0) {
		RDMA_LOG_ERR("Failed to setup TX queue for port %u\n", port_id);
		goto out;
	}

	ret = rte_eth_dev_start(port_id);
	if (ret < 0) {
		RDMA_LOG_ERR("Failed to start port %u\n", port_id);
		goto out;
	}

	ret = rte_eth_promiscuous_enable(port_id);
	if (ret < 0) {
		LOG_WARN("Failed to enable promiscuous mode on port %u\n", port_id);
	}

	ret = rte_eth_macaddr_get(port_id, &addr);
	if (ret == 0) {
		rte_ether_format_addr(mac_str, sizeof(mac_str), &addr);
		RDMA_LOG_INFO("Port %u MAC address: %s\n", port_id, mac_str);
	} else {
		LOG_WARN("Could not read MAC address for port %u\n", port_id);
	}

	/*
	 * The port is started at this point. The two steps above only warn,
	 * so their status must not be reported to the caller as a failure.
	 */
	ret = 0;
out:
	return ret;
}
/**
 * @brief Print usage information to standard output.
 *
 * @param prgname Program name shown at the start of the usage text
 */
static void
vhost_rdma_usage(const char *prgname)
{
	/*
	 * NUM_MBUFS_DEFAULT is an unsigned long constant, so it is printed
	 * with %lu; the cast keeps the call correct if its type ever changes.
	 */
	printf("%s [EAL options] --\n"
	       " -p PORTMASK\n"
	       " --socket-file <path> : Path to vhost-user socket (can be repeated)\n"
	       " --stats <N> : Print stats every N seconds (0=disable)\n"
	       " --tx-csum <0|1> : Disable/enable TX checksum offload\n"
	       " --total-num-mbufs <N> : Total number of mbufs in pool (default: %lu)\n",
	       prgname, (unsigned long)NUM_MBUFS_DEFAULT);
}
/**
 * @brief Parse a non-negative decimal option value.
 *
 * The whole string must consist of decimal digits: NULL, empty strings,
 * leading whitespace, a leading sign and trailing characters are all
 * rejected. The value must not exceed @p max_valid_value nor INT32_MAX, so
 * a successful result is never negative.
 *
 * @param q_arg Input string (may be NULL)
 * @param max_valid_value Maximum allowed value
 * @return Parsed integer or -1 on error
 */
static int
vhost_rdma_parse_num_opt(const char *q_arg, uint32_t max_valid_value)
{
	char *end = NULL;
	unsigned long num;

	/*
	 * Validate before calling strtoul(): it must not be given NULL, and it
	 * silently accepts a leading '-' and negates the result, which would
	 * let some negative inputs wrap around into the valid range.
	 */
	if (q_arg == NULL || !isdigit((unsigned char)q_arg[0])) {
		return -1;
	}

	errno = 0;
	num = strtoul(q_arg, &end, 10);
	if (errno != 0 || end == q_arg || *end != '\0') {
		return -1;
	}

	/* Also cap at INT32_MAX so the conversion to int cannot go negative. */
	if (num > max_valid_value || num > (unsigned long)INT32_MAX) {
		return -1;
	}

	return (int)num;
}
/**
 * @brief Parse and store vhost socket path.
 *
 * Appends @p q_arg to the socket_path buffer, growing it by one
 * SOCKET_PATH_MAX-byte slot, and increments nb_sockets. Supports multiple
 * sockets via repeated --socket-file.
 *
 * On any failure the previously stored paths and nb_sockets are left
 * untouched, so later cleanup can still walk the existing entries.
 *
 * @param q_arg Socket file path; must be shorter than SOCKET_PATH_MAX
 * @return 0 on success, -1 on failure
 */
static int
vhost_rdma_parse_socket_path(const char *q_arg)
{
	char *grown;
	char *slot;
	size_t len;

	if (q_arg == NULL) {
		return -1;
	}

	len = strnlen(q_arg, SOCKET_PATH_MAX);
	if (len >= SOCKET_PATH_MAX) {
		RTE_LOG(ERR, VHOST_CONFIG, "Socket path too long: %s\n", q_arg);
		return -1;
	}

	/*
	 * Grow through a temporary pointer: when realloc() fails the old
	 * buffer is still valid and must stay reachable, because nb_sockets
	 * still counts the paths stored in it.
	 */
	grown = realloc(socket_path,
			(size_t)SOCKET_PATH_MAX * ((size_t)nb_sockets + 1));
	if (grown == NULL) {
		return -1;
	}
	socket_path = grown;

	/* Zero the new slot first so the copied path is always terminated. */
	slot = socket_path + (size_t)nb_sockets * SOCKET_PATH_MAX;
	memset(slot, 0, SOCKET_PATH_MAX);
	memcpy(slot, q_arg, len);

	RDMA_LOG_INFO("Registered socket[%d]: %s\n", nb_sockets, slot);
	nb_sockets++;
	return 0;
}
/**
 * @brief Parse command-line arguments.
 *
 * Supported options:
 * --socket-file, --stats, --tx-csum, --total-num-mbufs
 *
 * Only long options are accepted (the short option string is empty). Each
 * value is validated and stored in the matching global. On any invalid or
 * unknown option, or when no --socket-file was given, the usage text is
 * printed and parsing fails.
 *
 * @param argc Argument count
 * @param argv Argument vector
 * @return 0 on success, -1 on failure
 */
static int
vhost_rdma_parse_args(int argc, char **argv)
{
	int opt, ret;
	int option_idx;
	const char *prgname = argv[0];
	static const struct option lgopts[] = {
		{ "stats", required_argument, NULL, OPT_STATS_NUM },
		{ "socket-file", required_argument, NULL, OPT_SOCKET_FILE_NUM },
		{ "tx-csum", required_argument, NULL, OPT_TX_CSUM_NUM },
		{ "total-num-mbufs", required_argument, NULL, OPT_NUM_MBUFS_NUM },
		{ NULL, 0, NULL, 0 }
	};

	/* getopt_long() signals the end of the options with -1, not EOF. */
	while ((opt = getopt_long(argc, argv, "",
				  lgopts, &option_idx)) != -1) {
		switch (opt) {
		case OPT_STATS_NUM:
			ret = vhost_rdma_parse_num_opt(optarg, INT32_MAX);
			if (ret < 0) {
				RTE_LOG(ERR, VHOST_CONFIG, "Invalid value for --stats\n");
				goto invalid;
			}
			enable_stats = (uint32_t)ret;
			break;
		case OPT_NUM_MBUFS_NUM:
			/* Zero mbufs is not a usable pool size. */
			ret = vhost_rdma_parse_num_opt(optarg, INT32_MAX);
			if (ret <= 0) {
				RTE_LOG(ERR, VHOST_CONFIG, "Invalid value for --total-num-mbufs\n");
				goto invalid;
			}
			total_num_mbufs = ret;
			break;
		case OPT_SOCKET_FILE_NUM:
			if (vhost_rdma_parse_socket_path(optarg) < 0) {
				RTE_LOG(ERR, VHOST_CONFIG, "Invalid socket path: %s\n", optarg);
				goto invalid;
			}
			break;
		case OPT_TX_CSUM_NUM:
			ret = vhost_rdma_parse_num_opt(optarg, 1);
			if (ret < 0) {
				RTE_LOG(ERR, VHOST_CONFIG, "Invalid value for --tx-csum (must be 0 or 1)\n");
				goto invalid;
			}
			enable_tx_csum = (uint32_t)ret;
			break;
		default:
			goto invalid;
		}
	}

	if (nb_sockets == 0) {
		RTE_LOG(ERR, VHOST_CONFIG, "At least one --socket-file must be specified.\n");
		goto invalid;
	}

	return 0;

invalid:
	/* Single failure exit: show the usage text and report the error. */
	vhost_rdma_usage(prgname);
	return -1;
}
/**
 * @brief Main loop placeholder.
 *
 * Busy-waits until force_quit becomes true; the loop body is currently
 * empty.
 *
 * @param arg Unused
 * @return Always 0
 */
static int
vhost_rdma_main_loop(__rte_unused void *arg)
{
	while (!force_quit) {
	}
	return 0;
}