/*
* Vhost-user RDMA device : init and packets forwarding
*
* Copyright (C) 2025 KylinSoft Inc. and/or its affiliates. All rights reserved.
*
* Author: Xiong Weimin <xiongweimin@kylinos.cn>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*/
#include <signal.h>
#include <getopt.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <inttypes.h>
#include <sys/types.h>
#include <sys/queue.h>
#include <setjmp.h>
#include <stdarg.h>
#include <ctype.h>
#include <errno.h>
#include <getopt.h>
#include <rte_memory.h>
#include <rte_launch.h>
#include <rte_eal.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_debug.h>
#include <rte_log.h>
#include <rte_ethdev.h>
#include <rte_mbuf.h>
#include <rte_ring.h>
#include <dev_driver.h>
#include "vhost_rdma_ib.h"
#include "vhost_rdma.h"
#include "vhost_rdma_pkt.h"
#include "vhost_rdma_log.h"
/* Size in bytes of one slot in the socket_path array, terminating NUL included. */
#define SOCKET_PATH_MAX 64
/* Requested RX/TX descriptor counts; adjusted to device limits in vhost_rdma_init_port(). */
#define MAX_NB_RXD 1024
#define MAX_NB_TXD 1024
/* Entry count requested for every rte_ring created in this file. */
#define MAX_RING_COUNT 1024
/* Default number of mbufs per pool (0x10000 == 65536). */
#define NUM_MBUFS_DEFAULT 0x10000
/* Cache size argument passed to rte_pktmbuf_pool_create(). */
#define MBUF_CACHE_SIZE 256
/* Alias of the DPDK default mbuf buffer size; not referenced in the code visible here. */
#define MBUF_DATA_SIZE RTE_MBUF_DEFAULT_BUF_SIZE
/* Port configuration with every field zero-initialized (no offloads requested). */
static struct rte_eth_conf default_port_config;
/* Port configuration requesting IPv4, TCP and UDP TX checksum offloads. */
static struct rte_eth_conf offload_port_config = {
.txmode = {
.offloads = RTE_ETH_TX_OFFLOAD_IPV4_CKSUM | RTE_ETH_TX_OFFLOAD_TCP_CKSUM | RTE_ETH_TX_OFFLOAD_UDP_CKSUM,
},
};
/* Declares the tail-queue head type used to link struct vhost_rdma_device entries. */
TAILQ_HEAD(vhost_dev_tailq_list, vhost_rdma_device);
/*
 * Per-lcore bookkeeping. In the code visible here only vdev_list is touched
 * (initialized with TAILQ_INIT in main()).
 */
struct lcore_info {
/* NOTE(review): presumably the number of devices handled by this lcore - confirm. */
uint32_t device_num;
/* Flag to synchronize device removal. */
volatile uint8_t dev_removal_flag;
/* Tail queue of devices; NOTE(review): presumably those assigned to this lcore - confirm. */
struct vhost_dev_tailq_list vdev_list;
};
/* RX ring, TX ring and mbuf pool created in main(); device 0 uses them directly. */
static struct rte_ring* vhost_rdma_rx_ring;
static struct rte_ring* vhost_rdma_tx_ring;
static struct rte_mempool *vhost_rdma_mbuf_pool;
/* Heap array of socket paths, one SOCKET_PATH_MAX-byte slot per --socket-file option. */
static char *socket_path;
/* Number of slots currently stored in socket_path. */
static int nb_sockets = 0;
/* Port id of the backend ethdev chosen in main(); stays 65535 until one is found. */
uint16_t pair_port_id = 65535;
/* NOTE(review): presumably a request-to-stop flag; nothing in this section sets it - confirm who does. */
volatile bool force_quit;
/* Value of --stats: 0 disables stats, N is the print interval in seconds (per the usage text). */
static uint32_t enable_stats = 0;
/* Number of mbufs per pool; raised by --total-num-mbufs when that value is larger. */
static int total_num_mbufs = NUM_MBUFS_DEFAULT;
/* Value of --tx-csum (0 or 1); stored by the argument parser, not read in the code visible here. */
static uint32_t enable_tx_csum;
/* Ids of the enabled lcores, filled in main(). */
static unsigned lcore_ids[RTE_MAX_LCORE];
/* Per-lcore state, one entry per possible lcore id. */
static struct lcore_info lcore_info[RTE_MAX_LCORE];
static void
unregister_drivers(int socket_num)
{
int i, ret;
for (i = 0; i < socket_num; i++) {
ret = rte_vhost_driver_unregister(socket_path + i * PATH_MAX);
if (ret != 0)
RTE_LOG(ERR, VHOST_CONFIG,
"Fail to unregister vhost driver for %s.\n",
socket_path + i * PATH_MAX);
}
}
/*
 * Signal handler installed for SIGINT by main(): unregisters the vhost
 * driver of every stored socket path, then ends the process with status 0.
 *
 * NOTE(review): exit() is not on the POSIX async-signal-safe list; a safer
 * design would only set a flag here and do the teardown from main().
 */
static void
vhost_rdma_signal_handler(__rte_unused int signum)
{
    const int stored_sockets = nb_sockets;

    unregister_drivers(stored_sockets);
    exit(0);
}
/*
 * Configure and start one Ethernet port with a single RX and a single TX queue.
 *
 * @port_id: DPDK port id of the device to initialize.
 * @offload: true to request the TX checksum offloads of offload_port_config,
 *           false to use the zero-initialized default_port_config.
 *
 * The RX queue is fed from vhost_rdma_mbuf_pool. After the port is started,
 * promiscuous mode is enabled and the MAC address is logged.
 *
 * Return: 0 on success, otherwise the negative value returned by the DPDK
 * call that failed.
 */
static int
vhost_rdma_init_port(uint16_t port_id, bool offload)
{
    uint16_t nb_rxd = MAX_NB_RXD;
    uint16_t nb_txd = MAX_NB_TXD;
    struct rte_eth_dev_info dev_info;
    struct rte_eth_conf port_conf;
    struct rte_eth_txconf txconf;
    struct rte_ether_addr addr;
    char buf[RTE_ETHER_ADDR_FMT_SIZE];
    int socket_id;
    int ret;

    /* Offloads are requested only when the caller asks for them. */
    port_conf = offload ? offload_port_config : default_port_config;

    ret = rte_eth_dev_info_get(port_id, &dev_info);
    if (ret < 0) {
        return ret;
    }

    /*
     * Drop offloads the device does not advertise; requesting an
     * unsupported offload makes rte_eth_dev_configure() fail.
     */
    port_conf.txmode.offloads &= dev_info.tx_offload_capa;

    ret = rte_eth_dev_configure(port_id, 1, 1, &port_conf);
    if (ret < 0) {
        return ret;
    }

    /* Clamp the requested descriptor counts to what the device supports. */
    ret = rte_eth_dev_adjust_nb_rx_tx_desc(port_id, &nb_rxd, &nb_txd);
    if (ret < 0) {
        return ret;
    }

    socket_id = rte_eth_dev_socket_id(port_id);

    ret = rte_eth_rx_queue_setup(port_id, 0, nb_rxd, socket_id, NULL,
                                 vhost_rdma_mbuf_pool);
    if (ret < 0) {
        return ret;
    }

    /* Start from the driver's default TX config, then apply our offloads. */
    txconf = dev_info.default_txconf;
    txconf.offloads = port_conf.txmode.offloads;
    ret = rte_eth_tx_queue_setup(port_id, 0, nb_txd, socket_id, &txconf);
    if (ret < 0) {
        return ret;
    }

    ret = rte_eth_dev_start(port_id);
    if (ret < 0) {
        return ret;
    }

    ret = rte_eth_promiscuous_enable(port_id);
    if (ret < 0) {
        return ret;
    }

    ret = rte_eth_macaddr_get(port_id, &addr);
    if (ret < 0) {
        return ret;
    }

    rte_ether_format_addr(buf, RTE_ETHER_ADDR_FMT_SIZE, &addr);
    LOG_INFO("port %d MAC %s", port_id, buf);

    return 0;
}
/*
* Display usage
*/
static void vhost_rdma_usage(const char *prgname)
{
RTE_LOG(INFO, VHOST_CONFIG, "%s [EAL options] -- -p PORTMASK\n"
" --socket-file <path>\n"
" --stats [0-N]: 0: Disable stats, N: Time in seconds to print stats\n"
" --tx-csum [0|1]: disable/enable TX checksum offload.\n"
" --total-num-mbufs [0-N]: set the number of mbufs to be allocated in mbuf pools, the default value is 147456.\n",
prgname);
}
/* Names of the long options accepted on the application command line. */
#define OPT_STATS       "stats"
#define OPT_SOCKET_FILE "socket-file"
#define OPT_TX_CSUM     "tx-csum"
#define OPT_NUM_MBUFS   "total-num-mbufs"

/*
 * Values getopt_long() returns for the long options above. They start at
 * 256, above the range of any single-character option.
 */
enum {
    OPT_STATS_NUM       = 256,
    OPT_SOCKET_FILE_NUM = 257,
    OPT_TX_CSUM_NUM     = 258,
    OPT_NUM_MBUFS_NUM   = 259,
};
/*
 * Parse a non-negative decimal option value.
 *
 * @q_arg:           NUL-terminated option argument.
 * @max_valid_value: largest value accepted (inclusive).
 *
 * Return: the parsed value, or -1 when q_arg is NULL, empty, negative, has
 * trailing characters, is out of range for strtoul(), exceeds
 * max_valid_value, or does not fit the int return type.
 */
static int
vhost_rdma_parse_num_opt(const char *q_arg, uint32_t max_valid_value)
{
    const char *start = q_arg;
    char *end = NULL;
    unsigned long num;

    if (q_arg == NULL) {
        return -1;
    }

    /* Skip the same leading white space strtoul() would skip. */
    while (isspace((unsigned char)*start)) {
        start++;
    }

    /*
     * strtoul() accepts a leading '-' and returns the negated value as an
     * unsigned number, so a negative input must be rejected up front.
     */
    if (*start == '\0' || *start == '-') {
        return -1;
    }

    errno = 0;
    num = strtoul(start, &end, 10);
    if (end == start || *end != '\0' || errno != 0) {
        return -1;
    }

    /* The result is returned as int: never let it wrap to a negative value. */
    if (num > max_valid_value || num > (unsigned long)INT32_MAX) {
        return -1;
    }

    return (int)num;
}
/*
 * Append one socket file path to the socket_path array.
 *
 * @q_arg: NUL-terminated path taken from the --socket-file option.
 *
 * The array grows by one SOCKET_PATH_MAX-byte slot per call and nb_sockets
 * is incremented on success. The stored path is logged through
 * RDMA_LOG_ERR.
 *
 * Return: 0 on success, -1 when the path does not fit in a slot or the
 * array cannot be grown. On failure socket_path and nb_sockets are left
 * unchanged, so paths stored earlier remain valid.
 */
static int
vhost_rdma_parse_socket_path(const char *q_arg)
{
    char *grown;
    char *slot;

    /* The path and its terminating NUL must fit in one slot. */
    if (strnlen(q_arg, SOCKET_PATH_MAX) == SOCKET_PATH_MAX) {
        return -1;
    }

    /* Keep the old pointer until realloc() has succeeded. */
    grown = realloc(socket_path, (size_t)SOCKET_PATH_MAX * (nb_sockets + 1));
    if (grown == NULL) {
        return -1;
    }
    socket_path = grown;

    slot = socket_path + nb_sockets * SOCKET_PATH_MAX;
    /* Bound the copy by the slot size, not by PATH_MAX. */
    strlcpy(slot, q_arg, SOCKET_PATH_MAX);
    RDMA_LOG_ERR("socket_path[%d]: %s\n", nb_sockets, slot);
    nb_sockets++;

    return 0;
}
static int vhost_rdma_parse_args(int argc, char **argv)
{
int opt, ret;
int option_idx;
const char *prgname = argv[0];
static struct option cmdopts[] = {
{OPT_STATS, required_argument,
NULL, OPT_STATS_NUM},
{OPT_SOCKET_FILE, required_argument,
NULL, OPT_SOCKET_FILE_NUM},
{OPT_TX_CSUM, required_argument,
NULL, OPT_TX_CSUM_NUM},
{OPT_NUM_MBUFS, required_argument,
NULL, OPT_NUM_MBUFS_NUM},
{NULL, 0, 0, 0},
};
while ((opt = getopt_long(argc, argv, "s:",
cmdopts, &option_idx)) != EOF)
{
switch (opt) {
case OPT_STATS_NUM:
ret = vhost_rdma_parse_num_opt(optarg, INT32_MAX);
if (ret == -1) {
RDMA_LOG_ERR("Invalid argument for stats [0..N]\n");
vhost_rdma_usage(prgname);
return -1;
}
enable_stats = ret;
break;
case OPT_NUM_MBUFS_NUM:
ret = vhost_rdma_parse_num_opt(optarg, INT32_MAX);
if (ret == -1) {
RDMA_LOG_ERR("Invalid argument for total-num-mbufs [0..N]\n");
vhost_rdma_usage(prgname);
return -1;
}
if (total_num_mbufs < ret)
total_num_mbufs = ret;
break;
/* Set socket file path. */
case OPT_SOCKET_FILE_NUM:
if (vhost_rdma_parse_socket_path(optarg) == -1) {
RDMA_LOG_ERR("Invalid argument for socket name (Max %d characters)\n", PATH_MAX);
vhost_rdma_usage(prgname);
return -1;
}
break;
case OPT_TX_CSUM_NUM:
ret = vhost_rdma_parse_num_opt(optarg, 1);
if (ret == -1) {
RDMA_LOG_ERR("Invalid argument for tx-csum [0|1]\n");
vhost_rdma_usage(prgname);
return -1;
}
enable_tx_csum = ret;
break;
}
}
return 0;
}
/*
 * Entry point launched on every worker lcore by main().
 *
 * @arg: unused.
 *
 * Logs the lcore id once, then logs a message after every one-second delay.
 * The loop ends when force_quit becomes true, which lets
 * rte_eal_wait_lcore() in main() return.
 *
 * Return: always 0.
 */
static int
vhost_rdma_txrx_main_thread(void *arg __rte_unused)
{
    const unsigned int lcore_id = rte_lcore_id();

    RDMA_LOG_INFO("Processing on Core %u started\n", lcore_id);

    while (!force_quit) {
        rte_delay_us(1000000);
        RDMA_LOG_INFO("vhost_rdma_txrx_main_thread !!!");
    }

    return 0;
}
int main(int argc, char **argv)
{
unsigned lcore_id, core_id = 0;
int ret;
uint16_t port_id;
bool pair_found = false;
struct rte_eth_dev_info dev_info;
signal(SIGINT, vhost_rdma_signal_handler);
ret = rte_eal_init(argc, argv);
if (ret < 0)
rte_panic("Cannot init EAL\n");
rte_log_set_global_level(RTE_LOG_NOTICE);
argc -= ret;
argv += ret;
if (vhost_rdma_parse_args(argc, argv) != 0) {
rte_exit(EXIT_FAILURE, "failed to parse args\n");
}
for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
TAILQ_INIT(&lcore_info[lcore_id].vdev_list);
if (rte_lcore_is_enabled(lcore_id))
lcore_ids[core_id++] = lcore_id;
}
if (rte_lcore_count() < 2) {
rte_exit(EXIT_FAILURE, "Not enough cores, expecting at least 2\n");
}
/* init mempool */
vhost_rdma_mbuf_pool = rte_pktmbuf_pool_create("vhost_rdma_mbuf_pool0", total_num_mbufs,
MBUF_CACHE_SIZE, sizeof(struct vhost_rdma_pkt_info),
RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
if (vhost_rdma_mbuf_pool == NULL)
rte_exit(EXIT_FAILURE, "%s\n", rte_strerror(rte_errno));
vhost_rdma_rx_ring = rte_ring_create("vhost_rdma_rx_ring0", MAX_RING_COUNT, rte_socket_id(),
RING_F_SP_ENQ | RING_F_MC_HTS_DEQ);
if (vhost_rdma_rx_ring == NULL)
rte_exit(EXIT_FAILURE, "create rx ring0 failed: %s\n", rte_strerror(rte_errno));
vhost_rdma_tx_ring = rte_ring_create("vhost_rdma_tx_ring0", MAX_RING_COUNT, rte_socket_id(),
RING_F_MP_HTS_ENQ | RING_F_SC_DEQ);
if (vhost_rdma_tx_ring == NULL)
rte_exit(EXIT_FAILURE, "create tx ring0 failed: %s\n", rte_strerror(rte_errno));
/* init eth_dev */
RTE_ETH_FOREACH_DEV(port_id) {
rte_eth_dev_info_get(port_id, &dev_info);
if (!pair_found && ((strcmp(dev_info.driver_name, "net_tap") == 0)
|| (strcmp(dev_info.driver_name, "net_vhost") == 0)))
{
pair_port_id = port_id;
pair_found = true;
if (vhost_rdma_init_port(port_id, true) != 0) {
rte_exit(EXIT_FAILURE, "init port failed : %s\n", rte_strerror(rte_errno));
}
RDMA_LOG_ERR_DP("use %s(%d) as backend netdevice", dev_info.device->name, port_id);
}
}
if (!pair_found)
rte_exit(EXIT_FAILURE, "no netdev found");
/* Launch all data cores. */
RTE_LCORE_FOREACH_WORKER(lcore_id)
rte_eal_remote_launch(vhost_rdma_txrx_main_thread, NULL, lcore_id);
char ring_name[SOCKET_PATH_MAX];
char pool_name[SOCKET_PATH_MAX];
for(int i = 0;i < nb_sockets; i++){
char *file = socket_path + i * SOCKET_PATH_MAX;
struct vhost_rdma_device *dev = &g_vhost_rdma_dev[i];
RDMA_LOG_ERR_DP("nb_sockets : %d",nb_sockets);
dev->vid = i;
if(i == 0){
dev->tx_ring = vhost_rdma_tx_ring;
dev->rx_ring = vhost_rdma_rx_ring;
dev->mbuf_pool = vhost_rdma_mbuf_pool;
}else{
snprintf(ring_name, SOCKET_PATH_MAX, "dev%u_tx_ring", i);
dev->tx_ring = rte_ring_create(ring_name, MAX_RING_COUNT, rte_socket_id(),
RING_F_MP_HTS_ENQ | RING_F_SC_DEQ);
if (dev->tx_ring == NULL)
rte_exit(EXIT_FAILURE, "create tx ring %u failed: %s\n", i, rte_strerror(rte_errno));
snprintf(ring_name, SOCKET_PATH_MAX, "dev%u_rx_ring", i);
dev->rx_ring = rte_ring_create(ring_name, MAX_RING_COUNT, rte_socket_id(),
RING_F_SP_ENQ | RING_F_MC_HTS_DEQ);
if (dev->rx_ring == NULL)
rte_exit(EXIT_FAILURE, "create rx ring %u failed: %s\n", i, rte_strerror(rte_errno));
snprintf(pool_name, SOCKET_PATH_MAX, "dev%u_mbuf_pool", i);
dev->mbuf_pool = rte_pktmbuf_pool_create(pool_name, total_num_mbufs,
MBUF_CACHE_SIZE, sizeof(struct vhost_rdma_pkt_info),
RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
if (dev->mbuf_pool == NULL)
rte_exit(EXIT_FAILURE, "create dev%u mbuf pool failed : %s\n", i, rte_strerror(rte_errno));
dev->task_ring = rte_ring_create("rdma_task_ring", MAX_RING_COUNT,
rte_socket_id(),
RING_F_MP_HTS_ENQ | RING_F_MC_HTS_DEQ);
if (dev->task_ring == NULL)
rte_exit(EXIT_FAILURE, "create dev%u task ring failed : %s\n", i, rte_strerror(rte_errno));
}
ret = vhost_rdma_construct(dev, file, i);
rte_vhost_driver_start(file);
}
RTE_LCORE_FOREACH_WORKER(lcore_id)
rte_eal_wait_lcore(lcore_id);
rte_eal_cleanup();
return 0;
}
/* Request: convert to standard Linux application style and add English comments. */
/* Latest release */