DPDK官方例程分析(4)-flow_classify

前言

Flow Classify示例应用程序基于转发应用程序的简单框架示例。
它旨在演示使用Flow Classify库API的DPDK转发应用程序的基本组件。

flow_classify例子对于DPDK的学习具有很重要的意义,是比较重要的章节。有点类似于linux网络中的iptables功能,也有点类似于我们在linux内核中开发的防火墙功能。我们可以使用flow模块对数据包进行统计,丢弃等基本的操作。

程序代码

ACL介绍

首先,该例程主要面向的对象是IP流量中的五元组信息,即源IP地址、目的IP地址、源端口号、目的端口号和协议号。学过linux网络的都知道,该五元组可以唯一标识一条数据流。因为我们操作的是IP流量的五元组,所以这里使用的是ACL classify算法,ACL规则主要面向的也正是这五元组信息。

关于classify算法可以参考DPDK ACL算法介绍

DPDK报文分类与访问控制

dpdk提供了一个访问控制库,提供了基于一系列分类规则对接收到的报文进行分类的能力。

ACL库用来在一系列规则上执行N元组查找,可以实现多个分类和对每个分类查找最佳匹配(最高优先级)。

ACL库的api提供如下基本操作:

  • 创建一个新的访问控制(AC)环境实例(context)
  • 添加规则到这个环境实例
  • 为这个实例里所有的规则,创建必需的运行时结构体来执行报文分类
  • 执行接收报文分类
  • 删除AC环境实例和对应的运行时结构体,并释放内存

本文是对官方例程flow_classify的补充说明(参见:flow_classify链接地址)。

程序

/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2017 Intel Corporation
 */

#include <stdint.h>
#include <inttypes.h>
#include <getopt.h>

#include <rte_eal.h>
#include <rte_ethdev.h>
#include <rte_cycles.h>
#include <rte_lcore.h>
#include <rte_mbuf.h>
#include <rte_flow.h>
#include <rte_flow_classify.h>
#include <rte_table_acl.h>

/* Descriptor counts passed to rte_eth_rx_queue_setup / rte_eth_tx_queue_setup. */
#define RX_RING_SIZE 1024
#define TX_RING_SIZE 1024

/*
 * Mbuf pool sizing: NUM_MBUFS is multiplied by the port count when the pool
 * is created, MBUF_CACHE_SIZE is the cache size handed to
 * rte_pktmbuf_pool_create, BURST_SIZE is the RX burst length.
 */
#define NUM_MBUFS 8191
#define MBUF_CACHE_SIZE 250
#define BURST_SIZE 32

/*
 * Classifier limits:
 *  - MAX_NUM_CLASSIFY: capacity of the rules[] handle array.
 *  - FLOW_CLASSIFY_MAX_RULE_NUM: n_rules of the ACL table and the cap
 *    applied while reading the rules file.
 *  - FLOW_CLASSIFY_MAX_PRIORITY: largest priority accepted by the parser.
 *  - FLOW_CLASSIFIER_NAME_SIZE: not referenced in the visible code.
 */
#define MAX_NUM_CLASSIFY 30
#define FLOW_CLASSIFY_MAX_RULE_NUM 91
#define FLOW_CLASSIFY_MAX_PRIORITY 8
#define FLOW_CLASSIFIER_NAME_SIZE 64

/* Rules-file comment marker, command-line option name, and logging helper. */
#define COMMENT_LEAD_CHAR	('#')
#define OPTION_RULE_IPV4	"rule_ipv4"
#define RTE_LOGTYPE_FLOW_CLASSIFY	RTE_LOGTYPE_USER3
/* Logs at ERR level under the FLOW_CLASSIFY log type. */
#define flow_classify_log(format, ...) \
		RTE_LOG(ERR, FLOW_CLASSIFY, format, ##__VA_ARGS__)

/*
 * Split a 32-bit value into four bytes, most significant byte first:
 * *a receives bits 31..24, *b bits 23..16, *c bits 15..8, *d bits 7..0.
 *
 * Every argument is parenthesized so that expression arguments (for example
 * `x | y` or `p + 1`) are not re-associated by operator precedence.
 * Note that `ip` is evaluated four times; do not pass an expression with
 * side effects.
 */
#define uint32_t_to_char(ip, a, b, c, d) do {\
		*(a) = (unsigned char)((ip) >> 24 & 0xff);\
		*(b) = (unsigned char)((ip) >> 16 & 0xff);\
		*(c) = (unsigned char)((ip) >> 8 & 0xff);\
		*(d) = (unsigned char)((ip) & 0xff);\
	} while (0)

enum {
	CB_FLD_SRC_ADDR,
	CB_FLD_DST_ADDR,
	CB_FLD_SRC_PORT,
	CB_FLD_SRC_PORT_DLM,
	CB_FLD_SRC_PORT_MASK,
	CB_FLD_DST_PORT,
	CB_FLD_DST_PORT_DLM,
	CB_FLD_DST_PORT_MASK,
	CB_FLD_PROTO,
	CB_FLD_PRIORITY,
	CB_FLD_NUM,
};

/*
 * Application options collected by parse_args(). rule_ipv4_name is the path
 * given with --rule_ipv4 and stays NULL when the option is absent.
 */
static struct{
	const char *rule_ipv4_name;
} parm_config;
/* Token that must sit between a port value and its mask in a rule line. */
const char cb_port_delim[] = ":";

/*
 * Baseline port configuration copied by port_init() for every port; only the
 * maximum RX packet length is set explicitly, everything else is zero.
 */
static const struct rte_eth_conf port_conf_default = {
	.rxmode = {
		.max_rx_pkt_len = ETHER_MAX_LEN,
	},
};

/* Application-side wrapper around the library classifier handle. */
struct flow_classifier {
	struct rte_flow_classifier *cls;
};

/*
 * Cache-line aligned container for the wrapper; main() uses its size when
 * allocating the classifier object.
 */
struct flow_classifier_acl {
	struct flow_classifier cls;
} __rte_cache_aligned;

/* ACL field definitions for IPv4 5 tuple rule */

/* Indices into ipv4_defs[], one per matched field; NUM_FIELDS_IPV4 is the count. */
enum {
	PROTO_FIELD_IPV4,
	SRC_FIELD_IPV4,
	DST_FIELD_IPV4,
	SRCP_FIELD_IPV4,
	DSTP_FIELD_IPV4,
	NUM_FIELDS_IPV4
};

/*
 * Values used for .input_index in ipv4_defs[]. Source and destination ports
 * share SRCP_DESTP_INPUT_IPV4.
 */
enum {
	PROTO_INPUT_IPV4,
	SRC_INPUT_IPV4,
	DST_INPUT_IPV4,
	SRCP_DESTP_INPUT_IPV4
};

/*
 * ACL field layout for an IPv4 5-tuple. Each entry gives the field's size and
 * its byte offset from the start of the Ethernet header; all fields are
 * matched as bitmasks. main() copies this table into the ACL table
 * parameters (field_format).
 */
static struct rte_acl_field_def ipv4_defs[NUM_FIELDS_IPV4] = {
	/* first input field - always one byte long. */
	{
		.type = RTE_ACL_FIELD_TYPE_BITMASK,
		.size = sizeof(uint8_t),
		.field_index = PROTO_FIELD_IPV4,
		.input_index = PROTO_INPUT_IPV4,
		.offset = sizeof(struct ether_hdr) +
			offsetof(struct ipv4_hdr, next_proto_id),
	},
	/* next input field (IPv4 source address) - 4 consecutive bytes. */
	{
		/* rte_flow uses a bit mask for IPv4 addresses */
		.type = RTE_ACL_FIELD_TYPE_BITMASK,
		.size = sizeof(uint32_t),
		.field_index = SRC_FIELD_IPV4,
		.input_index = SRC_INPUT_IPV4,
		.offset = sizeof(struct ether_hdr) +
			offsetof(struct ipv4_hdr, src_addr),
	},
	/* next input field (IPv4 destination address) - 4 consecutive bytes. */
	{
		/* rte_flow uses a bit mask for IPv4 addresses */
		.type = RTE_ACL_FIELD_TYPE_BITMASK,
		.size = sizeof(uint32_t),
		.field_index = DST_FIELD_IPV4,
		.input_index = DST_INPUT_IPV4,
		.offset = sizeof(struct ether_hdr) +
			offsetof(struct ipv4_hdr, dst_addr),
	},
	/*
	 * Next 2 fields (src & dst ports) form 4 consecutive bytes.
	 *
	 * They share the same input index.
	 * The offsets use sizeof(struct ipv4_hdr), i.e. they are computed for
	 * an IPv4 header of that fixed size, and use the tcp_hdr port offsets
	 * for every L4 protocol.
	 */
	{
		/* rte_flow uses a bit mask for protocol ports */
		.type = RTE_ACL_FIELD_TYPE_BITMASK,
		.size = sizeof(uint16_t),
		.field_index = SRCP_FIELD_IPV4,
		.input_index = SRCP_DESTP_INPUT_IPV4,
		.offset = sizeof(struct ether_hdr) +
			sizeof(struct ipv4_hdr) +
			offsetof(struct tcp_hdr, src_port),
	},
	{
		/* rte_flow uses a bit mask for protocol ports */
		.type = RTE_ACL_FIELD_TYPE_BITMASK,
		.size = sizeof(uint16_t),
		.field_index = DSTP_FIELD_IPV4,
		.input_index = SRCP_DESTP_INPUT_IPV4,
		.offset = sizeof(struct ether_hdr) +
			sizeof(struct ipv4_hdr) +
			offsetof(struct tcp_hdr, dst_port),
	},
};

/*
 * flow classify data
 *
 * num_classify_rules: number of handles stored in rules[] so far.
 * rules:              handles returned by rte_flow_classify_table_entry_add.
 * ntuple_stats:       per-query result storage (counter and matched 5-tuple),
 *                     read back by lcore_main() after each query.
 * classify_stats:     wrapper handed to rte_flow_classifier_query; its stats
 *                     pointer refers to ntuple_stats.
 */
static int num_classify_rules;
static struct rte_flow_classify_rule *rules[MAX_NUM_CLASSIFY];
static struct rte_flow_classify_ipv4_5tuple_stats ntuple_stats;
static struct rte_flow_classify_stats classify_stats = {
		.stats = (void **)&ntuple_stats
};

/* parameters for rte_flow_classify_validate and
 * rte_flow_classify_table_entry_add functions
 */

/* First and last entries of every pattern built by add_classify_rule(). */
static struct rte_flow_item  eth_item = { RTE_FLOW_ITEM_TYPE_ETH,
	0, 0, 0 };
static struct rte_flow_item  end_item = { RTE_FLOW_ITEM_TYPE_END,
	0, 0, 0 };

/* sample actions:
 * "actions count / end"
 */
struct rte_flow_query_count count = {
	.reset = 1,
	.hits_set = 1,
	.bytes_set = 1,
	.hits = 0,
	.bytes = 0,
};
/* COUNT action configured with the query-count structure above. */
static struct rte_flow_action count_action = { RTE_FLOW_ACTION_TYPE_COUNT,
	&count};
static struct rte_flow_action end_action = { RTE_FLOW_ACTION_TYPE_END, 0};
/*
 * Action list handed to validate/add. add_classify_rule() fills it with the
 * COUNT action followed by the END terminator.
 */
static struct rte_flow_action actions[2];

/* sample attributes */
/* Attributes of the rule being added; priority and ingress are set per rule. */
static struct rte_flow_attr attr;

/* flow_classify.c: * Based on DPDK skeleton forwarding example. */

/*
 * Initializes a given port using global settings and with the RX buffers
 * coming from the mbuf_pool passed as a parameter.
 *
 * Sets up one RX and one TX queue, starts the port, prints its MAC address
 * and enables promiscuous mode.
 *
 * port:      ethdev port id. Taken as uint16_t, matching the type used by
 *            the callers and by the ethdev calls below, so ids above 255
 *            are not truncated.
 * mbuf_pool: pool the RX queue draws its buffers from.
 *
 * Returns 0 on success, -1 for an invalid port id, or the error code of the
 * first configure/queue-setup/start call that failed.
 */
static inline int
port_init(uint16_t port, struct rte_mempool *mbuf_pool)
{
	struct rte_eth_conf port_conf = port_conf_default;
	struct ether_addr addr;
	const uint16_t rx_rings = 1, tx_rings = 1;
	int retval;
	uint16_t q;
	struct rte_eth_dev_info dev_info;
	struct rte_eth_txconf txconf;

	if (!rte_eth_dev_is_valid_port(port))
		return -1;

	/* Enable fast mbuf free on TX only when the device advertises it. */
	rte_eth_dev_info_get(port, &dev_info);
	if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE)
		port_conf.txmode.offloads |=
			DEV_TX_OFFLOAD_MBUF_FAST_FREE;

	/* Configure the Ethernet device: queue counts and port settings. */
	retval = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf);
	if (retval != 0)
		return retval;

	/* Allocate and set up 1 RX queue per Ethernet port. */
	for (q = 0; q < rx_rings; q++) {
		retval = rte_eth_rx_queue_setup(port, q, RX_RING_SIZE,
				rte_eth_dev_socket_id(port), NULL, mbuf_pool);
		if (retval < 0)
			return retval;
	}

	/* Start from the driver's default TX config, with our offload flags. */
	txconf = dev_info.default_txconf;
	txconf.offloads = port_conf.txmode.offloads;
	/* Allocate and set up 1 TX queue per Ethernet port. */
	for (q = 0; q < tx_rings; q++) {
		retval = rte_eth_tx_queue_setup(port, q, TX_RING_SIZE,
				rte_eth_dev_socket_id(port), &txconf);
		if (retval < 0)
			return retval;
	}

	/* Start the Ethernet port. */
	retval = rte_eth_dev_start(port);
	if (retval < 0)
		return retval;

	/* Display the port MAC address. */
	rte_eth_macaddr_get(port, &addr);
	printf("Port %u MAC: %02" PRIx8 " %02" PRIx8 " %02" PRIx8
			   " %02" PRIx8 " %02" PRIx8 " %02" PRIx8 "\n",
			port,
			addr.addr_bytes[0], addr.addr_bytes[1],
			addr.addr_bytes[2], addr.addr_bytes[3],
			addr.addr_bytes[4], addr.addr_bytes[5]);

	/* Enable RX in promiscuous mode for the Ethernet device. */
	rte_eth_promiscuous_enable(port);

	return 0;
}

/*
 * The lcore main. This is the thread that does the work: it reads a burst
 * from each port, queries every stored rule against that burst, prints the
 * result, and transmits the burst on the paired port (port ^ 1). Never
 * returns.
 *
 * Before entering the loop it deletes rules[7] from the classifier table as
 * a demonstration of rte_flow_classify_table_entry_delete.
 * NOTE(review): rules[7] is not cleared afterwards, so the loop below still
 * queries that handle on every burst -- confirm this is intended.
 */
static __attribute__((noreturn)) void
lcore_main(struct flow_classifier *cls_app)
{
	uint16_t port;
	int ret;
	int i = 0;

	/* Remove one rule (index 7) from the flow classifier table. */
	ret = rte_flow_classify_table_entry_delete(cls_app->cls, rules[7]);
	if (ret == 0)
		printf("table_entry_delete succeeded [7]\n\n");
	else
		printf("table_entry_delete failed [7] %d\n\n", ret);

	/*
	 * Check that the port is on the same NUMA node as the polling thread
	 * for best performance.
	 */
	RTE_ETH_FOREACH_DEV(port) {
		if (rte_eth_dev_socket_id(port) > 0 &&
		    rte_eth_dev_socket_id(port) != (int)rte_socket_id()) {
			printf("\n\n");
			printf("WARNING: port %u is on remote NUMA node\n",
			       port);
			printf("to polling thread.\n");
			printf("Performance will not be optimal.\n");
		}
	}
	printf("\nCore %u forwarding packets. ", rte_lcore_id());
	printf("[Ctrl+C to quit]\n");

	/* Run until the application is quit or killed. */
	for (;;) {
		/*
		 * Receive packets on a port, classify them and forward them
		 * on the paired port.
		 * The mapping is 0 -> 1, 1 -> 0, 2 -> 3, 3 -> 2, etc.
		 */
		RTE_ETH_FOREACH_DEV(port) {
			struct rte_mbuf *bufs[BURST_SIZE];
			uint16_t nb_rx;
			uint16_t nb_tx;
			uint16_t unsent;

			/* Get burst of RX packets, from first port of pair. */
			nb_rx = rte_eth_rx_burst(port, 0, bufs, BURST_SIZE);
			if (unlikely(nb_rx == 0))
				continue;

			/* Query each stored rule against this burst. */
			for (i = 0; i < MAX_NUM_CLASSIFY; i++) {
				if (rules[i] == NULL)
					continue;

				/*
				 * Results land in ntuple_stats through
				 * classify_stats.
				 */
				ret = rte_flow_classifier_query(cls_app->cls,
						bufs, nb_rx, rules[i],
						&classify_stats);
				if (ret != 0) {
					printf(
						"rule [%d] query failed ret [%d]\n\n",
						i, ret);
					continue;
				}

				printf("rule[%d] count=%"PRIu64"\n",
					i, ntuple_stats.counter1);
				printf("proto = %d\n",
					ntuple_stats.ipv4_5tuple.proto);
			}

			/* Send burst of TX packets, to second port of pair. */
			nb_tx = rte_eth_tx_burst(port ^ 1, 0, bufs, nb_rx);

			/* Free any unsent packets. */
			for (unsent = nb_tx; unsent < nb_rx; unsent++)
				rte_pktmbuf_free(bufs[unsent]);
		}
	}
}

/*
 * Parse IPv4 5 tuple rules file, ipv4_rules_file.txt.
 * Expected format:
 * <src_ipv4_addr>'/'<masklen> <space> \
 * <dst_ipv4_addr>'/'<masklen> <space> \
 * <src_port> <space> ":" <src_port_mask> <space> \
 * <dst_port> <space> ":" <dst_port_mask> <space> \
 * <proto>'/'<proto_mask> <space> \
 * <priority>
 */

/*
 * Parse one unsigned integer field from *in.
 *
 * in:   cursor into the text; on success it is advanced to the character
 *       after the delimiter (one past the terminating NUL when dlm is 0).
 * fd:   receives the parsed value on success.
 * base: numeric base passed to strtoul (0 = auto-detect).
 * lim:  largest value accepted.
 * dlm:  character that must immediately follow the number.
 *
 * Returns 0 on success, -EINVAL when no digits were present, the value is
 * out of range, or the number is not followed by dlm.
 */
static int
get_cb_field(char **in, uint32_t *fd, int base, unsigned long lim,
		char dlm)
{
	unsigned long val;
	char *end;

	errno = 0;
	val = strtoul(*in, &end, base);
	/*
	 * end == *in means strtoul consumed nothing; without this check an
	 * empty field would be silently accepted as 0.
	 */
	if (errno != 0 || end == *in || end[0] != dlm || val > lim)
		return -EINVAL;
	*fd = (uint32_t)val;
	*in = end + 1;
	return 0;
}

/*
 * Parse "a.b.c.d/len" into an address and a prefix length.
 *
 * in:       text to parse; must end right after the prefix length.
 * addr:     receives the address assembled by IPv4(a, b, c, d).
 * mask_len: receives the prefix length (0..32).
 *
 * Returns 0 on success, -EINVAL on any malformed or out-of-range component.
 */
static int
parse_ipv4_net(char *in, uint32_t *addr, uint32_t *mask_len)
{
	/* Delimiter expected after each of the four octets. */
	static const char octet_dlm[4] = { '.', '.', '.', '/' };
	uint32_t octet[4];
	uint32_t prefix;
	unsigned int k;

	for (k = 0; k < 4; k++) {
		if (get_cb_field(&in, &octet[k], 0, UINT8_MAX,
				octet_dlm[k]) != 0)
			return -EINVAL;
	}

	if (get_cb_field(&in, &prefix, 0, sizeof(uint32_t) * CHAR_BIT, 0) != 0)
		return -EINVAL;

	*addr = IPv4(octet[0], octet[1], octet[2], octet[3]);
	*mask_len = prefix;
	return 0;
}

/*
 * Parse one rules-file line into an ntuple filter.
 *
 * str:           the line; it is modified in place by strtok_r.
 * ntuple_filter: receives addresses, prefix lengths (stored in the *_ip_mask
 *                fields), ports, port masks, protocol, protocol mask and
 *                priority.
 *
 * The line must contain CB_FLD_NUM whitespace-separated tokens in the order
 * given by the CB_FLD_* enum.
 *
 * Returns 0 on success, -EINVAL (or the address parser's error) on a
 * malformed line or when the priority exceeds FLOW_CLASSIFY_MAX_PRIORITY.
 */
static int
parse_ipv4_5tuple_rule(char *str, struct rte_eth_ntuple_filter *ntuple_filter)
{
	int i, ret;
	char *s, *sp, *in[CB_FLD_NUM];
	static const char *dlm = " \t\n";
	int dim = CB_FLD_NUM;
	uint32_t temp;

	/* Split the line into tokens; each token is also echoed to stdout. */
	s = str;
	for (i = 0; i != dim; i++, s = NULL) {
		in[i] = strtok_r(s, dlm, &sp);
		if (in[i] == NULL)
			return -EINVAL;
		printf("============ %s\n", in[i]);
	}

	/* Source address and prefix length. */
	ret = parse_ipv4_net(in[CB_FLD_SRC_ADDR],
			&ntuple_filter->src_ip,
			&ntuple_filter->src_ip_mask);
	if (ret != 0) {
		flow_classify_log("failed to read source address/mask: %s\n",
			in[CB_FLD_SRC_ADDR]);
		return ret;
	}

	/* Destination address and prefix length. */
	ret = parse_ipv4_net(in[CB_FLD_DST_ADDR],
			&ntuple_filter->dst_ip,
			&ntuple_filter->dst_ip_mask);
	if (ret != 0) {
		/* Message names the destination field (was "source"). */
		flow_classify_log(
			"failed to read destination address/mask: %s\n",
			in[CB_FLD_DST_ADDR]);
		return ret;
	}

	/* Source port, ":" separator, source port mask. */
	if (get_cb_field(&in[CB_FLD_SRC_PORT], &temp, 0, UINT16_MAX, 0))
		return -EINVAL;
	ntuple_filter->src_port = (uint16_t)temp;

	if (strncmp(in[CB_FLD_SRC_PORT_DLM], cb_port_delim,
			sizeof(cb_port_delim)) != 0)
		return -EINVAL;

	if (get_cb_field(&in[CB_FLD_SRC_PORT_MASK], &temp, 0, UINT16_MAX, 0))
		return -EINVAL;
	ntuple_filter->src_port_mask = (uint16_t)temp;

	/* Destination port, ":" separator, destination port mask. */
	if (get_cb_field(&in[CB_FLD_DST_PORT], &temp, 0, UINT16_MAX, 0))
		return -EINVAL;
	ntuple_filter->dst_port = (uint16_t)temp;

	if (strncmp(in[CB_FLD_DST_PORT_DLM], cb_port_delim,
			sizeof(cb_port_delim)) != 0)
		return -EINVAL;

	if (get_cb_field(&in[CB_FLD_DST_PORT_MASK], &temp, 0, UINT16_MAX, 0))
		return -EINVAL;
	ntuple_filter->dst_port_mask = (uint16_t)temp;

	/*
	 * "<proto>/<proto_mask>": the first call consumes the protocol and
	 * the '/', leaving in[CB_FLD_PROTO] at the mask for the second call.
	 */
	if (get_cb_field(&in[CB_FLD_PROTO], &temp, 0, UINT8_MAX, '/'))
		return -EINVAL;
	ntuple_filter->proto = (uint8_t)temp;

	if (get_cb_field(&in[CB_FLD_PROTO], &temp, 0, UINT8_MAX, 0))
		return -EINVAL;
	ntuple_filter->proto_mask = (uint8_t)temp;

	/* Priority, bounded by FLOW_CLASSIFY_MAX_PRIORITY. */
	if (get_cb_field(&in[CB_FLD_PRIORITY], &temp, 0, UINT16_MAX, 0))
		return -EINVAL;
	ntuple_filter->priority = (uint16_t)temp;
	if (ntuple_filter->priority > FLOW_CLASSIFY_MAX_PRIORITY)
		ret = -EINVAL;

	return ret;
}

/*
 * Bypass comment and empty lines.
 *
 * Returns 1 when buff starts with COMMENT_LEAD_CHAR or contains only
 * whitespace (including the empty string), 0 otherwise.
 */
static inline int
is_bypass_line(char *buff)
{
	int i = 0;

	/* comment line */
	if (buff[0] == COMMENT_LEAD_CHAR)
		return 1;
	/* empty line */
	while (buff[i] != '\0') {
		/*
		 * isspace() is only defined for unsigned char values and EOF;
		 * the cast keeps bytes >= 0x80 (negative where char is
		 * signed) from invoking undefined behaviour.
		 */
		if (!isspace((unsigned char)buff[i]))
			return 0;
		i++;
	}
	return 1;
}

/*
 * Convert a prefix length into a 32-bit mask with that many leading one
 * bits, e.g. 8 -> 0xFF000000, 24 -> 0xFFFFFF00.
 *
 * A depth of 0 yields 0; a depth of 32 or more yields 0xFFFFFFFF. Computed
 * with unsigned arithmetic only, so no signed shift into the sign bit and no
 * shift by an out-of-range count can occur.
 */
static uint32_t
convert_depth_to_bitmask(uint32_t depth_val)
{
	if (depth_val == 0)
		return 0;
	if (depth_val >= 32)
		return UINT32_MAX;
	/* Clear the low (32 - depth) bits of an all-ones word. */
	return (uint32_t)~(UINT32_MAX >> depth_val);
}

static int
add_classify_rule(struct rte_eth_ntuple_filter *ntuple_filter,
		struct flow_classifier *cls_app)
{
	int ret = -1;
	int key_found;
	
    /* rte_flow_item: ACL 规则的详细内容。
    会从最低协议层开始堆叠flow_item来形成一个匹配模式。必须由 end_item 结尾。
    */
	struct rte_flow_error error;
	struct rte_flow_item_ipv4 ipv4_spec;
	struct rte_flow_item_ipv4 ipv4_mask;
	struct rte_flow_item ipv4_udp_item;
	struct rte_flow_item ipv4_tcp_item;
	struct rte_flow_item ipv4_sctp_item;
	struct rte_flow_item_udp udp_spec;
	struct rte_flow_item_udp udp_mask;
	struct rte_flow_item udp_item;
	struct rte_flow_item_tcp tcp_spec;
	struct rte_flow_item_tcp tcp_mask;
	struct rte_flow_item tcp_item;
	struct rte_flow_item_sctp sctp_spec;
	struct rte_flow_item_sctp sctp_mask;
	struct rte_flow_item sctp_item;
	struct rte_flow_item pattern_ipv4_5tuple[4];
	struct rte_flow_classify_rule *rule;
	uint8_t ipv4_proto;

	if (num_classify_rules >= MAX_NUM_CLASSIFY) {
		printf(
			"\nINFO:  classify rule capacity %d reached\n",
			num_classify_rules);
		return ret;
	}

	/* set up parameters for validate and add */
	memset(&ipv4_spec, 0, sizeof(ipv4_spec));
	// 填充ip头部协议字段(上层协议)
	ipv4_spec.hdr.next_proto_id = ntuple_filter->proto;
	// 填充ip头部源ip地址
	ipv4_spec.hdr.src_addr = ntuple_filter->src_ip;
	// 填充ip头部目的ip地址
	ipv4_spec.hdr.dst_addr = ntuple_filter->dst_ip;
	ipv4_proto = ipv4_spec.hdr.next_proto_id;

	// TODO:ipv4_mask的作用是什么?
	memset(&ipv4_mask, 0, sizeof(ipv4_mask));
	ipv4_mask.hdr.next_proto_id = ntuple_filter->proto_mask;
	ipv4_mask.hdr.src_addr = ntuple_filter->src_ip_mask;
	// 转化为掩码
	ipv4_mask.hdr.src_addr =
		convert_depth_to_bitmask(ipv4_mask.hdr.src_addr);
	ipv4_mask.hdr.dst_addr = ntuple_filter->dst_ip_mask;
	ipv4_mask.hdr.dst_addr =
		convert_depth_to_bitmask(ipv4_mask.hdr.dst_addr);

	// 根据ip头部中的协议字段来进行分类处理
	switch (ipv4_proto) {
	case IPPROTO_UDP:
		// 如果是UDP
		// 匹配IPV4
		ipv4_udp_item.type = RTE_FLOW_ITEM_TYPE_IPV4;
		ipv4_udp_item.spec = &ipv4_spec;
		ipv4_udp_item.mask = &ipv4_mask;
		ipv4_udp_item.last = NULL;

		// 填充UDP头部
		// 填充UDP字段源端口号
		udp_spec.hdr.src_port = ntuple_filter->src_port;
		// 填充UDP字段目的端口号
		udp_spec.hdr.dst_port = ntuple_filter->dst_port;
		// 填充UDP字段数据长度
		udp_spec.hdr.dgram_len = 0;
		// 填充UDP字段数据校验和
		udp_spec.hdr.dgram_cksum = 0;

		// 填充udp的掩码
		udp_mask.hdr.src_port = ntuple_filter->src_port_mask;
		udp_mask.hdr.dst_port = ntuple_filter->dst_port_mask;
		udp_mask.hdr.dgram_len = 0;
		udp_mask.hdr.dgram_cksum = 0;

		// 匹配UDP
		udp_item.type = RTE_FLOW_ITEM_TYPE_UDP;
		udp_item.spec = &udp_spec;
		udp_item.mask = &udp_mask;
		udp_item.last = NULL;

		// 设置组内规则优先级属性
		attr.priority = ntuple_filter->priority;
		// 将每个规则添加到规则数组中
		pattern_ipv4_5tuple[1] = ipv4_udp_item;
		pattern_ipv4_5tuple[2] = udp_item;
		break;
	case IPPROTO_TCP:
		// 如果是TCP
		// 匹配IPV4
		ipv4_tcp_item.type = RTE_FLOW_ITEM_TYPE_IPV4;
		ipv4_tcp_item.spec = &ipv4_spec;
		ipv4_tcp_item.mask = &ipv4_mask;
		ipv4_tcp_item.last = NULL;

		// 填充TCP头部信息
		memset(&tcp_spec, 0, sizeof(tcp_spec));
		// 填充TCP头部字段源端口号
		tcp_spec.hdr.src_port = ntuple_filter->src_port;
		// 填充TCP头部字段目的端口号
		tcp_spec.hdr.dst_port = ntuple_filter->dst_port;
		
		// 填充TCP掩码
		memset(&tcp_mask, 0, sizeof(tcp_mask));
		tcp_mask.hdr.src_port = ntuple_filter->src_port_mask;
		tcp_mask.hdr.dst_port = ntuple_filter->dst_port_mask;

		// 匹配TCP
		tcp_item.type = RTE_FLOW_ITEM_TYPE_TCP;
		tcp_item.spec = &tcp_spec;
		tcp_item.mask = &tcp_mask;
		tcp_item.last = NULL;

		// 设置组内规则优先级
		attr.priority = ntuple_filter->priority;
		// 将每个规则添加到规则数组中
		pattern_ipv4_5tuple[1] = ipv4_tcp_item;
		pattern_ipv4_5tuple[2] = tcp_item;
		break;
	case IPPROTO_SCTP:
		// 如果是SCTP
		// 匹配IPV4
		ipv4_sctp_item.type = RTE_FLOW_ITEM_TYPE_IPV4;
		ipv4_sctp_item.spec = &ipv4_spec;
		ipv4_sctp_item.mask = &ipv4_mask;
		ipv4_sctp_item.last = NULL;

		// 填充SCTP头部字段
		sctp_spec.hdr.src_port = ntuple_filter->src_port;
		sctp_spec.hdr.dst_port = ntuple_filter->dst_port;
		sctp_spec.hdr.cksum = 0;
		sctp_spec.hdr.tag = 0;

		sctp_mask.hdr.src_port = ntuple_filter->src_port_mask;
		sctp_mask.hdr.dst_port = ntuple_filter->dst_port_mask;
		sctp_mask.hdr.cksum = 0;
		sctp_mask.hdr.tag = 0;

		// 匹配SCTP
		sctp_item.type = RTE_FLOW_ITEM_TYPE_SCTP;
		sctp_item.spec = &sctp_spec;
		sctp_item.mask = &sctp_mask;
		sctp_item.last = NULL;

		// 将每个规则添加到规则数组中
		attr.priority = ntuple_filter->priority;
		pattern_ipv4_5tuple[1] = ipv4_sctp_item;
		pattern_ipv4_5tuple[2] = sctp_item;
		break;
	default:
		return ret;
	}

	// 规则适用于入口流量
	attr.ingress = 1;
	// 匹配二层数据报文
	pattern_ipv4_5tuple[0] = eth_item;
	// 结束匹配
	pattern_ipv4_5tuple[3] = end_item;

	// 指定action
	actions[0] = count_action;
	actions[1] = end_action;

	/* Validate and add rule */
	/*
		流分类验证
		cls_app->cls: 流分类器实例
		attr: 流规则属性
		pattern_ipv4_5tuple: 模式指定(列表由END模式项终止)
		actions: 关联动作(列表由END模式项终止)
		error: 如果不为NULL,则执行详细的错误报告。仅在发生错误的情况下初始化结构
	*/
	ret = rte_flow_classify_validate(cls_app->cls, &attr,
			pattern_ipv4_5tuple, actions, &error);
	if (ret) {
		printf("table entry validate failed ipv4_proto = %u\n",
			ipv4_proto);
		return ret;
	}

	/*
		将流分类规则添加到flow_classifier表中
		cls_app->cls: 流分类器实例
		attr: 流规则属性
		pattern_ipv4_5tuple: 模式指定(列表由END模式项终止)
		actions: 关联动作(列表由END模式项终止)
		key_found: 如果规则已经存在,则返回1,否则返回0
		error: 如果不为NULL,则执行详细的错误报告。仅在发生错误的情况下初始化结构

		成功时返回有效句柄rule
	*/
	rule = rte_flow_classify_table_entry_add(
			cls_app->cls, &attr, pattern_ipv4_5tuple,
			actions, &key_found, &error);
	if (rule == NULL) {
		printf("table entry add failed ipv4_proto = %u\n",
			ipv4_proto);
		ret = -1;
		return ret;
	}

	// 将句柄存放在rules数组中
	rules[num_classify_rules] = rule;
	num_classify_rules++;
	return 0;
}

static int
add_rules(const char *rule_path, struct flow_classifier *cls_app)
{
	FILE *fh;
	char buff[LINE_MAX];
	unsigned int i = 0;
	unsigned int total_num = 0;
	//用于定义ntuple过滤器条目的结构
	struct rte_eth_ntuple_filter ntuple_filter;
	int ret;

	// 打开指定的文件
	fh = fopen(rule_path, "rb");
	if (fh == NULL)
		rte_exit(EXIT_FAILURE, "%s: fopen %s failed\n", __func__,
			rule_path);

	// 移动到文件头部
	ret = fseek(fh, 0, SEEK_SET);
	if (ret)
		rte_exit(EXIT_FAILURE, "%s: fseek %d failed\n", __func__,
			ret);

	i = 0;
	// 循环读取指定的文件的每一行
	while (fgets(buff, LINE_MAX, fh) != NULL) {

		// 跳过注释和空行
		if (is_bypass_line(buff))
			continue;

		if (total_num >= FLOW_CLASSIFY_MAX_RULE_NUM - 1) {
			printf("\nINFO: classify rule capacity %d reached\n",
				total_num);
			break;
		}
		
		// 解析5元组,存放在ntuple_filter中
		if (parse_ipv4_5tuple_rule(buff, &ntuple_filter) != 0)
			rte_exit(EXIT_FAILURE,
				"%s Line %u: parse rules error\n",
				rule_path, i);

		// 将5元组加入分类规则中
		if (add_classify_rule(&ntuple_filter, cls_app) != 0)
			rte_exit(EXIT_FAILURE, "add rule error\n");

		total_num++;
	}

	fclose(fh);
	return 0;
}

/* display usage */
static void
print_usage(const char *prgname)
{
	printf("%s usage:\n", prgname);
	printf("[EAL options] --  --"OPTION_RULE_IPV4"=FILE: ");
	printf("specify the ipv4 rules file.\n");
	printf("Each rule occupies one line in the file.\n");
}

/*
 * Parse the application arguments given on the command line (those that
 * follow the EAL arguments).
 *
 * Recognizes the long option --rule_ipv4=FILE and stores FILE in
 * parm_config.rule_ipv4_name. Any other option prints the usage text.
 *
 * Returns optind - 1 on success, -1 on an unrecognized option. On success
 * the program name is written to argv[optind - 1] and optind is reset to 1.
 */
static int
parse_args(int argc, char **argv)
{
	static struct option lgopts[] = {
		{OPTION_RULE_IPV4, required_argument, NULL, 0},
		{NULL, 0, NULL, 0}
	};
	char *prgname = argv[0];
	int option_index;
	int consumed;
	int opt;

	for (;;) {
		opt = getopt_long(argc, argv, "", lgopts, &option_index);
		if (opt == EOF)
			break;

		/* Only long options (reported as 0) are supported. */
		if (opt != 0) {
			print_usage(prgname);
			return -1;
		}

		if (strcmp(lgopts[option_index].name, OPTION_RULE_IPV4) == 0)
			parm_config.rule_ipv4_name = optarg;
	}

	if (optind >= 0)
		argv[optind - 1] = prgname;

	consumed = optind - 1;
	optind = 1; /* reset getopt lib */
	return consumed;
}

/*
 * The main function, which does initialization and calls the lcore_main
 * function.
 *
 * Steps: initialize the EAL, parse the application arguments, create the
 * mbuf pool, initialize every port, create the flow classifier and its ACL
 * table, load the rules file, then enter the forwarding loop. Every failure
 * terminates the application through rte_exit().
 */
int
main(int argc, char *argv[])
{
	struct rte_mempool *mbuf_pool;
	uint16_t nb_ports;
	uint16_t portid;
	int ret;
	int socket_id;
	/* ACL table parameters (name, rule count, field layout). */
	struct rte_table_acl_params table_acl_params;
	/* Parameters for creating the classifier table. */
	struct rte_flow_classify_table_params cls_table_params;
	struct flow_classifier *cls_app;
	/* Parameters for creating the flow classifier itself. */
	struct rte_flow_classifier_params cls_params;
	uint32_t size;

	/* Initialize the Environment Abstraction Layer (EAL). */
	ret = rte_eal_init(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");

	/* Skip the arguments consumed by the EAL. */
	argc -= ret;
	argv += ret;

	/* parse application arguments (after the EAL ones) */
	ret = parse_args(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Invalid flow_classify parameters\n");

	/* Check that there is an even number of ports to send/receive on. */
	nb_ports = rte_eth_dev_count_avail();
	if (nb_ports < 2 || (nb_ports & 1))
		rte_exit(EXIT_FAILURE, "Error: number of ports must be even\n");

	/* Creates a new mempool in memory to hold the mbufs. */
	mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL", NUM_MBUFS * nb_ports,
		MBUF_CACHE_SIZE, 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());

	if (mbuf_pool == NULL)
		rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");

	/* Initialize all ports. */
	RTE_ETH_FOREACH_DEV(portid)
		if (port_init(portid, mbuf_pool) != 0)
			/* portid is uint16_t, so print it with PRIu16. */
			rte_exit(EXIT_FAILURE, "Cannot init port %"PRIu16 "\n",
					portid);

	/* Only one lcore does the work; warn when more are enabled. */
	if (rte_lcore_count() > 1)
		printf("\nWARNING: Too many lcores enabled. Only 1 used.\n");

	/* The classifier is created on the socket of port 0. */
	socket_id = rte_eth_dev_socket_id(0);

	/* Memory allocation */
	/* Zeroed, cache-line aligned block sized for flow_classifier_acl. */
	size = RTE_CACHE_LINE_ROUNDUP(sizeof(struct flow_classifier_acl));
	/* Released with rte_free() on the error paths below. */
	cls_app = rte_zmalloc(NULL, size, RTE_CACHE_LINE_SIZE);
	if (cls_app == NULL)
		rte_exit(EXIT_FAILURE, "Cannot allocate classifier memory\n");

	cls_params.name = "flow_classifier";
	cls_params.socket_id = socket_id;

	/* Create the flow classifier; NULL means creation failed. */
	cls_app->cls = rte_flow_classifier_create(&cls_params);
	if (cls_app->cls == NULL) {
		rte_free(cls_app);
		rte_exit(EXIT_FAILURE, "Cannot create classifier\n");
	}

	/* initialise ACL table params */
	table_acl_params.name = "table_acl_ipv4_5tuple";
	/* Maximum number of rules in the ACL table. */
	table_acl_params.n_rules = FLOW_CLASSIFY_MAX_RULE_NUM;
	/* Number of fields per rule. */
	table_acl_params.n_rule_fields = RTE_DIM(ipv4_defs);
	/* Copy the field layout into the table parameters. */
	memcpy(table_acl_params.field_format, ipv4_defs, sizeof(ipv4_defs));

	/* initialise table create params */
	/* Table operations implementation (ACL). */
	cls_table_params.ops = &rte_table_acl_ops;
	/* Argument forwarded to the ACL table create function. */
	cls_table_params.arg_create = &table_acl_params;
	/* Classifier table type. */
	cls_table_params.type = RTE_FLOW_CLASSIFY_TABLE_ACL_IP4_5TUPLE;

	/* Create the classifier table inside the classifier instance. */
	ret = rte_flow_classify_table_create(cls_app->cls, &cls_table_params);
	if (ret) {
		rte_flow_classifier_free(cls_app->cls);
		rte_free(cls_app);
		rte_exit(EXIT_FAILURE, "Failed to create classifier table\n");
	}

	/* read file of IPv4 5 tuple rules and initialize parameters
	 * for rte_flow_classify_validate and rte_flow_classify_table_entry_add
	 * API's.
	 *
	 * parm_config.rule_ipv4_name is the FILE argument of --rule_ipv4.
	 */
	if (add_rules(parm_config.rule_ipv4_name, cls_app)) {
		rte_flow_classifier_free(cls_app->cls);
		rte_free(cls_app);
		rte_exit(EXIT_FAILURE, "Failed to add rules\n");
	}

	/* Call lcore_main on the master core only. */
	lcore_main(cls_app);

	return 0;
}

代码的相关流程和说明请看代码中的注释。

这里简单描述一下该例程实现的功能

  • 首先我们需要绑定偶数个网卡(至少2个)
  • 创建flow分类器
  • 然后会读取例程中ipv4_rules_file.txt文件(文件中主要是5元组)。将规则绑定到flow分类器中
  • 配置flow分类器中规则的action(该例程中主要是统计的action)
  • 配置网卡的属性(配置发送和接收队列)
  • 将接收到的数据交给flow分类器。如果匹配则统计数据

这里给出几个网上的参考链接,flow模块比较困难,需要好好的研究。DPDK flow_classify 源码阅读


本篇文章大量参考了上文链接中的文章相关内容。但是为什么还需要写出来呢?因为链接中的文章只是对于程序作了大致的介绍。由于flow模块非常重要,所以我补充了相关知识梳理的流程图(该流程图主要是描述了分类器的创建和添加规则的流程,以及分类器的组成,个人认为这一部分才是核心):
分类器流程图

关于运行的结果我会在后面添加。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值