阶段一完成后,upcall中的相应信息已被构造到flow_miss中,接下来进行批量处理:先查找facet,如果没有找到,就根据rule_dpif_lookup()查到的rule和flow_miss->flow来创建facet,然后为其构建subfacet;继而subfacet_make_actions会由subfacet->facet->rule->up.ofpacts中的信息构造odp_actions,
也就是根据具体的openflow action header中的action type进行相应的转换操作(do_xlate_actions)。这里的主函数是handle_flow_miss。流程图如下:
-------------------------
/* Handles the batch of missed packets collected in 'miss', all of which share
 * 'miss->flow'.
 *
 * First looks for a facet in 'ofproto' whose flow is exactly 'miss->flow'.
 * If there is none, the matching rule is looked up and then either:
 *
 *   - flow_miss_should_make_facet() returns false, in which case the packets
 *     are handed to handle_flow_miss_without_facet() and no facet is made, or
 *
 *   - a new facet is created, and the facet's creation time ('facet->used')
 *     is used as the arrival time of the packets.
 *
 * 'ops' and 'n_ops' are passed through to the helper that does the work.
 *
 * The caller must ensure that miss->hmap_node.hash contains
 * flow_hash(miss->flow, 0). */
static void
handle_flow_miss(struct ofproto_dpif *ofproto, struct flow_miss *miss,
                 struct flow_miss_op *ops, size_t *n_ops)
{
    struct facet *facet;
    long long int now;
    uint32_t hash;

    /* Hash that selects the bucket holding 'miss->flow'. */
    hash = miss->hmap_node.hash;

    /* Look in ofproto's facet table for an exact match on this flow. */
    facet = facet_lookup_valid(ofproto, &miss->flow, hash);
    if (!facet) {
        struct rule_dpif *rule = rule_dpif_lookup(ofproto, &miss->flow);

        if (!flow_miss_should_make_facet(ofproto, miss, hash)) {
            handle_flow_miss_without_facet(miss, rule, ops, n_ops);
            return;
        }

        facet = facet_create(rule, &miss->flow, hash);
        now = facet->used;
    } else {
        now = time_msec();
    }
    handle_flow_miss_with_facet(miss, facet, now, ops, n_ops);
}
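结构体ofproto中有个字段是 struct oftable *tables(OpenFlow流表);而facet_lookup_valid查找的是ofproto_dpif中的facets哈希表:要找到一个和flow完全相同的facet,传入的hash必须等于flow_hash(flow, 0)。该函数比下面的facet_find多了一步revalidate操作:当ofproto->need_revalidate非0,或者facet->tags与ofproto->revalidate_set有交集时,会对找到的facet调用facet_revalidate;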
/* Searches 'ofproto''s facet table for a facet whose flow is exactly 'flow'.
 * 'hash' is the hash value used for the lookup; the caller shown in this file
 * passes flow_hash(flow, 0).
 *
 * Unlike facet_find(), a facet that is found is passed to facet_revalidate()
 * when 'ofproto->need_revalidate' is nonzero or when the facet's tags
 * intersect 'ofproto->revalidate_set'.
 *
 * Returns the facet found by facet_find(), or a null pointer if there is
 * none. */
static struct facet *
facet_lookup_valid(struct ofproto_dpif *ofproto, const struct flow *flow,
                   uint32_t hash)
{
    struct facet *facet;

    facet = facet_find(ofproto, flow, hash);
    if (facet
        && (ofproto->need_revalidate
            || tag_set_intersects(&ofproto->revalidate_set, facet->tags))) {
        facet_revalidate(facet);
    }

    return facet;
}
/* Walks the entries of 'ofproto->facets' that were inserted with hash 'hash'
 * and returns the first one whose flow equals 'flow' according to
 * flow_equal().  Returns a null pointer if no such facet exists. */
static struct facet *
facet_find(struct ofproto_dpif *ofproto, const struct flow *flow,
           uint32_t hash)
{
    struct facet *facet;

    HMAP_FOR_EACH_WITH_HASH (facet, hmap_node, hash, &ofproto->facets) {
        if (flow_equal(flow, &facet->flow)) {
            return facet;
        }
    }

    return NULL;
}
rule_dpif_lookup先通过rule_dpif_lookup__在0号流表的分类器中查找与flow匹配的规则,找到就直接返回;只有在流表中没有找到时,才由rule_dpif_miss_rule返回ofproto_dpif 中的特殊规则no_packet_in_rule或者 miss_rule。
/* Returns the rule that applies to 'flow' in 'ofproto'.
 *
 * Table 0 is searched first through rule_dpif_lookup__().  If that yields no
 * rule, the result of rule_dpif_miss_rule() is returned instead. */
static struct rule_dpif *
rule_dpif_lookup(struct ofproto_dpif *ofproto, const struct flow *flow)
{
    struct rule_dpif *rule;

    rule = rule_dpif_lookup__(ofproto, flow, 0);
    if (rule) {
        return rule;
    }

    /* Nothing matched in the flow table. */
    return rule_dpif_miss_rule(ofproto, flow);
}

/* Looks up 'flow' in the classifier of OpenFlow table 'table_id' of
 * 'ofproto'.
 *
 * Returns a null pointer when 'table_id' is not below N_TABLES.  Otherwise
 * returns the classifier_lookup() result converted with
 * rule_from_cls_rule() and rule_dpif_cast(); presumably that is null when
 * the classifier has no match (rule_dpif_lookup() tests for it). */
static struct rule_dpif *
rule_dpif_lookup__(struct ofproto_dpif *ofproto, const struct flow *flow,
                   uint8_t table_id)
{
    struct cls_rule *cls_rule;
    struct classifier *cls;

    if (table_id >= N_TABLES) {
        return NULL;
    }

    cls = &ofproto->up.tables[table_id].cls;
    if (flow->nw_frag & FLOW_NW_FRAG_ANY
        && ofproto->up.frag_handling == OFPC_FRAG_NORMAL) {
        /* For OFPC_NORMAL frag_handling, we must pretend that transport ports
         * are unavailable. */
        struct flow ofpc_normal_flow = *flow;
        ofpc_normal_flow.tp_src = htons(0);
        ofpc_normal_flow.tp_dst = htons(0);
        cls_rule = classifier_lookup(cls, &ofpc_normal_flow);
    } else {
        cls_rule = classifier_lookup(cls, flow);
    }
    return rule_dpif_cast(rule_from_cls_rule(cls_rule));
}
---------lib/classifier.h/c
流分类器(Flow classifier)
/* A flow classifier. */
struct classifier {
    int n_rules;                /* Total number of rules. */
    struct hmap tables;         /* Contains "struct cls_table"s. */
};

/* A set of rules that all have the same fields wildcarded. */
struct cls_table {
    struct hmap_node hmap_node; /* Within struct classifier 'tables' hmap. */
    struct hmap rules;          /* Contains "struct cls_rule"s. */
    struct minimask mask;       /* Wildcards for fields. */
    int n_table_rules;          /* Number of rules, including duplicates. */
};
/* A rule in a "struct classifier". */
struct cls_rule {
    struct hmap_node hmap_node; /* Within struct cls_table 'rules'. */
    struct list list;           /* List of identical, lower-priority rules. */
    struct minimatch match;     /* Matching rule. */
    unsigned int priority;      /* Larger numbers are higher priorities. */
};
------------lib/match.h
那么何为minimatch?压缩匹配(Compressed match),它是struct match的一种稀疏表示,并且和struct match保持相同的不变量:flow中为1的位,在掩码mask中对应的位也必须为1(换句话说,被通配的位在flow中必须为0)。对底层的miniflow和minimask,各自的不变量同样保持,这意味着flow和mask可以有不同的映射(map),见下面的英文注释:
* The invariants for the underlying miniflow and minimask are also maintained,
* which means that 'flow' and 'mask' can have different 'map's. In
* particular, if the match checks that a given 32-bit field has value 0, then
* 'map' will have a 1-bit in 'mask' but a 0-bit in 'flow' for that field. */
struct minimatch {
struct miniflow flow;
struct minimask mask;
};
/* A flow classification match.
 *
 * Use one of the match_*() functions to initialize a "struct match".
 *
 * The match_*() functions below maintain the following important invariant.
 * If a bit or a field is wildcarded in 'wc', then the corresponding bit or
 * field in 'flow' is set to all-0-bits.  (The match_zero_wildcarded_fields()
 * function can be used to restore this invariant after adding wildcards.) */
struct match {
    struct flow flow;
    struct flow_wildcards wc;
};
分类器相关的结构体暂时先看到这里,接下来看没有找到rule的情况,即rule_dpif_miss_rule。
struct ofproto_dpif {
struct hmap_node all_ofproto_dpifs_node; /* In 'all_ofproto_dpifs'. */
struct ofproto up;
struct dpif *dpif;
/* Special OpenFlow rules. */
struct rule_dpif *miss_rule; /* Sends flow table misses to controller. */
struct rule_dpif *no_packet_in_rule; /* Drops flow table misses. */ 在哪个阶段被初始化的 ??
/* Statistics. */
uint64_t n_matches;
/* Bridging. */
struct netflow *netflow;
struct dpif_sflow *sflow;
struct hmap bundles; /* Contains "struct ofbundle"s. */
struct mac_learning *ml;
struct ofmirror *mirrors[MAX_MIRRORS];
bool has_mirrors;
bool has_bonded_bundles;
/* Expiration. */
struct timer next_expiration;
/* Facets. */
struct hmap facets;
struct hmap subfacets;
struct governor *governor;
//代表的是在datapath中流建立速率限制器(Flow setup rate limiter),如同发动机中的调节器控制车辆的速度。
/* Revalidation. */
struct table_dpif tables[N_TABLES];
enum revalidate_reason need_revalidate;
struct tag_set revalidate_set;
/* Support for debugging async flow mods. */
struct list completions;
bool has_bundle_action; /* True when the first bundle action appears. */
struct netdev_stats stats; /* To account packets generated and consumed in userspace. */
/* Spanning tree. */
struct stp *stp;
long long int stp_last_tick;
/* VLAN splinters. */
struct hmap realdev_vid_map; /* (realdev,vid) -> vlandev. */
struct hmap vlandev_map; /* vlandev -> (realdev,vid). */
};
struct hmap_node all_ofproto_dpifs_node; /* In 'all_ofproto_dpifs'. */
struct ofproto up;
struct dpif *dpif;
/* Special OpenFlow rules. */
struct rule_dpif *miss_rule; /* Sends flow table misses to controller. */
struct rule_dpif *no_packet_in_rule; /* Drops flow table misses. */ 在哪个阶段被初始化的 ??
/* Statistics. */
uint64_t n_matches;
/* Bridging. */
struct netflow *netflow;
struct dpif_sflow *sflow;
struct hmap bundles; /* Contains "struct ofbundle"s. */
struct mac_learning *ml;
struct ofmirror *mirrors[MAX_MIRRORS];
bool has_mirrors;
bool has_bonded_bundles;
/* Expiration. */
struct timer next_expiration;
/* Facets. */
struct hmap facets;
struct hmap subfacets;
struct governor *governor;
//代表的是在datapath中流建立速率限制器(Flow setup rate limiter),如同发动机中的调节器控制车辆的速度。
/* Revalidation. */
struct table_dpif tables[N_TABLES];
enum revalidate_reason need_revalidate;
struct tag_set revalidate_set;
/* Support for debugging async flow mods. */
struct list completions;
bool has_bundle_action; /* True when the first bundle action appears. */
struct netdev_stats stats; /* To account packets generated and consumed in userspace. */
/* Spanning tree. */
struct stp *stp;
long long int stp_last_tick;
/* VLAN splinters. */
struct hmap realdev_vid_map; /* (realdev,vid) -> vlandev. */
struct hmap vlandev_map; /* vlandev -> (realdev,vid). */
};
考虑第一个packet到达的情况,没有找到对应的rule,所以进入到这里。
/* Chooses the special rule to apply to 'flow' when no rule in the flow table
 * matched it.
 *
 * Returns 'ofproto->no_packet_in_rule' if the flow's input port exists and
 * has OFPUTIL_PC_NO_PACKET_IN set in its configuration.  Returns
 * 'ofproto->miss_rule' otherwise, including when the input port is unknown
 * (which is also logged as a rate-limited warning). */
static struct rule_dpif *
rule_dpif_miss_rule(struct ofproto_dpif *ofproto, const struct flow *flow)
{
    struct ofport_dpif *port;

    port = get_ofp_port(ofproto, flow->in_port);
    if (!port) {
        VLOG_WARN_RL(&rl, "packet-in on unknown port %"PRIu16, flow->in_port);
        return ofproto->miss_rule;
    }

    /* If the input port is configured not to generate packet-ins, return the
     * rule dedicated to that case.  Both special rules are simply read from
     * 'ofproto' here.
     *
     * NOTE(review): where they are initialized is not visible in this
     * excerpt. */
    if (port->up.pp.config & OFPUTIL_PC_NO_PACKET_IN) {
        return ofproto->no_packet_in_rule;
    }
    return ofproto->miss_rule;
}
/* ofproto-dpif's view of one OpenFlow port.  The generic "struct ofport" is
 * embedded as member 'up'. */
struct ofport_dpif {
    struct ofport up;

    uint32_t odp_port;
    struct ofbundle *bundle;    /* Bundle that contains this port, if any. */
    struct list bundle_node;    /* In struct ofbundle's "ports" list. */
    struct cfm *cfm;            /* Connectivity Fault Management, if any. */
    tag_type tag;               /* Tag associated with this port. */
    uint32_t bond_stable_id;    /* stable_id to use as bond slave, or 0. */
    bool may_enable;            /* May be enabled in bonds. */
    long long int carrier_seq;  /* Carrier status changes. */

    /* Spanning tree. */
    struct stp_port *stp_port;  /* Spanning Tree Protocol, if any. */
    enum stp_state stp_state;   /* Always STP_DISABLED if STP not in use. */
    long long int stp_state_entered;

    struct hmap priorities;     /* Map of attached 'priority_to_dscp's. */

    /* Linux VLAN device support (e.g. "eth0.10" for VLAN 10.)
     *
     * This is deprecated.  It is only for compatibility with broken device
     * drivers in old versions of Linux that do not properly support VLANs when
     * VLAN devices are not used.  When broken device drivers are no longer in
     * widespread use, we will delete these interfaces. */
    uint16_t realdev_ofp_port;
    int vlandev_vid;
};
struct ofport表示struct ofproto中的一个openflow port;作为struct ofproto中的 openflow port成员,具体的实现(例如ofproto-dpif)可以读取这些字段,但不应该改变它们;
----------------ofproto/ofproto-provider.h
/* An OpenFlow port within a "struct ofproto". */
struct ofport {
    struct hmap_node hmap_node; /* In struct ofproto's "ports" hmap. */
    struct ofproto *ofproto;    /* The ofproto that contains this port. */
    struct netdev *netdev;
    struct ofputil_phy_port pp;
    uint16_t ofp_port;          /* OpenFlow port number. */
    unsigned int change_seq;
    int mtu;
};
-----------------lib/ofp-util.h
协议openflow 1.0/1.1定义的物理端口抽象数据类型(of spec P17);
/* Abstract description of a physical port, as used by the OpenFlow 1.0/1.1
 * handling code. */
struct ofputil_phy_port {
    uint16_t port_no;
    uint8_t hw_addr[OFP_ETH_ALEN];
    char name[OFP_MAX_PORT_NAME_LEN];
    enum ofputil_port_config config;
    enum ofputil_port_state state;

    /* NETDEV_F_* feature bitmasks. */
    enum netdev_features curr;       /* Current features. */
    enum netdev_features advertised; /* Features advertised by the port. */
    enum netdev_features supported;  /* Features supported by the port. */
    enum netdev_features peer;       /* Features advertised by peer. */

    /* Speed. */
    uint32_t curr_speed;        /* Current speed, in kbps. */
    uint32_t max_speed;         /* Maximum supported speed, in kbps. */
};
这些端口配置位(port config bits)用来表明:这个端口是否被管理性地关闭(administratively down)、如何处理802.1D生成树相关的选项,以及如何处理进出该端口的packets等。
/* Port configuration bits.  Each enumerator is a distinct single-bit flag, so
 * values can be combined with '|' and tested with '&'. */
enum ofputil_port_config {
    /* OpenFlow 1.0 and 1.1 share these values for these port config bits. */
    OFPUTIL_PC_PORT_DOWN    = 1 << 0, /* Port is administratively down. */
    OFPUTIL_PC_NO_RECV      = 1 << 2, /* Drop all packets received by port. */
    OFPUTIL_PC_NO_FWD       = 1 << 5, /* Drop packets forwarded to port. */
    OFPUTIL_PC_NO_PACKET_IN = 1 << 6, /* No send packet-in msgs for port. */

    /* OpenFlow 1.0 only. */
    OFPUTIL_PC_NO_STP       = 1 << 1, /* No 802.1D spanning tree for port. */
    OFPUTIL_PC_NO_RECV_STP  = 1 << 3, /* Drop received 802.1D STP packets. */
    OFPUTIL_PC_NO_FLOOD     = 1 << 4, /* Do not include port when flooding. */

    /* There are no OpenFlow 1.1-only bits. */
};
/* Returns the "struct ofport_dpif" for OpenFlow port number 'ofp_port' in
 * 'ofproto', or a null pointer if ofproto_get_port() finds no such port. */
static struct ofport_dpif *
get_ofp_port(const struct ofproto_dpif *ofproto, uint16_t ofp_port)
{
    struct ofport *ofport = ofproto_get_port(&ofproto->up, ofp_port);
    return ofport ? ofport_dpif_cast(ofport) : NULL;
}
根据端口号找到对应的ofport结构体,struct ofport又是 struct ofport_dpif 的成员,所以通过container_of 得到外层容器的地址即可;
---------------ofproto/ofproto.c
/* Searches 'ofproto->ports' for the port whose OpenFlow port number is
 * 'ofp_port'.  Only the bucket selected by hash_int(ofp_port, 0) is
 * examined.  Returns the port, or a null pointer if there is none. */
struct ofport *
ofproto_get_port(const struct ofproto *ofproto, uint16_t ofp_port)
{
    struct ofport *port;

    HMAP_FOR_EACH_IN_BUCKET (port, hmap_node,
                             hash_int(ofp_port, 0), &ofproto->ports) {
        if (port->ofp_port == ofp_port) {
            return port;
        }
    }
    return NULL;
}
flow_miss_should_make_facet用来判断:这个在ofproto_dpif 中没有找到facet的流(细节在flow_miss 结构体中)是否值得在用户空间追踪细节并且安装一条datapath flow。问题的答案通常是yes;然而,对于那些short flows,记账的代价往往超过收益,所以当datapath中有很多这样的short flows的时候,我们就要考虑利用一些启发式的规则来决定哪些流值得追踪。
/* Decides whether the flow described by 'miss' is worth tracking with a facet
 * in userspace (and installing a datapath flow for).
 *
 * While 'ofproto' has no governor, the answer is always true as long as twice
 * the number of subfacets does not exceed 'up.flow_eviction_threshold'.  Once
 * that limit is exceeded a governor is created, and from then on the decision
 * is delegated to governor_should_install_flow(), which is given 'hash' and
 * the number of packets queued in 'miss'. */
static bool
flow_miss_should_make_facet(struct ofproto_dpif *ofproto,
                            struct flow_miss *miss, uint32_t hash)
{
    if (!ofproto->governor) {
        size_t n_subfacets;

        n_subfacets = hmap_count(&ofproto->subfacets);
        if (n_subfacets * 2 <= ofproto->up.flow_eviction_threshold) {
            /* The subfacet count is at most half of the eviction threshold:
             * make a facet without consulting a governor. */
            return true;
        }

        ofproto->governor = governor_create(ofproto->up.name);
    }

    return governor_should_install_flow(ofproto->governor, hash,
                                        list_size(&miss->packets));
}
如果已经确定ofproto中不存在完全一样的flow,并且rule是flow在ofproto的分类表(即流表)中的最佳匹配,那么facet_create就由rule 和 flow 创建并返回一个新的facet,其中hash=flow_hash(flow, 0)。初始时facet没有subfacet,下面会创建。这里要注意的是,具体的action细节保存在相应的规则rule里面。
/* Creates and returns a new facet for 'flow', governed by 'rule'.
 *
 * The facet is zero-allocated, stamped with the current time in 'used',
 * inserted into the facet table of the ofproto that owns 'rule' under 'hash',
 * and appended to 'rule''s list of facets.  Its list of subfacets starts out
 * empty. */
static struct facet *
facet_create(struct rule_dpif *rule, const struct flow *flow, uint32_t hash)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto);
    struct facet *facet;

    facet = xzalloc(sizeof *facet);
    facet->used = time_msec();
    hmap_insert(&ofproto->facets, &facet->hmap_node, hash);
    list_push_back(&rule->facets, &facet->list_node);
    facet->rule = rule;
    facet->flow = *flow;
    list_init(&facet->subfacets);
    netflow_flow_init(&facet->nf_flow);
    netflow_flow_update_time(ofproto->netflow, &facet->nf_flow, facet->used);

    return facet;
}
这时候与flow_miss 匹配的facet已经有了,接下来handle_flow_miss_with_facet会按需把datapath操作追加到flow_miss_op数组中,同时更新统计计数;miss中的所有packets都被认为是在now这个时刻到达的,这一点只对新建的facets比较重要:
/* Handles the packets in 'miss' using 'facet', appending the datapath
 * operations that are needed to 'ops' and incrementing '*n_ops' for each one.
 *
 * For every packet whose subfacet has actions, an execute operation is
 * queued.  Afterwards a flow_put operation is queued if the upcall was a
 * DPIF_UC_MISS or if the subfacet is not installed on the path it should be
 * on.
 *
 * All of the packets in 'miss' are considered to have arrived at time 'now'.
 * This is really important only for new facets: if we just called time_msec()
 * here, then the new subfacet or its packets could look (occasionally) as
 * though it was used some time after the facet was used.  That can make a
 * one-packet flow look like it has a nonzero duration, which looks odd in
 * e.g. NetFlow statistics. */
static void
handle_flow_miss_with_facet(struct flow_miss *miss, struct facet *facet,
                            long long int now,
                            struct flow_miss_op *ops, size_t *n_ops)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(facet->rule->up.ofproto);
    enum subfacet_path want_path;
    struct subfacet *subfacet;
    struct ofpbuf *packet;

    subfacet = subfacet_create(facet,
                               miss->key_fitness, miss->key, miss->key_len,
                               miss->initial_tci, now);

    /* Process each packet queued in 'miss' in turn. */
    LIST_FOR_EACH (packet, list_node, &miss->packets) {
        struct flow_miss_op *op = &ops[*n_ops];
        struct dpif_flow_stats stats;
        struct ofpbuf odp_actions;

        /* NOTE(review): presumably related to fail-open mode -- confirm in
         * handle_flow_miss_common(). */
        handle_flow_miss_common(facet->rule, packet, &miss->flow);

        ofpbuf_use_stub(&odp_actions, op->stub, sizeof op->stub);

        /* A subfacet that was just created has no actions yet, and one marked
         * slow is translated again for every packet.  Whatever else
         * subfacet_make_actions() does, what this function uses afterwards is
         * the content of 'odp_actions'. */
        if (!subfacet->actions || subfacet->slow) {
            subfacet_make_actions(subfacet, packet, &odp_actions);
        }

        /* Extract this packet's statistics into 'stats' and credit them to
         * the subfacet. */
        dpif_flow_stats_extract(&facet->flow, packet, now, &stats);
        subfacet_update_stats(subfacet, &stats);

        if (subfacet->actions_len) {
            struct dpif_execute *execute = &op->dpif_op.u.execute;

            init_flow_miss_execute_op(miss, packet, op);
            op->subfacet = subfacet;
            if (!subfacet->slow) {
                /* No slow-path reason: execute the subfacet's own actions. */
                execute->actions = subfacet->actions;
                execute->actions_len = subfacet->actions_len;
                ofpbuf_uninit(&odp_actions);
            } else {
                /* Slow path: use the actions translated just above, and
                 * record the buffer in 'op->garbage'. */
                execute->actions = odp_actions.data;
                execute->actions_len = odp_actions.size;
                op->garbage = ofpbuf_get_uninit_pointer(&odp_actions);
            }

            (*n_ops)++;
        } else {
            ofpbuf_uninit(&odp_actions);
        }
    }

    /* Every queued execute operation now has 'actions' and 'actions_len'
     * filled in.  'want_path' is SF_SLOW_PATH or SF_FAST_PATH. */
    want_path = subfacet_want_path(subfacet->slow);

    /* Queue the operation that adds or modifies the datapath flow. */
    if (miss->upcall_type == DPIF_UC_MISS || subfacet->path != want_path) {
        struct flow_miss_op *op = &ops[(*n_ops)++];
        struct dpif_flow_put *put = &op->dpif_op.u.flow_put;

        op->subfacet = subfacet;
        op->garbage = NULL;
        op->dpif_op.type = DPIF_OP_FLOW_PUT;
        put->flags = DPIF_FP_CREATE | DPIF_FP_MODIFY;
        put->key = miss->key;
        put->key_len = miss->key_len;
        if (want_path == SF_FAST_PATH) {
            put->actions = subfacet->actions;
            put->actions_len = subfacet->actions_len;
        } else {
            /* Slow path: the actions are composed into 'op->stub'. */
            compose_slow_path(ofproto, &facet->flow, subfacet->slow,
                              op->stub, sizeof op->stub,
                              &put->actions, &put->actions_len);
        }
        put->stats = NULL;
    }
}
subfacet_create从facet中找到与key对应的subfacet,如果没有就创建一个新的,并初始化其中的各个字段。如果这个subfacet是新创建的,那么subfacet->actions=NULL,它会在接下来的subfacet_make_actions()函数中被构造出来。key_fitness的确切含义这里还没有看懂,留待以后确认。
/* Returns the subfacet of 'facet' for the datapath flow key 'key' (of
 * 'key_len' bytes), creating it if necessary.
 *
 * If 'facet' has no subfacets yet, the subfacet embedded in the facet
 * ('one_subfacet') is used.  Otherwise an existing subfacet is searched for
 * and returned as is when it belongs to 'facet'; if none is found a new one
 * is allocated with xmalloc().
 *
 * A new subfacet is inserted into the ofproto's subfacet table and the
 * facet's subfacet list, and starts out with no actions, zeroed datapath
 * counters, 'used' set to 'now' and path SF_NOT_INSTALLED.  The key is copied
 * into the subfacet only when 'key_fitness' is not ODP_FIT_PERFECT. */
static struct subfacet *
subfacet_create(struct facet *facet, enum odp_key_fitness key_fitness,
                const struct nlattr *key, size_t key_len,
                ovs_be16 initial_tci, long long int now)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(facet->rule->up.ofproto);
    uint32_t key_hash = odp_flow_key_hash(key, key_len); /* Hash of the key. */
    struct subfacet *subfacet;

    if (list_is_empty(&facet->subfacets)) {
        /* First subfacet of this facet: use the embedded one. */
        subfacet = &facet->one_subfacet;
    } else {
        subfacet = subfacet_find__(ofproto, key, key_len, key_hash,
                                   &facet->flow);
        if (subfacet) {
            if (subfacet->facet == facet) {
                /* Already exists: return it unchanged. */
                return subfacet;
            }

            /* This shouldn't happen. */
            VLOG_ERR_RL(&rl, "subfacet with wrong facet");
            subfacet_destroy(subfacet);
        }

        subfacet = xmalloc(sizeof *subfacet);
    }

    hmap_insert(&ofproto->subfacets, &subfacet->hmap_node, key_hash);
    list_push_back(&facet->subfacets, &subfacet->list_node);
    subfacet->facet = facet;
    subfacet->key_fitness = key_fitness;
    if (key_fitness != ODP_FIT_PERFECT) {
        /* NOTE(review): the exact meaning of the fitness levels is not shown
         * in this excerpt; a non-perfect fit keeps its own copy of the key. */
        subfacet->key = xmemdup(key, key_len);
        subfacet->key_len = key_len;
    } else {
        /* Perfect fit: no copy of the key is kept. */
        subfacet->key = NULL;
        subfacet->key_len = 0;
    }
    subfacet->used = now;
    subfacet->dp_packet_count = 0;
    subfacet->dp_byte_count = 0;
    subfacet->actions_len = 0;
    subfacet->actions = NULL;
    subfacet->slow = (subfacet->key_fitness == ODP_FIT_TOO_LITTLE
                      ? SLOW_MATCH
                      : 0);
    subfacet->path = SF_NOT_INSTALLED;
    subfacet->initial_tci = initial_tci;

    return subfacet;
}
subfacet_make_actions根据subfacet所属facet的rule(rule->up.ofpacts),把OpenFlow actions翻译成datapath actions,写入odp_actions这个ofpbuf结构体中;调用者要负责初始化和释放这个缓存。
/* Translates the OpenFlow actions of the rule behind 'subfacet' into datapath
 * actions, which are written to 'odp_actions'.  'odp_actions' is only passed
 * on to xlate_actions() and read back here; the caller provides it and
 * remains responsible for it.
 *
 * Several results of the translation are recorded in the facet (tags,
 * has_learn, has_normal, has_fin_timeout, the NetFlow output interface and
 * mirrors).  'subfacet->slow' keeps only its SLOW_MATCH bit and is combined
 * with the slow-path reasons reported by the translation.  The subfacet's
 * stored copy of the actions is replaced when the new actions differ from it
 * in length or content. */
static void
subfacet_make_actions(struct subfacet *subfacet, const struct ofpbuf *packet,
                      struct ofpbuf *odp_actions)
{
    struct facet *facet = subfacet->facet;
    struct rule_dpif *rule = facet->rule;
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto);

    struct action_xlate_ctx ctx;

    /* Set up the translation context from the ofproto, flow, rule and
     * packet. */
    action_xlate_ctx_init(&ctx, ofproto, &facet->flow, subfacet->initial_tci,
                          rule, 0, packet);
    xlate_actions(&ctx, rule->up.ofpacts, rule->up.ofpacts_len, odp_actions);
    facet->tags = ctx.tags;
    facet->has_learn = ctx.has_learn;
    facet->has_normal = ctx.has_normal;
    facet->has_fin_timeout = ctx.has_fin_timeout;
    facet->nf_flow.output_iface = ctx.nf_output_iface;
    facet->mirrors = ctx.mirrors;

    subfacet->slow = (subfacet->slow & SLOW_MATCH) | ctx.slow;
    if (subfacet->actions_len != odp_actions->size
        || memcmp(subfacet->actions, odp_actions->data, odp_actions->size)) {
        free(subfacet->actions);
        subfacet->actions_len = odp_actions->size;
        subfacet->actions = xmemdup(odp_actions->data, odp_actions->size);
    }
}
/* Context for translating a rule's OpenFlow actions into datapath actions.
 *
 * The members fall into three groups, marked by the comments below: those
 * that action_xlate_ctx_init() initializes, those that xlate_actions() fills
 * in as results for the client, and those that are internal to
 * xlate_actions(). */
struct action_xlate_ctx {
/* action_xlate_ctx_init() initializes these members. */
/* The ofproto. */
struct ofproto_dpif *ofproto;
/* Flow to which the OpenFlow actions apply. xlate_actions() will modify
* this flow when actions change header fields. */
struct flow flow;
/* The packet corresponding to 'flow', or a null pointer if we are
* revalidating without a packet to refer to. */
const struct ofpbuf *packet;
/* Should OFPP_NORMAL update the MAC learning table? Should "learn"
* actions update the flow table?
*
* We want to update these tables if we are actually processing a packet,
* or if we are accounting for packets that the datapath has processed, but
* not if we are just revalidating. */
bool may_learn; // Should be set to true when a real packet is being processed.
/* The rule that we are currently translating, or NULL. */
struct rule_dpif *rule;
/* Union of the set of TCP flags seen so far in this flow. (Used only by
* NXAST_FIN_TIMEOUT. Set to zero to avoid updating rules'
* timeouts.) */
uint8_t tcp_flags;
/* If nonnull, flow translation calls this function just before executing a
* resubmit or OFPP_TABLE action. In addition, disables logging of traces
* when the recursion depth is exceeded.
*
* 'rule' is the rule being submitted into. It will be null if the
* resubmit or OFPP_TABLE action didn't find a matching rule.
*
* This is normally null so the client has to set it manually after
* calling action_xlate_ctx_init(). */
void (*resubmit_hook)(struct action_xlate_ctx *, struct rule_dpif *rule);
/* If nonnull, flow translation calls this function to report some
* significant decision, e.g. to explain why OFPP_NORMAL translation
* dropped a packet. */
void (*report_hook)(struct action_xlate_ctx *, const char *s);
/* If nonnull, flow translation credits the specified statistics to each
* rule reached through a resubmit or OFPP_TABLE action.
*
* This is normally null so the client has to set it manually after
* calling action_xlate_ctx_init(). */
const struct dpif_flow_stats *resubmit_stats;
/* xlate_actions() initializes and uses these members. The client might want
* to look at them after it returns. */
struct ofpbuf *odp_actions; /* Datapath actions. */
tag_type tags; /* Tags associated with actions. */
enum slow_path_reason slow; /* 0 if fast path may be used. */
bool has_learn; /* Actions include NXAST_LEARN? */
bool has_normal; /* Actions output to OFPP_NORMAL? */
bool has_fin_timeout; /* Actions include NXAST_FIN_TIMEOUT? */
uint16_t nf_output_iface; /* Output interface index for NetFlow. */
mirror_mask_t mirrors; /* Bitmap of associated mirrors. */
/* xlate_actions() initializes and uses these members, but the client has no
* reason to look at them. */
int recurse; /* Recursion level, via xlate_table_action. */
bool max_resubmit_trigger; /* Recursed too deeply during translation. */
struct flow base_flow; /* Flow at the last commit. */
uint32_t orig_skb_priority; /* Priority when packet arrived. */
uint8_t table_id; /* OpenFlow table ID where flow was found. */
uint32_t sflow_n_outputs; /* Number of output ports. */
uint16_t sflow_odp_port; /* Output port for composing sFlow action. */
uint16_t user_cookie_offset;/* Used for user_action_cookie fixup. */
bool exit; /* No further actions should be processed. */
struct flow orig_flow; /* Copy of original flow. */
};
/* action_xlate_ctx_init() initializes these members. */
/* The ofproto. */
struct ofproto_dpif *ofproto;
/* Flow to which the OpenFlow actions apply. xlate_actions() will modify
* this flow when actions change header fields. */
struct flow flow;
/* The packet corresponding to 'flow', or a null pointer if we are
* revalidating without a packet to refer to. */
const struct ofpbuf *packet;
/* Should OFPP_NORMAL update the MAC learning table? Should "learn"
* actions update the flow table?
*
* We want to update these tables if we are actually processing a packet,
* or if we are accounting for packets that the datapath has processed, but
* not if we are just revalidating. */
bool may_learn; //当在处理一个真正的数据包时应该置为真;
/* The rule that we are currently translating, or NULL. */
struct rule_dpif *rule;
/* Union of the set of TCP flags seen so far in this flow. (Used only by
* NXAST_FIN_TIMEOUT. Set to zero to avoid updating rules'
* timeouts.) */
uint8_t tcp_flags;
/* If nonnull, flow translation calls this function just before executing a
* resubmit or OFPP_TABLE action. In addition, disables logging of traces
* when the recursion depth is exceeded.
*
* 'rule' is the rule being submitted into. It will be null if the
* resubmit or OFPP_TABLE action didn't find a matching rule.
*
* This is normally null so the client has to set it manually after
* calling action_xlate_ctx_init(). */
void (*resubmit_hook)(struct action_xlate_ctx *, struct rule_dpif *rule);
/* If nonnull, flow translation calls this function to report some
* significant decision, e.g. to explain why OFPP_NORMAL translation
* dropped a packet. */
void (*report_hook)(struct action_xlate_ctx *, const char *s);
/* If nonnull, flow translation credits the specified statistics to each
* rule reached through a resubmit or OFPP_TABLE action.
*
* This is normally null so the client has to set it manually after
* calling action_xlate_ctx_init(). */
const struct dpif_flow_stats *resubmit_stats;
/* xlate_actions() initializes and uses these members. The client might want
* to look at them after it returns. */
struct ofpbuf *odp_actions; /* Datapath actions. */
tag_type tags; /* Tags associated with actions. */
enum slow_path_reason slow; /* 0 if fast path may be used. */
bool has_learn; /* Actions include NXAST_LEARN? */
bool has_normal; /* Actions output to OFPP_NORMAL? */
bool has_fin_timeout; /* Actions include NXAST_FIN_TIMEOUT? */
uint16_t nf_output_iface; /* Output interface index for NetFlow. */
mirror_mask_t mirrors; /* Bitmap of associated mirrors. */
/* xlate_actions() initializes and uses these members, but the client has no
* reason to look at them. */
int recurse; /* Recursion level, via xlate_table_action. */
bool max_resubmit_trigger; /* Recursed too deeply during translation. */
struct flow base_flow; /* Flow at the last commit. */
uint32_t orig_skb_priority; /* Priority when packet arrived. */
uint8_t table_id; /* OpenFlow table ID where flow was found. */
uint32_t sflow_n_outputs; /* Number of output ports. */
uint16_t sflow_odp_port; /* Output port for composing sFlow action. */
uint16_t user_cookie_offset;/* Used for user_action_cookie fixup. */
bool exit; /* No further actions should be processed. */
struct flow orig_flow; /* Copy of original flow. */
};
xlate_actions() 负责把这些 OpenFlow actions 翻译成以 Netlink 属性编码的 datapath actions(即 odp_actions)。
/* Translates the 'ofpacts_len' bytes of OpenFlow actions in 'ofpacts' into
 * datapath actions, which are written into 'odp_actions' (cleared first).
 *
 * The caller must already have set up the members of 'ctx' that
 * action_xlate_ctx_init() is responsible for (ofproto, flow, packet, ...).
 * This function resets the translation-output members of 'ctx' (tags, slow,
 * has_learn, has_normal, has_fin_timeout, nf_output_iface, mirrors, ...) and
 * then fills them in as a side effect of the translation, so the caller may
 * inspect them afterwards. */
static void xlate_actions(struct action_xlate_ctx *ctx, const struct ofpact *ofpacts, size_t ofpacts_len, struct ofpbuf *odp_actions)
{
    /* Normally false.  Set to true if we ever hit MAX_RESUBMIT_RECURSION, so
     * that in the future we always keep a copy of the original flow for
     * tracing purposes. */
    static bool hit_resubmit_limit;
    enum slow_path_reason special;

    COVERAGE_INC(ofproto_dpif_xlate);

    ofpbuf_clear(odp_actions);
    ofpbuf_reserve(odp_actions, NL_A_U32_SIZE);

    ctx->odp_actions = odp_actions;
    ctx->tags = 0;
    ctx->slow = 0;
    ctx->has_learn = false;
    ctx->has_normal = false;
    ctx->has_fin_timeout = false;
    ctx->nf_output_iface = NF_OUT_DROP;
    ctx->mirrors = 0;
    ctx->recurse = 0;
    ctx->max_resubmit_trigger = false;
    ctx->orig_skb_priority = ctx->flow.skb_priority;
    ctx->table_id = 0;
    ctx->exit = false;

    if (ctx->ofproto->has_mirrors || hit_resubmit_limit) {
        /* Do this conditionally because the copy is expensive enough that it
         * shows up in profiles.
         *
         * We keep orig_flow in 'ctx' only because I couldn't make GCC 4.4
         * believe that I wasn't using it without initializing it if I kept
         * it in a local variable. */
        ctx->orig_flow = ctx->flow;
    }

    /* Per the original annotation, FLOW_NW_FRAG_ANY is set on every IP
     * fragment. */
    if (ctx->flow.nw_frag & FLOW_NW_FRAG_ANY) {
        /* The fragment-handling policy is one of the ofp_config_flags values
         * (per the original annotation, declared in
         * include/openflow/openflow-1.0.h). */
        switch (ctx->ofproto->up.frag_handling) {
        case OFPC_FRAG_NORMAL:
            /* No special handling for fragments, but we must pretend that
             * transport ports are unavailable. */
            ctx->flow.tp_src = ctx->base_flow.tp_src = htons(0);
            ctx->flow.tp_dst = ctx->base_flow.tp_dst = htons(0);
            break;

        case OFPC_FRAG_DROP:
            /* Leave 'odp_actions' empty. */
            return;

        case OFPC_FRAG_REASM:
            NOT_REACHED();      /* Aborts, per the original annotation. */

        case OFPC_FRAG_NX_MATCH:
            /* Nothing to do. */
            break;

        case OFPC_INVALID_TTL_TO_CONTROLLER:
            NOT_REACHED();
        }
    }
    /* NOTE(review): open question left by the original annotator -- what does
     * the fragment logic above achieve overall?  Does it mean the upper
     * layer always sees complete packets and never handles fragments
     * itself?  Is this related to LRO?  Unanswered here. */

    special = process_special(ctx->ofproto, &ctx->flow, ctx->packet);
    if (special) {
        ctx->slow |= special;
    } else {
        /* Common case: the flow is not CFM, LACP or STP. */
        static struct vlog_rate_limit trace_rl = VLOG_RATE_LIMIT_INIT(1, 1);
        ovs_be16 initial_tci = ctx->base_flow.vlan_tci;

        add_sflow_action(ctx);
        do_xlate_actions(ofpacts, ofpacts_len, ctx);

        if (ctx->max_resubmit_trigger && !ctx->resubmit_hook) {
            if (!hit_resubmit_limit) {
                /* We didn't record the original flow.  Make sure we do from
                 * now on. */
                hit_resubmit_limit = true;
            } else if (!VLOG_DROP_ERR(&trace_rl)) {
                struct ds ds = DS_EMPTY_INITIALIZER;

                ofproto_trace(ctx->ofproto, &ctx->orig_flow, ctx->packet, initial_tci, &ds);
                VLOG_ERR("Trace triggered by excessive resubmit recursion:\n%s", ds_cstr(&ds));
                ds_destroy(&ds);
            }
        }

        if (!connmgr_may_set_up_flow(ctx->ofproto->up.connmgr, &ctx->flow,
                                     ctx->odp_actions->data,
                                     ctx->odp_actions->size)) {
            ctx->slow |= SLOW_IN_BAND;
            if (ctx->packet
                && connmgr_msg_in_hook(ctx->ofproto->up.connmgr, &ctx->flow,
                                       ctx->packet)) {
                compose_output_action(ctx, OFPP_LOCAL);
            }
        }
        if (ctx->ofproto->has_mirrors) {
            add_mirror_actions(ctx, &ctx->orig_flow);
        }
        /* Patch up the sFlow action that add_sflow_action() emitted before
         * the outputs were known. */
        fix_sflow_action(ctx);
    }
}
/* Checks whether 'flow', received on 'ofproto', belongs to one of the
 * control protocols that are handled here instead of by the flow table:
 * CFM, LACP or STP, tested in that order against the flow's input port.
 *
 * If 'packet' is nonnull it is passed to the matching protocol handler; if
 * it is null only the classification is performed.
 *
 * Returns SLOW_CFM, SLOW_LACP or SLOW_STP for a matching flow, or 0 if the
 * flow is not special (including when the input port cannot be found). */
static enum slow_path_reason process_special(struct ofproto_dpif *ofproto, const struct flow *flow, const struct ofpbuf *packet)
{
    struct ofport_dpif *ofport = get_ofp_port(ofproto, flow->in_port);

    if (!ofport) {
        return 0;
    }

    if (ofport->cfm && cfm_should_process_flow(ofport->cfm, flow)) {
        /* IEEE 802.1ag CFM wants the packets of this flow (per the original
         * annotation, frames of type ETH_TYPE_CFM). */
        if (packet) {
            cfm_process_heartbeat(ofport->cfm, packet);
        }
        return SLOW_CFM;
    } else if (ofport->bundle && ofport->bundle->lacp && flow->dl_type == htons(ETH_TYPE_LACP)) {
        /* IEEE 802.3ad Link Aggregation Control Protocol. */
        if (packet) {
            lacp_process_packet(ofport->bundle->lacp, ofport, packet);
        }
        return SLOW_LACP;
    } else if (ofproto->stp && stp_should_process_flow(flow)) {
        if (packet) {
            stp_process_packet(ofport, packet);
        }
        return SLOW_STP;
    }
    return 0;
}
SAMPLE 动作必须位于 action list 的最前面;但此时还没有构造它所需的全部信息(例如输出端口),所以这里先尽可能完整地构造,之后再由 fix_sflow_action() 修正 cookie。
/* Appends the sFlow SAMPLE action to ctx->odp_actions, composed with
 * OVSP_NONE as the output port, and records in ctx->user_cookie_offset the
 * offset that compose_sflow_action() returns.  Also resets the sFlow output
 * bookkeeping (sflow_odp_port, sflow_n_outputs) to zero. */
static void add_sflow_action(struct action_xlate_ctx *ctx)
{
    ctx->user_cookie_offset = compose_sflow_action(ctx->ofproto,
                                                   ctx->odp_actions,
                                                   &ctx->flow, OVSP_NONE);
    ctx->sflow_odp_port = 0;
    ctx->sflow_n_outputs = 0;
}
/* Compose SAMPLE action for sFlow.
 *
 * Appends to 'odp_actions' an OVS_ACTION_ATTR_SAMPLE attribute that holds
 * the sampling probability configured on 'ofproto' and a nested userspace
 * action carrying an sFlow cookie built for 'odp_port'.
 *
 * Returns the offset of the cookie within 'odp_actions' as reported by
 * put_userspace_action(), or 0 (appending nothing) if sFlow is not
 * configured on 'ofproto' or the flow's input port is OFPP_NONE. */
static size_t compose_sflow_action(const struct ofproto_dpif *ofproto,struct ofpbuf *odp_actions,
const struct flow *flow,uint32_t odp_port)
{
    uint32_t probability;
    union user_action_cookie cookie;
    size_t sample_offset, actions_offset;
    size_t cookie_offset;   /* Same type as the value it stores and returns. */

    if (!ofproto->sflow || flow->in_port == OFPP_NONE) {
        return 0;
    }

    sample_offset = nl_msg_start_nested(odp_actions, OVS_ACTION_ATTR_SAMPLE);

    /* Number of packets out of UINT_MAX to sample. */
    probability = dpif_sflow_get_probability(ofproto->sflow);
    nl_msg_put_u32(odp_actions, OVS_SAMPLE_ATTR_PROBABILITY, probability);

    actions_offset = nl_msg_start_nested(odp_actions, OVS_SAMPLE_ATTR_ACTIONS);
    /* Per the original annotation, this essentially fills in the cookie's
     * sflow.output field: either a port number or a "dropped" marker. */
    compose_sflow_cookie(ofproto, htons(0), odp_port, odp_port == OVSP_NONE ? 0 : 1, &cookie);
    cookie_offset = put_userspace_action(ofproto, odp_actions, flow, &cookie);

    nl_msg_end_nested(odp_actions, actions_offset);
    nl_msg_end_nested(odp_actions, sample_offset);
    return cookie_offset;
}
------------lib/odp-util.h
/* user_action_cookie is passed as argument to OVS_ACTION_ATTR_USERSPACE.
 * Since it is passed to kernel as u64, its size has to be 8 bytes.
 *
 * Every variant starts with a 16-bit 'type' member, so the variant in use
 * can be read through the top-level 'type' member. */
union user_action_cookie {
    uint16_t type;              /* enum user_action_cookie_type. */

    struct {
        uint16_t type;          /* USER_ACTION_COOKIE_SFLOW. */
        ovs_be16 vlan_tci;      /* Destination VLAN TCI. */
        uint32_t output;        /* SFL_FLOW_SAMPLE_TYPE 'output' value. */
    } sflow;

    struct {
        uint16_t type;          /* USER_ACTION_COOKIE_SLOW_PATH. */
        uint16_t unused;
        uint32_t reason;        /* enum slow_path_reason. */
    } slow_path;
};
/* Appends to 'odp_actions' a userspace action that carries 'cookie'.  The
 * Netlink PID placed in the action is the one that 'ofproto''s dpif reports
 * for the datapath port corresponding to the input port of 'flow'.
 *
 * Returns whatever odp_put_userspace_action() returns: the offset of the
 * cookie within 'odp_actions'. */
static size_t put_userspace_action(const struct ofproto_dpif *ofproto, struct ofpbuf *odp_actions,
const struct flow *flow, const union user_action_cookie *cookie)
{
    uint32_t pid;

    pid = dpif_port_get_pid(ofproto->dpif, ofp_port_to_odp_port(flow->in_port));

    return odp_put_userspace_action(pid, cookie, odp_actions);
}
/* Returns the Netlink PID value to supply in OVS_ACTION_ATTR_USERSPACE actions
 * as the OVS_USERSPACE_ATTR_PID attribute's value, for use in flows whose
 * packets arrived on port 'port_no'.
 *
 * A 'port_no' of UINT16_MAX is a special case: it returns a reserved PID, not
 * allocated to any port, that the client may use for special purposes.
 *
 * The return value is only meaningful when DPIF_UC_ACTION has been enabled in
 * the 'dpif''s listen mask.  It is allowed to change when DPIF_UC_ACTION is
 * disabled and then re-enabled, so a client that does that must be prepared to
 * update all of the flows that it installed that contain
 * OVS_ACTION_ATTR_USERSPACE actions. */
uint32_t dpif_port_get_pid(const struct dpif *dpif, uint16_t port_no)
{
    /* Delegate to the port_get_pid method of the concrete datapath interface
     * class (per the original annotation, e.g. dpif_linux_class) to obtain
     * the userspace Netlink PID.  Classes that do not implement the method
     * yield 0. */
    return (dpif->dpif_class->port_get_pid
            ? (dpif->dpif_class->port_get_pid)(dpif, port_no)
            : 0);
}
/* Appends an OVS_ACTION_ATTR_USERSPACE action to 'odp_actions' that specifies
 * Netlink PID 'pid'.  If 'cookie' is nonnull, adds a userdata attribute whose
 * contents contains 'cookie' and returns the offset within 'odp_actions' of
 * the start of the cookie.  (If 'cookie' is null, then the return value is not
 * meaningful.) */
size_t odp_put_userspace_action(uint32_t pid, const union user_action_cookie *cookie, struct ofpbuf *odp_actions)
{
    size_t offset;

    offset = nl_msg_start_nested(odp_actions, OVS_ACTION_ATTR_USERSPACE);
    nl_msg_put_u32(odp_actions, OVS_USERSPACE_ATTR_PID, pid);
    if (cookie) {
        nl_msg_put_unspec(odp_actions, OVS_USERSPACE_ATTR_USERDATA, cookie, sizeof *cookie);
    }
    nl_msg_end_nested(odp_actions, offset);

    /* The cookie is the last thing appended, so it starts one aligned cookie
     * length before the current end of the buffer. */
    return cookie ? odp_actions->size - NLA_ALIGN(sizeof *cookie) : 0;
}
这里很重要:do_xlate_actions() 会根据 rule->up.ofpacts 逐个翻译真正的动作。当第一个 packet 因为匹配失败到达这里时,最常见的动作是从端口输出,所以这里重点看 OFPACT_OUTPUT。ofpact_* 结构体都定义在 lib/ofp-actions.h 中,其中 ofpact_output 结构体带有输出端口字段。
/* Walks the 'ofpacts_len' bytes of OpenFlow actions in 'ofpacts' and
 * translates each one, either by updating ctx->flow (header rewrites) or by
 * calling a helper that emits datapath actions into ctx->odp_actions.
 *
 * Does nothing if the input port exists and may_receive() rejects the flow.
 * Translation stops early when ctx->exit becomes true or when
 * compose_dec_ttl() returns true.
 *
 * While the loop runs, ctx->rule (if any) is marked non-evictable; its
 * previous evictable state is restored before returning. */
static void do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len, struct action_xlate_ctx *ctx)
{
    const struct ofport_dpif *port;
    bool was_evictable = true;
    const struct ofpact *a;

    /* Look up the concrete port instance for the flow's input port. */
    port = get_ofp_port(ctx->ofproto, ctx->flow.in_port);
    if (port && !may_receive(port, ctx)) {
        /* Drop this flow. */
        return;
    }

    if (ctx->rule) {
        /* Don't let the rule we're working on get evicted underneath us. */
        was_evictable = ctx->rule->up.evictable;
        ctx->rule->up.evictable = false;
    }
    OFPACT_FOR_EACH (a, ofpacts, ofpacts_len) {
        struct ofpact_controller *controller;
        const struct ofpact_metadata *metadata;

        if (ctx->exit) {
            break;
        }

        /* The concrete action types are defined in lib/ofp-actions.h; each
         * one corresponds to an ofpact_* structure. */
        switch (a->type) {
        case OFPACT_OUTPUT:
            xlate_output_action(ctx, ofpact_get_OUTPUT(a)->port, ofpact_get_OUTPUT(a)->max_len, true);
            break;

        case OFPACT_CONTROLLER:
            controller = ofpact_get_CONTROLLER(a);
            execute_controller_action(ctx, controller->max_len, controller->reason,controller->controller_id);
            break;

        case OFPACT_ENQUEUE:
            xlate_enqueue_action(ctx, ofpact_get_ENQUEUE(a));
            break;

        case OFPACT_SET_VLAN_VID:
            ctx->flow.vlan_tci &= ~htons(VLAN_VID_MASK);
            ctx->flow.vlan_tci |= (htons(ofpact_get_SET_VLAN_VID(a)->vlan_vid)| htons(VLAN_CFI));
            break;

        case OFPACT_SET_VLAN_PCP:
            ctx->flow.vlan_tci &= ~htons(VLAN_PCP_MASK);
            ctx->flow.vlan_tci |= htons((ofpact_get_SET_VLAN_PCP(a)->vlan_pcp<< VLAN_PCP_SHIFT)| VLAN_CFI);
            break;

        case OFPACT_STRIP_VLAN:
            ctx->flow.vlan_tci = htons(0);
            break;

        case OFPACT_SET_ETH_SRC:
            memcpy(ctx->flow.dl_src, ofpact_get_SET_ETH_SRC(a)->mac,
                   ETH_ADDR_LEN);
            break;

        case OFPACT_SET_ETH_DST:
            memcpy(ctx->flow.dl_dst, ofpact_get_SET_ETH_DST(a)->mac, ETH_ADDR_LEN);
            break;

        case OFPACT_SET_IPV4_SRC:
            ctx->flow.nw_src = ofpact_get_SET_IPV4_SRC(a)->ipv4;
            break;

        case OFPACT_SET_IPV4_DST:
            ctx->flow.nw_dst = ofpact_get_SET_IPV4_DST(a)->ipv4;
            break;

        case OFPACT_SET_IPV4_DSCP:
            /* OpenFlow 1.0 only supports IPv4. */
            if (ctx->flow.dl_type == htons(ETH_TYPE_IP)) {
                ctx->flow.nw_tos &= ~IP_DSCP_MASK;
                ctx->flow.nw_tos |= ofpact_get_SET_IPV4_DSCP(a)->dscp;
            }
            break;

        case OFPACT_SET_L4_SRC_PORT:
            ctx->flow.tp_src = htons(ofpact_get_SET_L4_SRC_PORT(a)->port);
            break;

        case OFPACT_SET_L4_DST_PORT:
            ctx->flow.tp_dst = htons(ofpact_get_SET_L4_DST_PORT(a)->port);
            break;

        case OFPACT_RESUBMIT:
            xlate_ofpact_resubmit(ctx, ofpact_get_RESUBMIT(a));
            break;

        case OFPACT_SET_TUNNEL:
            ctx->flow.tunnel.tun_id = htonll(ofpact_get_SET_TUNNEL(a)->tun_id);
            break;

        case OFPACT_SET_QUEUE:
            xlate_set_queue_action(ctx, ofpact_get_SET_QUEUE(a)->queue_id);
            break;

        case OFPACT_POP_QUEUE:
            /* Restore the priority the packet had when it arrived. */
            ctx->flow.skb_priority = ctx->orig_skb_priority;
            break;

        case OFPACT_REG_MOVE:
            nxm_execute_reg_move(ofpact_get_REG_MOVE(a), &ctx->flow);
            break;

        case OFPACT_REG_LOAD:
            nxm_execute_reg_load(ofpact_get_REG_LOAD(a), &ctx->flow);
            break;

        case OFPACT_DEC_TTL:
            if (compose_dec_ttl(ctx, ofpact_get_DEC_TTL(a))) {
                goto out;
            }
            break;

        case OFPACT_NOTE:
            /* Nothing to do. */
            break;

        case OFPACT_MULTIPATH:
            multipath_execute(ofpact_get_MULTIPATH(a), &ctx->flow);
            break;

        case OFPACT_AUTOPATH:
            xlate_autopath(ctx, ofpact_get_AUTOPATH(a));
            break;

        case OFPACT_BUNDLE:
            ctx->ofproto->has_bundle_action = true;
            xlate_bundle_action(ctx, ofpact_get_BUNDLE(a));
            break;

        case OFPACT_OUTPUT_REG:
            xlate_output_reg_action(ctx, ofpact_get_OUTPUT_REG(a));
            break;

        case OFPACT_LEARN:
            /* Always record that a learn action is present, but only act on
             * it when learning is permitted for this translation. */
            ctx->has_learn = true;
            if (ctx->may_learn) {
                xlate_learn_action(ctx, ofpact_get_LEARN(a));
            }
            break;

        case OFPACT_EXIT:
            ctx->exit = true;
            break;

        case OFPACT_FIN_TIMEOUT:
            ctx->has_fin_timeout = true;
            xlate_fin_timeout(ctx, ofpact_get_FIN_TIMEOUT(a));
            break;

        case OFPACT_CLEAR_ACTIONS:
            /* TODO:XXX
             * Nothing to do because write-actions is not supported for now.
             * When write-actions is supported, clear-actions also must
             * be supported at the same time.
             */
            break;

        case OFPACT_WRITE_METADATA:
            /* Replace only the metadata bits selected by the mask. */
            metadata = ofpact_get_WRITE_METADATA(a);
            ctx->flow.metadata &= ~metadata->mask;
            ctx->flow.metadata |= metadata->metadata & metadata->mask;
            break;

        case OFPACT_GOTO_TABLE: {
            /* TODO:XXX remove recursion */
            /* It is assumed that goto-table is last action */
            struct ofpact_goto_table *ogt = ofpact_get_GOTO_TABLE(a);
            assert(ctx->table_id < ogt->table_id);
            xlate_table_action(ctx, ctx->flow.in_port, ogt->table_id, true);
            break;
        }
        }
    }

out:
    /* We've let OFPP_NORMAL and the learning action look at the packet,
     * so drop it now if forwarding is disabled. */
    if (port && !stp_forward_in_state(port->stp_state)) {
        ofpbuf_clear(ctx->odp_actions);
        add_sflow_action(ctx);
    }
    if (ctx->rule) {
        ctx->rule->up.evictable = was_evictable;
    }
}
/* Common header that starts every ofpact_* action structure (for example,
 * struct ofpact_output embeds it as its first member). */
struct ofpact {
    enum ofpact_type type;      /* OFPACT_*. */
    enum ofputil_action_code compat; /* Original type when added, if any. */
    uint16_t len;               /* Length of the action, in bytes, including
                                 * struct ofpact, excluding padding. */
};
/* OFPACT_OUTPUT.
 *
 * Used for OFPAT10_OUTPUT. */
struct ofpact_output {
    struct ofpact ofpact;
    uint16_t port;              /* Output port. */
    uint16_t max_len;           /* Max send len, for port OFPP_CONTROLLER. */
};
/* Translates an OpenFlow "output to 'port'" action for 'ctx'.
 *
 * 'port' may be a reserved OFPP_* value or an ordinary port number; it comes
 * from the concrete ofpact_* structure being translated (struct
 * ofpact_output for OFPACT_OUTPUT).  'max_len' is only used for
 * OFPP_CONTROLLER.  'may_packet_in' is forwarded to xlate_table_action() for
 * OFPP_TABLE.
 *
 * Also maintains ctx->nf_output_iface so that it summarizes all outputs made
 * so far, not only this one.
 *
 * TODO (from the original annotation): consult the OpenFlow 1.0
 * specification for the exact meaning of each reserved port number. */
static void xlate_output_action(struct action_xlate_ctx *ctx,uint16_t port, uint16_t max_len, bool may_packet_in)
{
    uint16_t prev_nf_output_iface = ctx->nf_output_iface;

    /* Start from "drop" so that we can tell below whether this particular
     * action produced any output. */
    ctx->nf_output_iface = NF_OUT_DROP;

    switch (port) {
    case OFPP_IN_PORT:
        /* Send back out of the ingress port; this virtual port has to be
         * requested explicitly. */
        compose_output_action(ctx, ctx->flow.in_port);
        break;
    case OFPP_TABLE:
        xlate_table_action(ctx, ctx->flow.in_port, 0, may_packet_in);
        break;
    case OFPP_NORMAL:
        xlate_normal(ctx);
        break;
    case OFPP_FLOOD:
        flood_packets(ctx, false);
        break;
    case OFPP_ALL:
        flood_packets(ctx, true);
        break;
    case OFPP_CONTROLLER:
        execute_controller_action(ctx, max_len, OFPR_ACTION, 0);
        break;
    case OFPP_NONE:
        break;
    case OFPP_LOCAL:
    default:
        if (port != ctx->flow.in_port) {
            compose_output_action(ctx, port);
        } else {
            xlate_report(ctx, "skipping output to input port");
        }
        break;
    }

    /* Merge this action's NetFlow output interface with the previous one. */
    if (prev_nf_output_iface == NF_OUT_FLOOD) {
        /* Once flooded, always reported as flooded. */
        ctx->nf_output_iface = NF_OUT_FLOOD;
    } else if (ctx->nf_output_iface == NF_OUT_DROP) {
        /* This action output nothing: keep the earlier value. */
        ctx->nf_output_iface = prev_nf_output_iface;
    } else if (prev_nf_output_iface != NF_OUT_DROP &&
               ctx->nf_output_iface != NF_OUT_FLOOD) {
        /* Both the earlier actions and this one output somewhere. */
        ctx->nf_output_iface = NF_OUT_MULTI;
    }
}
这里就看OFPP_ALL的情况,也是通常的情况。
/* Composes an output action to every port of ctx->ofproto except the flow's
 * input port, then marks the NetFlow output interface as NF_OUT_FLOOD.
 *
 * If 'all' is true (OFPP_ALL), each port is output to through
 * compose_output_action__() with its STP check disabled.  Otherwise
 * (OFPP_FLOOD), ports whose configuration has OFPUTIL_PC_NO_FLOOD set are
 * skipped and the rest go through compose_output_action(). */
static void flood_packets(struct action_xlate_ctx *ctx, bool all)
{
    struct ofport_dpif *ofport;

    /* Visit every port of this ofproto. */
    HMAP_FOR_EACH (ofport, up.hmap_node, &ctx->ofproto->up.ports) {
        uint16_t ofp_port = ofport->up.ofp_port;

        if (ofp_port == ctx->flow.in_port) {
            continue;
        }

        if (all) {
            compose_output_action__(ctx, ofp_port, false);
        } else if (!(ofport->up.pp.config & OFPUTIL_PC_NO_FLOOD)) {
            compose_output_action(ctx, ofp_port);
        }
    }

    ctx->nf_output_iface = NF_OUT_FLOOD;
}
/* Appends to 'ctx->odp_actions' the actions needed to output the flow being
 * translated to OpenFlow port 'ofp_port'.
 *
 * Nothing is emitted when a port record exists for 'ofp_port' and either
 * its configuration has OFPUTIL_PC_NO_FWD set, or 'check_stp' is true and
 * the port's STP state does not allow forwarding.  A port with no record is
 * still output to.
 *
 * 'ctx->flow.vlan_tci' and 'ctx->flow.nw_tos' may be rewritten temporarily
 * while the actions are composed; both are restored before returning on the
 * path that emits an output. */
static void compose_output_action__(struct action_xlate_ctx *ctx, uint16_t ofp_port, bool check_stp)
{
    /* Look up the port object that belongs to this port number. */
    const struct ofport_dpif *ofport = get_ofp_port(ctx->ofproto, ofp_port);
    /* Map the OpenFlow port number to the datapath (ODP) port number.
     * NOTE(review): per the original author's note, OFPP_LOCAL maps to
     * OVSP_LOCAL, OFPP_NONE to OVSP_NONE and other values are unchanged --
     * confirm against ofp_port_to_odp_port(). */
    uint16_t odp_port = ofp_port_to_odp_port(ofp_port);
    /* Saved so the per-output rewrites below can be undone at the end. */
    ovs_be16 flow_vlan_tci = ctx->flow.vlan_tci;
    uint8_t flow_nw_tos = ctx->flow.nw_tos;
    uint16_t out_port;

    if (ofport) {
        struct priority_to_dscp *pdscp;

        if (ofport->up.pp.config & OFPUTIL_PC_NO_FWD) {
            xlate_report(ctx, "OFPPC_NO_FWD set, skipping output");
            return;
        } else if (check_stp && !stp_forward_in_state(ofport->stp_state)) {
            xlate_report(ctx, "STP not in forwarding state, skipping output");
            return;
        }

        /* DSCP handling: if a priority-to-DSCP entry exists for this flow's
         * skb_priority, replace the DSCP bits of nw_tos with its value. */
        pdscp = get_priority(ofport, ctx->flow.skb_priority);
        if (pdscp) {
            ctx->flow.nw_tos &= ~IP_DSCP_MASK;
            ctx->flow.nw_tos |= pdscp->dscp;
        }
    } else {
        /* We may not have an ofport record for this port, but it doesn't hurt
         * to allow forwarding to it anyhow. Maybe such a port will appear
         * later and we're pre-populating the flow table. */
    }

    /* If the lookup yields a different port than 'odp_port', the VLAN TCI
     * is cleared in the flow before the actions are committed. */
    out_port = vsp_realdev_to_vlandev(ctx->ofproto, odp_port, ctx->flow.vlan_tci);
    if (out_port != odp_port) {
        ctx->flow.vlan_tci = htons(0);
    }

    commit_odp_actions(&ctx->flow, &ctx->base_flow, ctx->odp_actions);
    /* Append the output port number.
     * NOTE(original author): open question -- this tells the kernel which
     * port to output on, but where does the packet itself come from? */
    nl_msg_put_u32(ctx->odp_actions, OVS_ACTION_ATTR_OUTPUT, out_port);

    ctx->sflow_odp_port = odp_port;
    ctx->sflow_n_outputs++;
    ctx->nf_output_iface = ofp_port;

    /* Undo the temporary per-output rewrites. */
    ctx->flow.vlan_tci = flow_vlan_tci;
    ctx->flow.nw_tos = flow_nw_tos;
}
/* Translates the pair ('realdev_odp_port', VLAN ID of 'vlan_tci') into the
 * ODP port number of the corresponding Linux VLAN device in 'ofproto'.
 * For example, given the port of eth0 and a 'vlan_tci' carrying VLAN 9, the
 * result would be the port number of eth0.9.
 *
 * When 'ofproto->realdev_vid_map' is empty, or holds no entry for that
 * (real device, VLAN ID) pair, 'realdev_odp_port' is returned unchanged. */
static uint32_t vsp_realdev_to_vlandev(const struct ofproto_dpif *ofproto, uint32_t realdev_odp_port, ovs_be16 vlan_tci)
{
    const struct vlan_splinter *splinter;
    uint16_t real_ofp_port;
    int wanted_vid;

    /* Nothing is registered in the map: the port maps to itself. */
    if (hmap_is_empty(&ofproto->realdev_vid_map)) {
        return realdev_odp_port;
    }

    real_ofp_port = odp_port_to_ofp_port(realdev_odp_port);
    wanted_vid = vlan_tci_to_vid(vlan_tci);

    HMAP_FOR_EACH_WITH_HASH (splinter, realdev_vid_node,
                             hash_realdev_vid(real_ofp_port, wanted_vid),
                             &ofproto->realdev_vid_map) {
        /* Entries in the same hash bucket may belong to other pairs. */
        if (splinter->realdev_ofp_port != real_ofp_port
            || splinter->vid != wanted_vid) {
            continue;
        }
        return ofp_port_to_odp_port(splinter->vlandev_ofp_port);
    }

    /* No VLAN device is involved, so hand back the original port number. */
    return realdev_odp_port;
}
比较flow与base flow,把两者不同的字段更新到base中(flow是const参数,不会被修改),同时向odp_actions追加相应的设置属性,如OVS_KEY_ATTR_TCP、OVS_KEY_ATTR_ETHERNET等等。
/* Runs each per-field commit helper in a fixed order, passing all of them
 * the same three arguments.
 *
 * 'flow' is const and therefore only read; 'base' and 'odp_actions' are
 * passed as mutable pointers.
 * NOTE(review): presumably each helper appends a set-action to
 * 'odp_actions' for the fields where 'flow' differs from 'base' and then
 * updates 'base' to match -- confirm against the helper definitions, which
 * are not shown here. */
void commit_odp_actions(const struct flow *flow, struct flow *base, struct ofpbuf *odp_actions)
{
/* Tunnel ID. */
commit_set_tun_id_action(flow, base, odp_actions);
/* Ethernet addresses. */
commit_set_ether_addr_action(flow, base, odp_actions);
/* VLAN. */
commit_vlan_action(flow, base, odp_actions);
/* Network-layer fields. */
commit_set_nw_action(flow, base, odp_actions);
/* Port fields (NOTE(review): likely transport-layer ports -- confirm). */
commit_set_port_action(flow, base, odp_actions);
/* Priority. */
commit_set_priority_action(flow, base, odp_actions);
/* skb mark. */
commit_set_skb_mark_action(flow, base, odp_actions);
}