When a device receives a packet, the protocol type (IP, 802.3, ARP, IPv6, and so on) is read from the type field, and the handler function that matches the type is then invoked. This resembles object-oriented dispatch and is implemented as follows (a sketch of the dispatch loop follows the definitions below):
- A structure is defined that maps a packet type to its handler function
[ include/linux/netdevice.h ]
struct packet_type {
	__be16			type;	/* This is really htons(ether_type). The packet type */
	struct net_device	*dev;	/* NULL is wildcarded here. The matching network device */
	int			(*func)(struct sk_buff *,
					struct net_device *,
					struct packet_type *,
					struct net_device *);
	bool			(*id_match)(struct packet_type *ptype,
					    struct sock *sk);
	void			*af_packet_priv;
	struct list_head	list;
};
- A global list is defined; every packet_type whose type is ETH_P_ALL (receive all packet types) is linked onto this list
[ net/core/dev.c ]
struct list_head ptype_all __read_mostly; /* Taps */
- A hash table is defined, keyed by the packet type
[ net/core/dev.c ]
struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
[ include/linux/netdevice.h ]
/*
 * The list of packet types we will receive (as opposed to discard)
 * and the routines to invoke.
 *
 * Why 16. Because with 16 the only overlap we get on a hash of the
 * low nibble of the protocol value is RARP/SNAP/X.25.
 *
 * NOTE: That is no longer true with the addition of VLAN tags. Not
 *       sure which should go first, but I bet it won't make much
 *       difference if we are running VLANs. The good news is that
 *       this protocol won't be in the list unless compiled in, so
 *       the average user (w/out VLANs) will not be adversely affected.
 *       --BLG
 *
 *      0800    IP
 *      8100    802.1Q VLAN
 *      0001    802.3
 *      0002    AX.25
 *      0004    802.2
 *      8035    RARP
 *      0005    SNAP
 *      0805    X.25
 *      0806    ARP
 *      8137    IPX
 *      0009    Localtalk
 *      86DD    IPv6
 */
#define PTYPE_HASH_SIZE (16)
#define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1)
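To see how these two structures are consulted on receive, here is a simplified sketch of the lookup done in __netif_receive_skb_core(): taps on ptype_all see every packet, while protocol handlers are found in the ptype_base bucket selected by the type field. This is illustrative only; dispatch_sketch is a made-up name, the real function defers delivery through deliver_skb() and takes an extra skb reference per handler (omitted here), and the code below would only compile inside net/core/dev.c, where ptype_all and ptype_base are visible.
/* Simplified sketch of the type-based dispatch (not verbatim kernel code).
 * Caller must hold rcu_read_lock(), as netif_receive_skb() does. */
static void dispatch_sketch(struct sk_buff *skb, struct net_device *orig_dev)
{
	struct packet_type *ptype;
	__be16 type = skb->protocol;

	/* 1. Taps (ETH_P_ALL), e.g. AF_PACKET sockets, see every packet. */
	list_for_each_entry_rcu(ptype, &ptype_all, list)
		if (!ptype->dev || ptype->dev == skb->dev)
			ptype->func(skb, skb->dev, ptype, orig_dev);

	/* 2. Protocol handlers, looked up in the hash bucket for this type. */
	list_for_each_entry_rcu(ptype,
			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list)
		if (ptype->type == type &&
		    (!ptype->dev || ptype->dev == skb->dev))
			ptype->func(skb, skb->dev, ptype, orig_dev);
}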
The following functions are used to register a packet_type onto these global lists:
[ net/core/dev.c ]
/*
* Add a protocol ID to the list. Now that the input handler is
* smarter we can dispense with all the messy stuff that used to be
* here.
*
* BEWARE!!! Protocol handlers, mangling input packets,
* MUST BE last in hash buckets and checking protocol handlers
* MUST start from promiscuous ptype_all chain in net_bh.
* It is true now, do not change it.
* Explanation follows: if protocol handler, mangling packet, will
* be the first on list, it is not able to sense, that packet
* is cloned and should be copied-on-write, so that it will
* change it and subsequent readers will get broken packet.
* --ANK (980803)
*/
static inline struct list_head *ptype_head(const struct packet_type *pt)
{
if (pt->type == htons(ETH_P_ALL)) // receive all packet types
return &ptype_all;
else
return &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
}
/**
* dev_add_pack - add packet handler
* @pt: packet type declaration
*
* Add a protocol handler to the networking stack. The passed &packet_type
* is linked into kernel lists and may not be freed until it has been
* removed from the kernel lists.
*
* This call does not sleep therefore it can not
* guarantee all CPU's that are in middle of receiving packets
* will see the new packet type (until the next received packet).
*/
void dev_add_pack(struct packet_type *pt)
{
struct list_head *head = ptype_head(pt); // pick the list to link onto
spin_lock(&ptype_lock);
list_add_rcu(&pt->list, head); // link pt onto that list
spin_unlock(&ptype_lock);
}
EXPORT_SYMBOL(dev_add_pack);
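As an illustration of how a protocol hooks itself in, a minimal out-of-tree module could register a tap on all packets roughly as shown below. This is a hypothetical sketch (the names sniff_rcv and sniff_ptype are made up, not kernel code); note that each handler owns one reference on the skb and must release it, and that dev_remove_pack() is the matching unregistration call (it also synchronizes with in-flight receivers).
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/if_ether.h>

/* Hypothetical handler: log the frame, then drop our skb reference. */
static int sniff_rcv(struct sk_buff *skb, struct net_device *dev,
		     struct packet_type *pt, struct net_device *orig_dev)
{
	pr_info("sniff: %s proto=0x%04x len=%u\n",
		dev->name, ntohs(skb->protocol), skb->len);
	kfree_skb(skb);			/* each handler owns one skb reference */
	return NET_RX_SUCCESS;
}

static struct packet_type sniff_ptype __read_mostly = {
	.type = htons(ETH_P_ALL),	/* tap: ends up on ptype_all */
	.func = sniff_rcv,
};

static int __init sniff_init(void)
{
	dev_add_pack(&sniff_ptype);	/* link into the kernel lists */
	return 0;
}

static void __exit sniff_exit(void)
{
	dev_remove_pack(&sniff_ptype);	/* unlink and wait for readers */
}

module_init(sniff_init);
module_exit(sniff_exit);
MODULE_LICENSE("GPL");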
To improve receive and transmit efficiency, especially under heavy load, the kernel applies special handling known as offload. It is organized in the same way as the packet handlers above:
- Each received packet has a type (IP, 802.3, ARP, IPv6, and so on), and each type has a corresponding packet_offload
[ include/linux/netdevice.h ]
struct packet_offload {
	__be16			 type;	/* This is really htons(ether_type). */
	struct offload_callbacks callbacks;
	struct list_head	 list;
};

struct offload_callbacks {
	struct sk_buff		*(*gso_segment)(struct sk_buff *skb,
						netdev_features_t features);
	int			(*gso_send_check)(struct sk_buff *skb);
	struct sk_buff		**(*gro_receive)(struct sk_buff **head,
						 struct sk_buff *skb);
	int			(*gro_complete)(struct sk_buff *skb, int nhoff);
};
- The kernel declares a global list, offload_base
[ net/core/dev.c ]
static struct list_head offload_base __read_mostly;
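For context, offload_base is walked much like ptype_base: when the stack needs to segment a GSO packet, it looks up the offload callbacks registered for skb->protocol. The sketch below is a simplified illustration of that lookup; lookup_gso_sketch is a made-up name, the real logic lives in skb_gso_segment() in net/core/dev.c with additional header and checksum validation, and this fragment would only compile inside dev.c where offload_base is visible.
/* Simplified sketch of how the offload list is consulted (not verbatim). */
static struct sk_buff *lookup_gso_sketch(struct sk_buff *skb,
					 netdev_features_t features)
{
	struct packet_offload *ptype;
	__be16 type = skb->protocol;
	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);

	rcu_read_lock();
	list_for_each_entry_rcu(ptype, &offload_base, list) {
		if (ptype->type == type && ptype->callbacks.gso_segment) {
			/* hand the packet to the protocol's segmentation code */
			segs = ptype->callbacks.gso_segment(skb, features);
			break;
		}
	}
	rcu_read_unlock();

	return segs;
}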
A packet_offload is registered onto this list with the following function:
[ net/core/dev.c ]
/**
* dev_add_offload - register offload handlers
* @po: protocol offload declaration
*
* Add protocol offload handlers to the networking stack. The passed
* &proto_offload is linked into kernel lists and may not be freed until
* it has been removed from the kernel lists.
*
* This call does not sleep therefore it can not
* guarantee all CPU's that are in middle of receiving packets
* will see the new offload handlers (until the next received packet).
*/
void dev_add_offload(struct packet_offload *po)
{
struct list_head *head = &offload_base; // the global list
spin_lock(&offload_lock);
list_add_rcu(&po->list, head);
spin_unlock(&offload_lock);
}
EXPORT_SYMBOL(dev_add_offload);
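As a concrete user, IPv4 registers its GSO/GRO callbacks roughly like this. The snippet is paraphrased from net/ipv4/af_inet.c of this kernel generation, so the exact callback set and initializer details may differ between versions:
static struct packet_offload ip_packet_offload __read_mostly = {
	.type = cpu_to_be16(ETH_P_IP),
	.callbacks = {
		.gso_send_check	= inet_gso_send_check,
		.gso_segment	= inet_gso_segment,
		.gro_receive	= inet_gro_receive,
		.gro_complete	= inet_gro_complete,
	},
};

static int __init ipv4_offload_init(void)
{
	/* ... TCP/UDP offloads are registered here as well ... */
	dev_add_offload(&ip_packet_offload);
	return 0;
}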