struct ethhdr、ether_header、iphdr、tcphdr、udphdr

本文详细介绍了网络通信中关键协议的数据包结构,包括以太网(Ethernet)、IP、TCP及UDP等,通过具体字段说明了各层协议如何实现数据传输。

************************eth的结构**************************************

/*
 * Ethernet frame header: destination address, source address and a
 * 16-bit protocol field. Declared packed so the compiler adds no padding.
 */
struct ethhdr {
unsigned char h_dest[ETH_ALEN];	/* destination hardware address, ETH_ALEN bytes */
unsigned char h_source[ETH_ALEN];	/* source hardware address, ETH_ALEN bytes */
__be16 h_proto;	/* protocol/type field; the __be16 type marks it as big-endian */
} __attribute__((packed));

/*
 * Ethernet header using the u_int*_t type names; it has the same three
 * fields, in the same order, as struct ethhdr and is also declared packed.
 */
struct ether_header
{
u_int8_t ether_dhost[ETH_ALEN];      /* destination Ethernet address */
u_int8_t ether_shost[ETH_ALEN];      /* source Ethernet address */
u_int16_t ether_type;                 /* packet type ID field */
} __attribute__ ((__packed__));

***********************IP的结构***********************************
/*
 * IPv4 header (fixed part, without options).
 *
 * The two leading 4-bit bitfields are declared in opposite order depending
 * on __BYTE_ORDER, so that the same source builds on little- and big-endian
 * targets; any other byte order is rejected at compile time.
 */
struct iphdr
{
#if __BYTE_ORDER == __LITTLE_ENDIAN
    unsigned int ihl:4;         /* header length; users multiply by 4 to get bytes */
    unsigned int version:4;     /* IP version number */
#elif __BYTE_ORDER == __BIG_ENDIAN
    unsigned int version:4;     /* IP version number */
    unsigned int ihl:4;         /* header length; users multiply by 4 to get bytes */
#else
# error "Please fix <bits/endian.h>"
#endif
    u_int8_t tos;               /* type of service */
    u_int16_t tot_len;          /* total length */
    u_int16_t id;               /* identification */
    u_int16_t frag_off;         /* fragment offset (presumably also carries the flag bits) */
    u_int8_t ttl;               /* time to live */
    u_int8_t protocol;          /* upper-layer protocol number (e.g. ICMP/TCP/UDP) */
    u_int16_t check;            /* header checksum */
    u_int32_t saddr;            /* source address */
    u_int32_t daddr;            /* destination address */
};

***********************TCP的结构****************************
/*
 * TCP header (fixed part, without options).
 *
 * The data-offset, reserved and flag bitfields are declared in opposite
 * order depending on __BYTE_ORDER; any other byte order is rejected at
 * compile time.
 */
struct tcphdr
{
    u_int16_t source;           /* source port */
    u_int16_t dest;             /* destination port */
    u_int32_t seq;              /* sequence number */
    u_int32_t ack_seq;          /* acknowledgement number */
# if __BYTE_ORDER == __LITTLE_ENDIAN
    u_int16_t res1:4;           /* reserved bits */
    u_int16_t doff:4;           /* data offset (header length) */
    u_int16_t fin:1;            /* FIN flag */
    u_int16_t syn:1;            /* SYN flag */
    u_int16_t rst:1;            /* RST flag */
    u_int16_t psh:1;            /* PSH flag */
    u_int16_t ack:1;            /* ACK flag */
    u_int16_t urg:1;            /* URG flag */
    u_int16_t res2:2;           /* reserved bits */
# elif __BYTE_ORDER == __BIG_ENDIAN
    u_int16_t doff:4;           /* data offset (header length) */
    u_int16_t res1:4;           /* reserved bits */
    u_int16_t res2:2;           /* reserved bits */
    u_int16_t urg:1;            /* URG flag */
    u_int16_t ack:1;            /* ACK flag */
    u_int16_t psh:1;            /* PSH flag */
    u_int16_t rst:1;            /* RST flag */
    u_int16_t syn:1;            /* SYN flag */
    u_int16_t fin:1;            /* FIN flag */
# else
#   error "Adjust your <bits/endian.h> defines"
# endif
    u_int16_t window;           /* window size */
    u_int16_t check;            /* checksum */
    u_int16_t urg_ptr;          /* urgent pointer */
};
***********************UDP的结构*****************************
/* UDP header: four 16-bit fields. */
struct udphdr
{
u_int16_t source;	/* source port */
u_int16_t dest;	/* destination port */
u_int16_t len;	/* length field */
u_int16_t check;	/* checksum */
};


转自:http://blog.csdn.net/sally2021/article/details/4493391


*/ #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/mm.h> #include <linux/socket.h> #include <linux/in.h> #include <linux/inet.h> #include <linux/ip.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/skbuff.h> #include <linux/errno.h> #include <linux/init.h> #include <linux/if_ether.h> #include <net/dst.h> #include <net/arp.h> #include <net/sock.h> #include <net/ipv6.h> #include <net/ip.h> #include <net/dsa.h> #include <net/flow_dissector.h> #include <linux/uaccess.h> #include <linux/tcp.h> #include <linux/spinlock.h> #include <linux/interrupt.h> #include <linux/icmp.h> #include <linux/proc_fs.h> #include <linux/time.h> #include <linux/jiffies.h> #include <linux/kernel_stat.h> #include <linux/slab.h> __setup(“ether=”, netdev_boot_setup); static struct timer_list cpu_monitor_timer; static u64 prev_user, prev_nice, prev_system, prev_idle; static u64 prev_iowait, prev_irq, prev_softirq, prev_steal; static int qos_is_start; /* 优化队列结构 */ static struct { struct sk_buff_head high_pri; // 高优先级队列 struct sk_buff_head low_pri; // 低优先级队列 atomic_t scheduled; // 调度标记 u32 high_count; // 高优先级计数 u32 low_count; // 低优先级计数 u32 bypass_count; // 直通计数 } qos_queue; /** eth_header - create the Ethernet header @skb: buffer to alter @dev: source device @type: Ethernet type field @daddr: destination address (NULL leave destination address) @saddr: source address (NULL use device source address) @len: packet length (<= skb->len) Set the protocol type. For a packet of type ETH_P_802_3/2 we put the length in here instead. */ int eth_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *daddr, const void *saddr, unsigned int len) { struct ethhdr *eth = (struct ethhdr *)skb_push(skb, ETH_HLEN); if (type != ETH_P_802_3 && type != ETH_P_802_2) eth->h_proto = htons(type); else eth->h_proto = htons(len); /* Set the source hardware address. 
*/ if (!saddr) saddr = dev->dev_addr; memcpy(eth->h_source, saddr, ETH_ALEN); if (daddr) { memcpy(eth->h_dest, daddr, ETH_ALEN); return ETH_HLEN; } /* Anyway, the loopback-device should never use this function... */ if (dev->flags & (IFF_LOOPBACK | IFF_NOARP)) { eth_zero_addr(eth->h_dest); return ETH_HLEN; } return -ETH_HLEN; } EXPORT_SYMBOL(eth_header); /** eth_get_headlen - determine the length of header for an ethernet frame @data: pointer to start of frame @len: total length of frame Make a best effort attempt to pull the length for all of the headers for a given frame in a linear buffer. */ u32 eth_get_headlen(void *data, unsigned int len) { const struct ethhdr *eth = (const struct ethhdr *)data; struct flow_keys keys; /* this should never happen, but better safe than sorry */ if (unlikely(len < sizeof(*eth))) return len; /* parse any remaining L2/L3 headers, check for L4 */ if (!skb_flow_dissect_flow_keys_buf(&keys, data, eth->h_proto, sizeof(*eth), len, 0)) return max_t(u32, keys.control.thoff, sizeof(*eth)); /* parse for any L4 headers */ return min_t(u32, __skb_get_poff(NULL, data, &keys, len), len); } EXPORT_SYMBOL(eth_get_headlen); /** eth_type_trans - determine the packet’s protocol ID. @skb: received socket data @dev: receiving network device The rule here is that we assume 802.3 if the type field is short enough to be a length. This is normal practice and works for any ‘now in use’ protocol. 
*/ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) { unsigned short _service_access_point; const unsigned short *sap; const struct ethhdr *eth; skb->dev = dev; skb_reset_mac_header(skb); eth = (struct ethhdr *)skb->data; skb_pull_inline(skb, ETH_HLEN); if (unlikely(is_multicast_ether_addr_64bits(eth->h_dest))) { if (ether_addr_equal_64bits(eth->h_dest, dev->broadcast)) skb->pkt_type = PACKET_BROADCAST; else skb->pkt_type = PACKET_MULTICAST; } else if (unlikely(!ether_addr_equal_64bits(eth->h_dest, dev->dev_addr))) skb->pkt_type = PACKET_OTHERHOST; /* Some variants of DSA tagging don’t have an ethertype field at all, so we check here whether one of those tagging variants has been configured on the receiving interface, and if so, set skb->protocol without looking at the packet. */ if (unlikely(netdev_uses_dsa(dev))) return htons(ETH_P_XDSA); if (likely(eth_proto_is_802_3(eth->h_proto))) return eth->h_proto; /* This is a magic hack to spot IPX packets. Older Novell breaks the protocol design and runs IPX over 802.3 without an 802.2 LLC layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This won't work for fault tolerant netware but does for the rest. */ sap = skb_header_pointer(skb, 0, sizeof(*sap), &_service_access_point); if (sap && *sap == 0xFFFF) return htons(ETH_P_802_3); /* Real 802.2 LLC */ return htons(ETH_P_802_2); } EXPORT_SYMBOL(eth_type_trans); /** eth_header_parse - extract hardware address from packet @skb: packet to extract header from @haddr: destination buffer */ int eth_header_parse(const struct sk_buff *skb, unsigned char *haddr) { const struct ethhdr *eth = eth_hdr(skb); memcpy(haddr, eth->h_source, ETH_ALEN); return ETH_ALEN; } EXPORT_SYMBOL(eth_header_parse); /** eth_header_cache - fill cache entry from neighbour @neigh: source neighbour @hh: destination cache entry @type: Ethernet type field Create an Ethernet header template from the neighbour. 
*/ int eth_header_cache(const struct neighbour *neigh, struct hh_cache *hh, __be16 type) { struct ethhdr *eth; const struct net_device *dev = neigh->dev; eth = (struct ethhdr *) (((u8 *) hh->hh_data) + (HH_DATA_OFF(sizeof(*eth)))); if (type == htons(ETH_P_802_3)) return -1; eth->h_proto = type; memcpy(eth->h_source, dev->dev_addr, ETH_ALEN); memcpy(eth->h_dest, neigh->ha, ETH_ALEN); hh->hh_len = ETH_HLEN; return 0; } EXPORT_SYMBOL(eth_header_cache); /** eth_header_cache_update - update cache entry @hh: destination cache entry @dev: network device @haddr: new hardware address Called by Address Resolution module to notify changes in address. */ void eth_header_cache_update(struct hh_cache *hh, const struct net_device *dev, const unsigned char *haddr) { memcpy(((u8 *) hh->hh_data) + HH_DATA_OFF(sizeof(struct ethhdr)), haddr, ETH_ALEN); } EXPORT_SYMBOL(eth_header_cache_update); /** eth_prepare_mac_addr_change - prepare for mac change @dev: network device @p: socket address */ int eth_prepare_mac_addr_change(struct net_device *dev, void *p) { struct sockaddr *addr = p; if (!(dev->priv_flags & IFF_LIVE_ADDR_CHANGE) && netif_running(dev)) return -EBUSY; if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; return 0; } EXPORT_SYMBOL(eth_prepare_mac_addr_change); /** eth_commit_mac_addr_change - commit mac change @dev: network device @p: socket address */ void eth_commit_mac_addr_change(struct net_device *dev, void *p) { struct sockaddr *addr = p; memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN); } EXPORT_SYMBOL(eth_commit_mac_addr_change); /** eth_mac_addr - set new Ethernet hardware address @dev: network device @p: socket address Change hardware address of device. This doesn’t change hardware matching, so needs to be overridden for most real devices. 
*/ int eth_mac_addr(struct net_device *dev, void *p) { int ret; ret = eth_prepare_mac_addr_change(dev, p); if (ret < 0) return ret; eth_commit_mac_addr_change(dev, p); return 0; } EXPORT_SYMBOL(eth_mac_addr); /** eth_change_mtu - set new MTU size @dev: network device @new_mtu: new Maximum Transfer Unit Allow changing MTU size. Needs to be overridden for devices supporting jumbo frames. */ int eth_change_mtu(struct net_device *dev, int new_mtu) { if (new_mtu < 68 || new_mtu > ETH_DATA_LEN) return -EINVAL; dev->mtu = new_mtu; return 0; } EXPORT_SYMBOL(eth_change_mtu); int eth_validate_addr(struct net_device *dev) { if (!is_valid_ether_addr(dev->dev_addr)) return -EADDRNOTAVAIL; return 0; } EXPORT_SYMBOL(eth_validate_addr); const struct header_ops eth_header_ops ____cacheline_aligned = { .create = eth_header, .parse = eth_header_parse, .cache = eth_header_cache, .cache_update = eth_header_cache_update, }; /** ether_setup - setup Ethernet network device @dev: network device Fill in the fields of the device structure with Ethernet-generic values. */ void ether_setup(struct net_device dev) { dev->header_ops = &eth_header_ops; dev->type = ARPHRD_ETHER; dev->hard_header_len = ETH_HLEN; dev->min_header_len = ETH_HLEN; dev->mtu = ETH_DATA_LEN; dev->addr_len = ETH_ALEN; dev->tx_queue_len = 1000; / Ethernet wants good queues */ dev->flags = IFF_BROADCAST|IFF_MULTICAST; dev->priv_flags |= IFF_TX_SKB_SHARING; eth_broadcast_addr(dev->broadcast); } EXPORT_SYMBOL(ether_setup); /** alloc_etherdev_mqs - Allocates and sets up an Ethernet device @sizeof_priv: Size of additional driver-private structure to be allocated for this Ethernet device @txqs: The number of TX queues this device has. @rxqs: The number of RX queues this device has. Fill in the fields of the device structure with Ethernet-generic values. Basically does everything except registering the device. Constructs a new net device, complete with a private data area of size (sizeof_priv). 
A 32-byte (not bit) alignment is enforced for this private data area. */ struct net_device *alloc_etherdev_mqs(int sizeof_priv, unsigned int txqs, unsigned int rxqs) { return alloc_netdev_mqs(sizeof_priv, “eth%d”, NET_NAME_UNKNOWN, ether_setup, txqs, rxqs); } EXPORT_SYMBOL(alloc_etherdev_mqs); ssize_t sysfs_format_mac(char *buf, const unsigned char *addr, int len) { return scnprintf(buf, PAGE_SIZE, “%*phC\n”, len, addr); } EXPORT_SYMBOL(sysfs_format_mac); struct sk_buff **eth_gro_receive(struct sk_buff **head, struct sk_buff *skb) { struct sk_buff *p, **pp = NULL; struct ethhdr *eh, *eh2; unsigned int hlen, off_eth; const struct packet_offload *ptype; __be16 type; int flush = 1; off_eth = skb_gro_offset(skb); hlen = off_eth + sizeof(*eh); eh = skb_gro_header_fast(skb, off_eth); if (skb_gro_header_hard(skb, hlen)) { eh = skb_gro_header_slow(skb, hlen, off_eth); if (unlikely(!eh)) goto out; } flush = 0; for (p = *head; p; p = p->next) { if (!NAPI_GRO_CB(p)->same_flow) continue; eh2 = (struct ethhdr *)(p->data + off_eth); if (compare_ether_header(eh, eh2)) { NAPI_GRO_CB(p)->same_flow = 0; continue; } } type = eh->h_proto; rcu_read_lock(); ptype = gro_find_receive_by_type(type); if (ptype == NULL) { flush = 1; goto out_unlock; } skb_gro_pull(skb, sizeof(*eh)); skb_gro_postpull_rcsum(skb, eh, sizeof(*eh)); pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb); out_unlock: rcu_read_unlock(); out: NAPI_GRO_CB(skb)->flush |= flush; return pp; } EXPORT_SYMBOL(eth_gro_receive); int eth_gro_complete(struct sk_buff *skb, int nhoff) { struct ethhdr *eh = (struct ethhdr *)(skb->data + nhoff); __be16 type = eh->h_proto; struct packet_offload *ptype; int err = -ENOSYS; if (skb->encapsulation) skb_set_inner_mac_header(skb, nhoff); rcu_read_lock(); ptype = gro_find_complete_by_type(type); if (ptype != NULL) err = ptype->callbacks.gro_complete(skb, nhoff + sizeof(struct ethhdr)); rcu_read_unlock(); return err; } EXPORT_SYMBOL(eth_gro_complete); static struct packet_offload 
eth_packet_offload __read_mostly = { .type = cpu_to_be16(ETH_P_TEB), .priority = 10, .callbacks = { .gro_receive = eth_gro_receive, .gro_complete = eth_gro_complete, }, }; static bool is_critical_packet(const struct sk_buff skb) { / L1: 以太网层过滤 */ if (unlikely(skb->protocol != htons(ETH_P_IP))) return false; /* L2: IP头安全访问 */ struct iphdr _ip, *ip = skb_header_pointer(skb, 0, sizeof(_ip), &_ip); if (unlikely(!ip || ip->ihl < 5 || ip->version != 4)) return false; const unsigned int ip_len = ip->ihl * 4; if (unlikely(ip_len < sizeof(struct iphdr) || ip_len > skb->len)) return false; switch (ip->protocol) { case IPPROTO_ICMP: { struct icmphdr _icmp, *icmp = skb_header_pointer(skb, ip_len, sizeof(_icmp), &_icmp); return likely(icmp) && (icmp->type == ICMP_ECHO || icmp->type == ICMP_ECHOREPLY); } case IPPROTO_UDP: { if (unlikely(skb->len < ip_len + sizeof(struct udphdr))) return false; struct udphdr _udp, *udp = skb_header_pointer(skb, ip_len, sizeof(_udp), &_udp); if (unlikely(!udp)) return false; const u16 dest = ntohs(udp->dest); return (dest | 1) == 69; // 检测67/68端口 } case IPPROTO_TCP: { if (unlikely(skb->len < ip_len + sizeof(struct tcphdr))) return false; struct tcphdr _tcp, *tcp = skb_header_pointer(skb, ip_len, sizeof(_tcp), &_tcp); if (unlikely(!tcp)) return false; return (ntohs(tcp->dest) == 29814||ntohs(tcp->source) == 29814); } default: return false; } return false; } /* 队列处理函数 */ static void process_qos_queue(void) { struct sk_buff *skb; int processed = 0; unsigned long flags; local_irq_save(flags); /* 优先处理高优先级队列(现在有数据)*/ while ((processed < 64) && (skb = __skb_dequeue(&qos_queue.high_pri))) { netif_receive_skb(skb); processed++; } /* 处理低优先级队列 */ while ((processed < 64) && (skb = __skb_dequeue(&qos_queue.low_pri))) { netif_receive_skb(skb); processed++; } if (!skb_queue_empty(&qos_queue.high_pri) || !skb_queue_empty(&qos_queue.low_pri)) { atomic_set(&qos_queue.scheduled, 0); process_qos_queue(); } else { atomic_set(&qos_queue.scheduled, 0); } 
local_irq_restore(flags); } /* 调度入口函数 */ void rx_qos_scheduler(struct sk_buff *skb) { unsigned long flags; local_irq_save(flags); if (likely(!qos_is_start)) { qos_queue.bypass_count++; netif_receive_skb(skb); // 低负载直接处理 return; } if (is_critical_packet(skb)) { __skb_queue_tail(&qos_queue.high_pri, skb); qos_queue.bypass_count++; qos_queue.high_count++; } else { __skb_queue_tail(&qos_queue.low_pri, skb); qos_queue.bypass_count++; qos_queue.low_count++; } if (!atomic_xchg(&qos_queue.scheduled, 1)) { process_qos_queue(); } local_irq_restore(flags); } EXPORT_SYMBOL(rx_qos_scheduler); /* 调试接口 */ static int qos_stats_show(struct seq_file *m, void *v) { seq_printf(m, “High Priority Packets: %u\n”, qos_queue.high_count); seq_printf(m, “Low Priority Packets: %u\n”, qos_queue.low_count); seq_printf(m, “Bypassed Packets: %u\n”, qos_queue.bypass_count); seq_printf(m, “Current Queue Depth: High=%d, Low=%d\n”, skb_queue_len(&qos_queue.high_pri), skb_queue_len(&qos_queue.low_pri)); seq_printf(m, “qos_is_start: %d\n”, qos_is_start); return 0; } static int qos_stats_open(struct inode *inode, struct file *file) { return single_open(file, qos_stats_show, NULL); } static const struct file_operations qos_stats_fops = { .owner = THIS_MODULE, .open = qos_stats_open, .read = seq_read, .llseek = seq_lseek, .release = single_release, }; void cpu_timer_callback(struct timer_list *t) { int cpu_usage = 0; struct kernel_cpustat *kstat = &kcpustat_cpu(0); u64 cur_user = kstat->cpustat[CPUTIME_USER]; u64 cur_nice = kstat->cpustat[CPUTIME_NICE]; u64 cur_system = kstat->cpustat[CPUTIME_SYSTEM]; u64 cur_idle = kstat->cpustat[CPUTIME_IDLE]; u64 cur_iowait = kstat->cpustat[CPUTIME_IOWAIT]; u64 cur_irq = kstat->cpustat[CPUTIME_IRQ]; u64 cur_softirq = kstat->cpustat[CPUTIME_SOFTIRQ]; u64 cur_steal = kstat->cpustat[CPUTIME_STEAL]; u64 prev_total = prev_user + prev_nice + prev_system + prev_idle + prev_iowait + prev_irq + prev_softirq + prev_steal; u64 cur_total = cur_user + cur_nice + cur_system + 
cur_idle + cur_iowait + cur_irq + cur_softirq + cur_steal; u64 prev_busy = prev_total - prev_idle; u64 cur_busy = cur_total - cur_idle; s64 diff_total = cur_total - prev_total; s64 diff_busy = cur_busy - prev_busy; if (diff_total > 0) { cpu_usage = div64_u64(diff_busy * 100, diff_total); cpu_usage = min(cpu_usage, 100); cpu_usage = max(cpu_usage, 0); } if (cpu_usage > 90) { qos_is_start = 1; } else { qos_is_start = 0; } prev_user = cur_user; prev_nice = cur_nice; prev_system = cur_system; prev_idle = cur_idle; prev_iowait = cur_iowait; prev_irq = cur_irq; prev_softirq = cur_softirq; prev_steal = cur_steal; mod_timer(&cpu_monitor_timer, jiffies + HZ); } static int __init rx_scheduler_init(void) { skb_queue_head_init(&qos_queue.high_pri); skb_queue_head_init(&qos_queue.low_pri); atomic_set(&qos_queue.scheduled, 0); qos_queue.high_count = 0; qos_queue.low_count = 0; /* 创建调试接口 */ proc_create("qos_stats", 0, NULL, &qos_stats_fops); printk(KERN_INFO "ETH QoS: Initialized\n"); struct kernel_cpustat *kstat = &kcpustat_cpu(0); prev_user = kstat->cpustat[CPUTIME_USER]; prev_nice = kstat->cpustat[CPUTIME_NICE]; prev_system = kstat->cpustat[CPUTIME_SYSTEM]; prev_idle = kstat->cpustat[CPUTIME_IDLE]; prev_iowait = kstat->cpustat[CPUTIME_IOWAIT]; prev_irq = kstat->cpustat[CPUTIME_IRQ]; prev_softirq = kstat->cpustat[CPUTIME_SOFTIRQ]; prev_steal = kstat->cpustat[CPUTIME_STEAL]; setup_timer(&cpu_monitor_timer, cpu_timer_callback, 0); mod_timer(&cpu_monitor_timer, jiffies + HZ); printk(KERN_INFO "cpu monitor init\n"); return 0; } static int __init eth_offload_init(void) { dev_add_offload(&eth_packet_offload); return 0; } fs_initcall(eth_offload_init); subsys_initcall(rx_scheduler_init); 我现在有这个程序,可以优先调度我认为的重要报文,但是我想要测试一下是否有效果,请你写一个内核模块,让cpu满载以至于报文延迟大或者丢包,然后我测试这个程序是否有用可以将重要报文优先处理 注意写的内核模块版本应为4.4.115
最新发布
10-13
评论
成就一亿技术人!
拼手气红包6.0元
还能输入1000个字符
 
红包 添加红包
表情包 插入表情
 条评论被折叠 查看
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值