以e1000e为例
一些初始化过程
module init 注册pci driver
ret = pci_register_driver(&e1000_driver);
pci_driver e1000_driver.probe = e1000_probe
e1000_probe:probe 即探测 device,当 PCI 总线发现新的、与该 driver 匹配的 device 时被调用
e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
- netdev = alloc_etherdev(sizeof(struct e1000_adapter));内存布局 net_device 按32字节对齐,然后是 e1000_adapter,参考 netdev_priv(netdev)
- netdev->irq = pdev->irq;
- netdev->netdev_ops = &e1000e_netdev_ops;
- netif_napi_add(netdev, &adapter->napi, e1000e_poll , 64);
- 【napi_struct->poll就会被初始化为e1000e_poll】
- e1000_sw_init(adapter);
- e1000_alloc_queues(adapter);
- set_bit(__E1000_DOWN, &adapter->state);
- e1000e_read_mac_addr(&adapter->hw)
- memcpy(netdev->dev_addr, adapter->hw.mac.addr, netdev->addr_len);
- register_netdev(netdev);
e1000_open
- e1000_configure(adapter);
- e1000_configure_rx(adapter);
- adapter->alloc_rx_buf(rx_ring, e1000_desc_unused(rx_ring), GFP_KERNEL);
- 即调用 e1000_alloc_rx_buffers 提前为报文描述符skb和报文数据分配内存
- 从kmem_cache *skbuff_head_cache中分配skb
- 从 per-CPU变量netdev_alloc_cache中分配报文数据内存!!
- err = e1000_request_irq(adapter);
- napi_enable(&adapter->napi);
- e1000_irq_enable(adapter);
e1000e_up
- e1000_configure(adapter);
- e1000_configure_rx(adapter); 【同 e1000_open】
收到报文
irqreturn_t e1000_intr(int __always_unused irq, void *data)
__napi_schedule(&adapter->napi) 注意napi代表一个device
- local_irq_save(flags); 关中断
- ____napi_schedule(this_cpu_ptr(&softnet_data), n);
- * list_add_tail(&napi->poll_list, &sd->poll_list); *
- __raise_softirq_irqoff(NET_RX_SOFTIRQ);
- local_irq_restore(flags);
软中断NET_RX_SOFTIRQ handler net_rx_action
- void net_rx_action(struct softirq_action *h)
- 把 softnet_data 的 poll_list 拼接(splice)到局部变量 list 中,再把 sd 的 poll_list 重新初始化(init)为空链表
- 遍历每一个 napi_struct,调用他们的poll函数,即e1000e_poll
local_irq_disable();
list_splice_init(&sd->poll_list, &list);
local_irq_enable();
e1000e_poll
关注对ethernet的GRO
GRO之后就把报文上传给网络接口层 netif_receive_skb_internal
- adapter->clean_rx(adapter->rx_ring, &work_done, weight);
- 即bool e1000_clean_rx_irq(struct e1000_ring *rx_ring, int *work_done, int work_to_do)
- 逐个遍历 ring 数组每个元素(rx 描述符)的 status_error,如果其中 E1000_RXD_STAT_DD(即 Descriptor Done)位已置位,则
- 取得skb,并调用 e1000_receive_skb(adapter, netdev, skb, staterr, rx_desc->wb.upper.vlan);
- 设置 skb->pkt_type 可能为 PACKET_HOST
- 设置 skb->protocol 可能为 ETH_P_802_2
- napi_gro_receive(&adapter->napi, skb);
- 第一步: gro_result_t ret = dev_gro_receive(napi, skb)
- 第二步: 把 ret 交给 napi_skb_finish 处理
- gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
- case GRO_NORMAL: netif_receive_skb_internal(skb)
- gro_result_t ret = dev_gro_receive(napi, skb)
- 取得skb,并调用 e1000_receive_skb(adapter, netdev, skb, staterr, rx_desc->wb.upper.vlan);
netif_receive_skb_internal会考虑RPS
Receive Packet Steering 把报文负载均衡到其他 CPU 的 softnet_data 的 input_pkt_queue 中
skb变量pfmemalloc的含义
- __GFP_MEMALLOC 对应 ALLOC_NO_WATERMARKS 参见 gfp_to_alloc_flags(gfp_t gfp_mask)
- ALLOC_NO_WATERMARKS 对应 set_page_pfmemalloc(page)
对应 nc->pfmemalloc = page_is_pfmemalloc(page); 参见 __alloc_page_frag(struct page_frag_cache *nc, unsigned int fragsz, gfp_t gfp_mask)
- __napi_alloc_skb(struct napi_struct *napi, unsigned int len, gfp_t gfp_mask) 关键调用:
- __napi_alloc_skb(struct napi_struct *napi, unsigned int len,
对应 skb->pfmemalloc = 1; 参见 __napi_alloc_skb(struct napi_struct *napi,