-
intip_output(struct sk_buff*skb)
-
{
-
/* 得到出口的dev */
-
struct net_device*dev=skb_dst(skb)->dev;
-
IP_UPD_PO_STATS(dev_net(dev),IPSTATS_MIB_OUT,skb->len);
-
skb->dev=dev;
-
skb->protocol=htons(ETH_P_IP);
-
return NF_HOOK_COND(NFPROTO_IPV4,NF_INET_POST_ROUTING,skb,NULL,dev,
-
ip_finish_output,
-
!(IPCB(skb)->flags&IPSKB_REROUTED));
-
}
-
staticintip_finish_output(struct sk_buff*skb)
-
{
-
#ifdefined(CONFIG_NETFILTER)&&defined(CONFIG_XFRM)
-
/*Policy lookup after SNAT yielded a new policy*/
-
if(skb_dst(skb)->xfrm!=NULL){
-
IPCB(skb)->flags|=IPSKB_REROUTED;
-
return dst_output(skb);
-
}
-
#endif
-
/* 处理需要IP分片的情况 */
-
if(skb->len>ip_skb_dst_mtu(skb)&&!skb_is_gso(skb))
-
return ip_fragment(skb,ip_finish_output2);
-
/* 不需要IP分片, 我们就看这种一般情况 */
-
else
-
return ip_finish_output2(skb);
-
}
-
static inlineintip_finish_output2(struct sk_buff*skb)
-
{
-
struct dst_entry*dst=skb_dst(skb);
-
struct rtable*rt=(struct rtable*)dst;
-
struct net_device*dev=dst->dev;
-
unsignedinthh_len=LL_RESERVED_SPACE(dev);
-
if(rt->rt_type==RTN_MULTICAST){
-
IP_UPD_PO_STATS(dev_net(dev),IPSTATS_MIB_OUTMCAST,skb->len);
-
}elseif(rt->rt_type==RTN_BROADCAST)
-
IP_UPD_PO_STATS(dev_net(dev),IPSTATS_MIB_OUTBCAST,skb->len);
-
-
/*Be paranoid,rather than too clever.*/
-
if(unlikely(skb_headroom(skb)<hh_len&&dev->header_ops)){
-
/*
-
skb的首部空间不足,无法保存l2层的硬件地址。
-
这时,需要重新分配一个更大bufer。
-
*/
-
struct sk_buff*skb2;
-
-
skb2=skb_realloc_headroom(skb,LL_RESERVED_SPACE(dev));
-
if(skb2==NULL){
-
kfree_skb(skb);
-
return-ENOMEM;
-
}
-
if(skb->sk)
-
skb_set_owner_w(skb2,skb->sk);
-
kfree_skb(skb);
-
skb=skb2;
-
}
-
if(dst->hh)
-
return neigh_hh_output(dst->hh,skb);
-
/*
-
dst没有L2层地址的cache,需要调用neighbour子系统的output进行发送。
-
*/
-
elseif(dst->neighbour)
-
return dst->neighbour->output(skb);
-
if(net_ratelimit())
-
printk(KERN_DEBUG"ip_finish_output2: No header cache and no neighbour!\n");
-
kfree_skb(skb);
-
return-EINVAL;
-
}
-
int neigh_resolve_output(struct sk_buff *skb)
-
{
-
struct dst_entry *dst = skb_dst(skb);
-
struct neighbour *neigh;
-
int rc = 0;
-
-
if(!dst ||!(neigh = dst->neighbour))
-
goto discard;
-
__skb_pull(skb, skb_network_offset(skb));
-
if(!neigh_event_send(neigh, skb)){
-
/* 无需发送neigh请求, 可以直接从dev中获得 */
-
interr;
-
struct net_device *dev = neigh->dev;
-
if(dev->header_ops->cache &&!dst->hh){
-
write_lock_bh(&neigh->lock);
-
if(!dst->hh)
-
neigh_hh_init(neigh, dst, dst->ops->protocol);
-
err= dev_hard_header(skb, dev, ntohs(skb->protocol),
-
neigh->ha,NULL, skb->len);
-
write_unlock_bh(&neigh->lock);
-
}else{
-
read_lock_bh(&neigh->lock);
-
err= dev_hard_header(skb, dev, ntohs(skb->protocol),
-
neigh->ha,NULL, skb->len);
-
read_unlock_bh(&neigh->lock);
-
}
-
if(err>= 0)
-
rc = neigh->ops->queue_xmit(skb); //发送数据包
-
else
-
goto out_kfree_skb;
-
}
-
out:
-
return rc;
-
discard:
-
NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n",
-
dst, dst ? dst->neighbour :NULL);
-
out_kfree_skb:
-
rc =-EINVAL;
-
kfree_skb(skb);
-
goto out;
-
}
-
/*
 * Excerpt (not a complete function): while the neighbour entry is in a
 * NUD_VALID state, dequeue each packet parked on neigh->arp_queue and
 * send it through the neighbour's output method.  neigh->lock is dropped
 * around the output call and re-taken before the next iteration.
 */
while (neigh->nud_state & NUD_VALID &&
       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
	struct neighbour *n1 = neigh;

	write_unlock_bh(&neigh->lock);
	/* On shaper/eql skb->dst->neighbour != neigh :( */
	if (skb_dst(skb) && skb_dst(skb)->neighbour)
		n1 = skb_dst(skb)->neighbour;
	n1->output(skb);
	write_lock_bh(&neigh->lock);
}
-
intdev_queue_xmit(struct sk_buff*skb)
-
{
-
struct net_device*dev=skb->dev;
-
struct netdev_queue*txq;
-
struct Qdisc*q;
-
intrc=-ENOMEM;
-
-
/*Disable soft irqsforvarious locks below.Also
-
*stops preemptionforRCU.
-
*/
-
rcu_read_lock_bh();
-
txq=dev_pick_tx(dev,skb);
-
q=rcu_dereference_bh(txq->qdisc);
-
-
#ifdef CONFIG_NET_CLS_ACT
-
skb->tc_verd=SET_TC_AT(skb->tc_verd,AT_EGRESS);
-
#endif
-
if(q->enqueue){
-
/* 一般的dev都应该进入这里 */
-
rc=__dev_xmit_skb(skb,q,dev,txq);
-
goto out;
-
}
-
}
-
static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
-
struct net_device *dev,
-
struct netdev_queue *txq)
-
{
-
spinlock_t *root_lock = qdisc_lock(q);
-
bool contended = qdisc_is_running(q);
-
int rc;
-
-
/*
-
* Heuristic to force contended enqueues to serialize on a
-
* separate lock before trying toget qdisc main lock.
-
* This permits __QDISC_STATE_RUNNING owner toget the lock more often
-
*and dequeue packets faster.
-
*/
-
if(unlikely(contended))
-
spin_lock(&q->busylock);
-
-
spin_lock(root_lock);
-
if(unlikely(test_bit(__QDISC_STATE_DEACTIVATED,&q->state))){
-
/* 该quque的状态为非活动的,drop该数据包 */
-
kfree_skb(skb);
-
rc = NET_XMIT_DROP;
-
}elseif((q->flags & TCQ_F_CAN_BYPASS)&&!qdisc_qlen(q)&&
-
qdisc_run_begin(q)){
-
/*
-
* This is a work-conserving queue; there are no old skbs
-
* waiting to be sent out;and the qdisc isnot running -
-
* xmit the skb directly.
-
*/
-
if(!(dev->priv_flags & IFF_XMIT_DST_RELEASE))
-
skb_dst_force(skb);
-
__qdisc_update_bstats(q, skb->len);
-
if(sch_direct_xmit(skb, q, dev, txq, root_lock)){
-
if(unlikely(contended)){
-
spin_unlock(&q->busylock);
-
contended =false;
-
}
-
__qdisc_run(q);
-
}else
-
qdisc_run_end(q);
-
-
rc = NET_XMIT_SUCCESS;
-
}else{
-
skb_dst_force(skb);
-
/* 将数据包加入到queue中 */
-
rc = qdisc_enqueue_root(skb, q);
-
if(qdisc_run_begin(q)){
-
if(unlikely(contended)){
-
spin_unlock(&q->busylock);
-
contended =false;
-
}
-
__qdisc_run(q);
-
}
-
}
-
spin_unlock(root_lock);
-
if(unlikely(contended))
-
spin_unlock(&q->busylock);
-
return rc;
-
}
请看dev_activate,用于激活网卡。
-
void dev_activate(struct net_device *dev)
-
{
-
int need_watchdog;
-
-
/* No queueing discipline is attached to device;
-
create default one i.e. pfifo_fast for devices,
-
which need queueing and noqueue_qdisc for
-
virtual interfaces
-
*/
当没有指定queueing discipline时,就使用默认的discipline
*/
-
if(dev->qdisc ==&noop_qdisc)
-
attach_default_qdiscs(dev);
-
-
...... ......
-
}
-
/*
 * Operations table for the "pfifo_fast" queueing discipline.  Each
 * callback is wired to the matching pfifo_fast_* routine; priv_size
 * reserves room for struct pfifo_fast_priv.
 */
struct Qdisc_ops pfifo_fast_ops __read_mostly = {
	.id		= "pfifo_fast",
	.priv_size	= sizeof(struct pfifo_fast_priv),
	.enqueue	= pfifo_fast_enqueue,
	.dequeue	= pfifo_fast_dequeue,
	.peek		= pfifo_fast_peek,
	.init		= pfifo_fast_init,
	.reset		= pfifo_fast_reset,
	.dump		= pfifo_fast_dump,
	.owner		= THIS_MODULE,
};
-
static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc)
-
{
-
if(skb_queue_len(&qdisc->q)< qdisc_dev(qdisc)->tx_queue_len){
-
int band = prio2band[skb->priority & TC_PRIO_MAX];
-
struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
-
struct sk_buff_head *list = band2list(priv, band);
-
-
priv->bitmap |=(1 << band);
-
qdisc->q.qlen++;
-
return __qdisc_enqueue_tail(skb, qdisc, list);
-
}
-
-
return qdisc_drop(skb, qdisc);
-
}
然后我们还需要回到__dev_xmit_skb中:在将数据包加入到队列之后,要保证qdisc处于运行态。
-
rc = qdisc_enqueue_root(skb, q);
-
if(qdisc_run_begin(q)){
-
if(unlikely(contended)){
-
spin_unlock(&q->busylock);
-
contended =false;
-
}
-
__qdisc_run(q);
-
}
-
void __qdisc_run(struct Qdisc *q)
-
{
-
unsigned long start_time = jiffies;
/*
qdisc_restart中发送了数据包。
这里是循环发送,直至qdisc_restart返回0
或者其它进程请求CPU或发送已运行比较长的时间(1jiffie)则也跳出循环体。
*/
-
while(qdisc_restart(q)){
-
/*
-
* Postpone processing if
-
* 1. another process needs the CPU;
-
* 2. we've been doing it for too long.
-
*/
-
if(need_resched()|| jiffies != start_time){
-
/*
-
需要以后再执行发送动作(利用softirq)
-
*/
-
__netif_schedule(q);
-
break;
-
}
-
}
-
-
qdisc_run_end(q);
-
}
-
int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
-
struct net_device *dev, struct netdev_queue *txq,
-
spinlock_t *root_lock)
-
{
-
int ret = NETDEV_TX_BUSY;
-
-
/*And release qdisc */
-
spin_unlock(root_lock);
-
-
HARD_TX_LOCK(dev, txq, smp_processor_id());
-
//设备没有被停止,且发送队列没有被冻结
-
if(!netif_tx_queue_stopped(txq)&&!netif_tx_queue_frozen(txq))
-
ret = dev_hard_start_xmit(skb, dev, txq); //发送数据包
-
-
HARD_TX_UNLOCK(dev, txq);
-
-
spin_lock(root_lock);
-
-
if(dev_xmit_complete(ret)){
-
/* Driver sent out skb successfully or skb was consumed */
-
//发送成功,返回qdisc新的队列产的
-
ret = qdisc_qlen(q);
-
}elseif(ret == NETDEV_TX_LOCKED){
-
/* Driver try lock failed */
-
//锁冲突
-
ret = handle_dev_cpu_collision(skb, txq, q);
-
}else{
-
/* Driver returned NETDEV_TX_BUSY - requeue skb */
-
if(unlikely (ret != NETDEV_TX_BUSY && net_ratelimit()))
-
printk(KERN_WARNING "BUG %s code %d qlen %d\n",
-
dev->name, ret, q->q.qlen);
-
ret = dev_requeue_skb(skb, q);
-
}
-
-
if(ret &&(netif_tx_queue_stopped(txq)||
-
netif_tx_queue_frozen(txq)))
-
ret = 0;
-
-
return ret;
-
}
-
int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
-
struct netdev_queue *txq)
-
{
-
const struct net_device_ops *ops = dev->netdev_ops;
-
int rc = NETDEV_TX_OK;
-
-
if(likely(!skb->next)){
-
if(!list_empty(&ptype_all))
-
dev_queue_xmit_nit(skb, dev); //ptype_all上的协议处理,如tcpdump
-
-
/*
-
*If device doesnt need skb->dst, release it rightnowwhile
-
* its hot in this cpu cache
-
*/
-
/* device 不需要dst */
-
if(dev->priv_flags & IFF_XMIT_DST_RELEASE)
-
skb_dst_drop(skb);
-
skb_orphan_try(skb);
-
-
if(netif_needs_gso(dev, skb)){
-
/* 需要scatter gather功能 */
-
if(unlikely(dev_gso_segment(skb)))
-
goto out_kfree_skb;
-
if(skb->next)
-
goto gso;
-
}else{
-
//不需要scatter gather
-
if(skb_needs_linearize(skb, dev)&&
-
__skb_linearize(skb))
-
goto out_kfree_skb;
-
-
/*If packet isnot checksummed and device does not
-
* support checksumming for this protocol, complete
-
* checksumming here.
-
*/
-
if(skb->ip_summed == CHECKSUM_PARTIAL){
-
/* 计算checksum */
-
skb_set_transport_header(skb, skb->csum_start -
-
skb_headroom(skb));
-
if(!dev_can_checksum(dev, skb)&&
-
skb_checksum_help(skb))
-
goto out_kfree_skb;
-
}
-
}
-
rc = ops->ndo_start_xmit(skb, dev);
-
if(rc == NETDEV_TX_OK)
-
txq_trans_update(txq);
-
return rc;
-
}
-
-
gso:
-
/* 循环发送数据包 */
-
do{
-
struct sk_buff *nskb = skb->next;
-
-
skb->next= nskb->next;
-
nskb->next=NULL;
-
-
/*
-
*If device doesnt need nskb->dst, release it rightnowwhile
-
* its hot in this cpu cache
-
*/
-
if(dev->priv_flags & IFF_XMIT_DST_RELEASE)
-
skb_dst_drop(nskb);
-
-
rc = ops->ndo_start_xmit(nskb, dev);
-
if(unlikely(rc != NETDEV_TX_OK)){
-
if(rc &~NETDEV_TX_MASK)
-
goto out_kfree_gso_skb;
-
nskb->next= skb->next;
-
skb->next= nskb;
-
return rc;
-
}
-
txq_trans_update(txq);
-
if(unlikely(netif_tx_queue_stopped(txq)&& skb->next))
-
return NETDEV_TX_BUSY;
-
}while(skb->next);
-
-
out_kfree_gso_skb:
-
if(likely(skb->next==NULL))
-
skb->destructor = DEV_GSO_CB(skb)->destructor;
-
out_kfree_skb:
-
kfree_skb(skb);
-
return rc;
-
}
-
static const struct net_device_ops e1000_netdev_ops ={
-
.ndo_open = e1000_open,
-
.ndo_stop = e1000_close,
-
.ndo_start_xmit = e1000_xmit_frame,
-
.ndo_get_stats = e1000_get_stats,
-
.ndo_set_rx_mode = e1000_set_rx_mode,
-
.ndo_set_mac_address = e1000_set_mac,
-
.ndo_tx_timeout = e1000_tx_timeout,
-
.ndo_change_mtu = e1000_change_mtu,
-
.ndo_do_ioctl = e1000_ioctl,
-
.ndo_validate_addr = eth_validate_addr,
-
-
.ndo_vlan_rx_register = e1000_vlan_rx_register,
-
.ndo_vlan_rx_add_vid = e1000_vlan_rx_add_vid,
-
.ndo_vlan_rx_kill_vid = e1000_vlan_rx_kill_vid,
-
#ifdef CONFIG_NET_POLL_CONTROLLER
-
.ndo_poll_controller = e1000_netpoll,
-
#endif
-
};