TCP层每次收到一个带ACK的报文,都会进入tcp_ack这个函数做决策。
先直接看带注释的代码:
static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
//这两行是几乎每个函数都有的,获取tcp_sock结构体,得到当前tcp流的信息
u32 prior_snd_una = tp->snd_una;
//将最小未确认的序号保存到prior_snd_una
u32 ack_seq = TCP_SKB_CB(skb)->seq;
//ack包的序号
u32 ack = TCP_SKB_CB(skb)->ack_seq;
//ack确认的序号
u32 prior_in_flight;
//之前的in_flight包
u32 prior_fackets;
int prior_packets;
int frto_cwnd = 0;
/* If the ack is older than previous acks
* then we can probably ignore it.
*/
if (before(ack, prior_snd_una))
goto old_ack;
//ack包的序号比最小未确认的序号还要小的话,则认为则是老的ack
/* If the ack includes data we haven't sent yet, discard
* this segment (RFC793 Section 3.9).
*/
if (after(ack, tp->snd_nxt))
goto invalid_ack;
//如果ack序号比要发的数据包序号还大,则丢弃
if (after(ack, prior_snd_una))
flag |= FLAG_SND_UNA_ADVANCED;
//如果序号>=最小为确认的包,则flag置FLAG_SND_UNA_ADVANCED,这里使用的是flag与FLAG_SND_UNA_ADVANCED相或,实际就是对应位置一
if (sysctl_tcp_abc) {//如果启用abc机制
if (icsk->icsk_ca_state < TCP_CA_CWR)//OPEN和DISORDER状态
tp->bytes_acked += ack - prior_snd_una;
//这个ack取人的字节数
else if (icsk->icsk_ca_state == TCP_CA_Loss)//假定超时,则认为只有一个包离开网络
/* we assume just one segment left network */
tp->bytes_acked += min(ack - prior_snd_una,
tp->mss_cache);
}
prior_fackets = tp->fackets_out;
prior_in_flight = tcp_packets_in_flight(tp);
//packets_out - sacked_out- lost_out + retrans_out
飞行中的包 = 发出的未确认的 - 接收方通知丢掉的 - 超时丢掉的 + 重传发出的
//SLOWPATH处理 不太懂
if (!(flag & FLAG_SLOWPATH) && after(ack, prior_snd_una)) {
/* Window is constant, pure forward advance.
* No more checks are required.
* Note, we use the fact that SND.UNA>=SND.WL2.
*/
tcp_update_wl(tp, ack_seq);
tp->snd_una = ack;
flag |= FLAG_WIN_UPDATE;
tcp_ca_event(sk, CA_EVENT_FAST_ACK);
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPACKS);
} else {
if (ack_seq != TCP_SKB_CB(skb)->end_seq)
flag |= FLAG_DATA;
else
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPUREACKS);
flag |= tcp_ack_update_window(sk, skb, ack, ack_seq);
if (TCP_SKB_CB(skb)->sacked)
flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una);
if (TCP_ECN_rcv_ecn_echo(tp, tcp_hdr(skb)))
flag |= FLAG_ECE;
tcp_ca_event(sk, CA_EVENT_SLOW_ACK);
}
/* We passed data and got it acked, remove any soft error
* log. Something worked...
*/
sk->sk_err_soft = 0;
icsk->icsk_probes_out = 0;
tp->rcv_tstamp = tcp_time_stamp;
prior_packets = tp->packets_out;
if (!prior_packets)
goto no_queue;//如果没发数据却收到了ACK,则证明是零窗口通知,跳转到no_queue
/* See if we can take anything off of the retransmit queue. */
flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una);
//尝试清除重传队列里面已经确认的包
if (tp->frto_counter)
frto_cwnd = tcp_process_frto(sk, flag);
/* Guarantee sacktag reordering detection against wrap-arounds */
if (before(tp->frto_highmark, tp->snd_una))
tp->frto_highmark = 0;
if (tcp_ack_is_dubious(sk, flag)) {
//ACK一定是重复的、SACKed或ECE、或者不在TCP_CA_OPEN阶段就可以说是dubious 可疑的
/* Advance CWND, if state allows this. */
if ((flag & FLAG_DATA_ACKED) && !frto_cwnd &&
tcp_may_raise_cwnd(sk, flag))
//tcp_may_raise_cwnd是判断是否可以增窗,在后面再详细解释
tcp_cong_avoid(sk, ack, prior_in_flight);//调用增窗函数
tcp_fastretrans_alert(sk, prior_packets - tp->packets_out,flag);
//这个函数是拥塞控制状态机的核心
} else {
if ((flag & FLAG_DATA_ACKED) && !frto_cwnd)
//不可疑的ACK,并且是确认新数据,则增窗,不进入tcp_fastretrans_alert
tcp_cong_avoid(sk, ack, prior_in_flight);
}
//neigh算法
if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP))
dst_confirm(sk->sk_dst_cache);
return 1;
no_queue:
/* If this ack opens up a zero window, clear backoff. It was
* being used to time the probes, and is probably far higher than
* it needs to be for normal retransmission.
*/
if (tcp_send_head(sk))
tcp_ack_probe(sk);
return 1;
invalid_ack:
SOCK_DEBUG(sk, "Ack %u after %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
return -1;
old_ack:
if (TCP_SKB_CB(skb)->sacked) {
tcp_sacktag_write_queue(sk, skb, prior_snd_una);
if (icsk->icsk_ca_state == TCP_CA_Open)
tcp_try_keep_open(sk);
}
SOCK_DEBUG(sk, "Ack %u before %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
return 0;
}
下面来看看tcp_may_raise_cwnd函数
/*
 * Decide whether the congestion window may be increased for this ACK.
 *
 * @sk:   socket being processed
 * @flag: FLAG_* bits collected while processing the ACK
 *
 * Returns 1 only when both of the following hold, 0 otherwise:
 *  - FLAG_ECE is not set, or snd_cwnd is still below snd_ssthresh;
 *  - the congestion state is neither Recovery nor CWR.
 */
static inline int tcp_may_raise_cwnd(const struct sock *sk, const int flag)
{
	const struct tcp_sock *tp = tcp_sk(sk);

	/* ECE seen and the window has already reached ssthresh: refuse. */
	if ((flag & FLAG_ECE) && tp->snd_cwnd >= tp->snd_ssthresh)
		return 0;

	/* Never grow the window while in Recovery or CWR. */
	if ((1 << inet_csk(sk)->icsk_ca_state) & (TCPF_CA_Recovery | TCPF_CA_CWR))
		return 0;

	return 1;
}
实际上,只有同时满足下面两个条件时才会return 1(允许增窗):
- ACK没有携带ECE标志,或者当前仍处于慢启动阶段(snd_cwnd < snd_ssthresh)
- 当前拥塞状态既不是CWR也不是Recovery
再来看看负责增窗的tcp_cong_avoid函数
/*
 * Run the window-increase step for an ACK.
 *
 * @sk:        socket being processed
 * @ack:       acknowledgment number, passed through to the hook
 * @in_flight: in-flight packet count, passed through to the hook
 */
static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
{
	/* The actual policy lives in the cong_avoid hook of the socket's
	 * congestion-control ops; nothing is implemented here.
	 */
	inet_csk(sk)->icsk_ca_ops->cong_avoid(sk, ack, in_flight);

	/* Remember when the hook was last invoked. */
	tcp_sk(sk)->snd_cwnd_stamp = tcp_time_stamp;
}