Linux内核TCP收到ACK的处理

最新推荐文章于 2025-01-02 00:30:00 发布

jedihy

最新推荐文章于 2025-01-02 00:30:00 发布

阅读量1.9k

点赞数

CC 4.0 BY-SA版权

文章标签： TCP linux内核网络

本文链接：https://blog.youkuaiyun.com/jedihy/article/details/17043373

TCP层每次收到一个带ACK的报文，都会进入tcp_ack()函数，对该ACK进行处理并做出决策。

先直接上带注释的代码：

static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	//这两行是几乎每个函数都有的，获取tcp_sock结构体，得到当前tcp流的信息
	u32 prior_snd_una = tp->snd_una;
	//将最小未确认的序号保存到prior_snd_una
	u32 ack_seq = TCP_SKB_CB(skb)->seq;
	//ack包的序号
	u32 ack = TCP_SKB_CB(skb)->ack_seq;
	//ack确认的序号
	u32 prior_in_flight;
	//之前的in_flight包
	u32 prior_fackets;
	int prior_packets;
	int frto_cwnd = 0;

	/* If the ack is older than previous acks
	 * then we can probably ignore it.
	 */

	if (before(ack, prior_snd_una))
		goto old_ack;
	 //ack包的序号比最小未确认的序号还要小的话，则认为则是老的ack

	/* If the ack includes data we haven't sent yet, discard
	 * this segment (RFC793 Section 3.9).
	 */

	if (after(ack, tp->snd_nxt))
		goto invalid_ack;
	 //如果ack序号比要发的数据包序号还大，则丢弃
	if (after(ack, prior_snd_una))
		flag |= FLAG_SND_UNA_ADVANCED;
	//如果序号>=最小为确认的包，则flag置FLAG_SND_UNA_ADVANCED，这里使用的是flag与FLAG_SND_UNA_ADVANCED相或，实际就是对应位置一

	if (sysctl_tcp_abc) {//如果启用abc机制
		if (icsk->icsk_ca_state < TCP_CA_CWR)//OPEN和DISORDER状态
			tp->bytes_acked += ack - prior_snd_una;
			//这个ack取人的字节数
		else if (icsk->icsk_ca_state == TCP_CA_Loss)//假定超时，则认为只有一个包离开网络
			/* we assume just one segment left network */
			tp->bytes_acked += min(ack - prior_snd_una,
					       tp->mss_cache);
	}

	prior_fackets = tp->fackets_out;
	prior_in_flight = tcp_packets_in_flight(tp);
	//packets_out - sacked_out- lost_out + retrans_out 
	飞行中的包 = 发出的未确认的 - 接收方通知丢掉的 - 超时丢掉的 + 重传发出的

	//SLOWPATH处理 不太懂
	if (!(flag & FLAG_SLOWPATH) && after(ack, prior_snd_una)) {
		/* Window is constant, pure forward advance.
		 * No more checks are required.
		 * Note, we use the fact that SND.UNA>=SND.WL2.
		 */
		tcp_update_wl(tp, ack_seq);
		tp->snd_una = ack;
		flag |= FLAG_WIN_UPDATE;

		tcp_ca_event(sk, CA_EVENT_FAST_ACK);

		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPACKS);
	} else {
		if (ack_seq != TCP_SKB_CB(skb)->end_seq)
			flag |= FLAG_DATA;
		else
			NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPUREACKS);

		flag |= tcp_ack_update_window(sk, skb, ack, ack_seq);

		if (TCP_SKB_CB(skb)->sacked)
			flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una);

		if (TCP_ECN_rcv_ecn_echo(tp, tcp_hdr(skb)))
			flag |= FLAG_ECE;

		tcp_ca_event(sk, CA_EVENT_SLOW_ACK);
	}

	/* We passed data and got it acked, remove any soft error
	 * log. Something worked...
	 */
	sk->sk_err_soft = 0;
	icsk->icsk_probes_out = 0;
	tp->rcv_tstamp = tcp_time_stamp;
	prior_packets = tp->packets_out;
	if (!prior_packets)
		goto no_queue;//如果没发数据却收到了ACK，则证明是零窗口通知，跳转到no_queue

	/* See if we can take anything off of the retransmit queue. */
	flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una);
	//尝试清除重传队列里面已经确认的包

	if (tp->frto_counter)
		frto_cwnd = tcp_process_frto(sk, flag);
	/* Guarantee sacktag reordering detection against wrap-arounds */
	if (before(tp->frto_highmark, tp->snd_una))
		tp->frto_highmark = 0;

	if (tcp_ack_is_dubious(sk, flag)) {
        //ACK一定是重复的、SACKed或ECE、或者不在TCP_CA_OPEN阶段就可以说是dubious 可疑的
		/* Advance CWND, if state allows this. */
		if ((flag & FLAG_DATA_ACKED) && !frto_cwnd &&
		    tcp_may_raise_cwnd(sk, flag))
                    //tcp_may_raise_cwnd是判断是否可以增窗，在后面再详细解释
			tcp_cong_avoid(sk, ack, prior_in_flight);//调用增窗函数
			tcp_fastretrans_alert(sk, prior_packets - tp->packets_out,flag);
                        //这个函数是拥塞控制状态机的核心
	} else {
		if ((flag & FLAG_DATA_ACKED) && !frto_cwnd)
                //不可疑的ACK，并且是确认新数据，则增窗，不进入tcp_fastretrans_alert
			tcp_cong_avoid(sk, ack, prior_in_flight);
	}
	//neigh算法
	if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP))
		dst_confirm(sk->sk_dst_cache);

	return 1;

no_queue:
	/* If this ack opens up a zero window, clear backoff.  It was
	 * being used to time the probes, and is probably far higher than
	 * it needs to be for normal retransmission.
	 */
	if (tcp_send_head(sk))
		tcp_ack_probe(sk);
	return 1;

invalid_ack:
	SOCK_DEBUG(sk, "Ack %u after %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
	return -1;

old_ack:
	if (TCP_SKB_CB(skb)->sacked) {
		tcp_sacktag_write_queue(sk, skb, prior_snd_una);
		if (icsk->icsk_ca_state == TCP_CA_Open)
			tcp_try_keep_open(sk);
	}

	SOCK_DEBUG(sk, "Ack %u before %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
	return 0;
}

下面来看看tcp_may_raise_cwnd函数

/*
 * Decide whether the congestion window may be increased for this ACK.
 *
 * Returns 0 whenever the congestion state is Recovery or CWR.  Otherwise
 * returns 1 if the flag word lacks FLAG_ECE or snd_cwnd is still below
 * snd_ssthresh, and 0 if neither of those holds.
 */
static inline int tcp_may_raise_cwnd(const struct sock *sk, const int flag)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	const int state_bit = 1 << inet_csk(sk)->icsk_ca_state;

	/* Never raise the window while in Recovery or CWR. */
	if (state_bit & (TCPF_CA_Recovery | TCPF_CA_CWR))
		return 0;

	/* Without an ECE indication there is nothing else to check. */
	if (!(flag & FLAG_ECE))
		return 1;

	/* With ECE set, growth is allowed only while cwnd < ssthresh. */
	return tp->snd_cwnd < tp->snd_ssthresh;
}

实际上就是在同时满足下列两个条件时return 1：

1. 该ACK不带ECE标志，或者当前处于慢启动阶段（snd_cwnd < snd_ssthresh）；
2. 当前拥塞状态既不是Recovery，也不是CWR。

再来看看负责增窗的tcp_cong_avoid函数

/*
 * Hand the ACK to the active congestion-control module and timestamp
 * the congestion window.
 *
 * @sk:        socket being updated
 * @ack:       acknowledgment number passed through to the module
 * @in_flight: in-flight packet count passed through to the module
 */
static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
{
	/* The window adjustment is not implemented here; it is delegated
	 * to the cong_avoid hook of the socket's icsk_ca_ops.
	 */
	inet_csk(sk)->icsk_ca_ops->cong_avoid(sk, ack, in_flight);

	/* Record the time of this window update. */
	tcp_sk(sk)->snd_cwnd_stamp = tcp_time_stamp;
}