前文说过,发送RST会快速关闭一条TCP连接。RST的发送由tcp_send_active_reset函数完成:
/*
 * tcp_send_active_reset - build and transmit a single RST|ACK segment on @sk.
 * @sk:       socket whose connection is being reset
 * @priority: gfp flags used for the skb allocation and passed on to
 *            tcp_transmit_skb()
 *
 * Returns nothing. If the skb cannot be allocated, or tcp_transmit_skb()
 * returns non-zero, LINUX_MIB_TCPABORTFAILED is incremented.
 */
2586 void tcp_send_active_reset(struct sock *sk, gfp_t priority)
2587 {
2588 struct sk_buff *skb;
2589
2590 /* NOTE: No TCP options attached and we never retransmit this. */
2591 skb = alloc_skb(MAX_TCP_HEADER, priority);
2592 if (!skb) {
/* Allocation failed: count the failed abort and give up, no RST is sent. */
2593 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
2594 return;
2595 }
2596
2597 /* Reserve space for headers and prepare control bits. */
2598 skb_reserve(skb, MAX_TCP_HEADER);
/* Header-only segment: sequence number from tcp_acceptable_seq(), flags ACK|RST. */
2599 tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk),
2600 TCPHDR_ACK | TCPHDR_RST);
2601 /* Send it off. */
2602 TCP_SKB_CB(skb)->when = tcp_time_stamp;
/* A non-zero return from tcp_transmit_skb() is counted as a failed abort. */
2603 if (tcp_transmit_skb(sk, skb, 0, priority))
2604 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
2605
/* OUTRSTS is bumped whenever an skb was allocated, even if the transmit above failed. */
2606 TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTRSTS);
2607 }
RST报文没有被放入发送队列,丢失时不会重传。
发送RST的时机有:
(1)调用close系统调用时socket仍有未读数据:
2059 void tcp_close(struct sock *sk, long timeout)
2060 {
...
2101 if (unlikely(tcp_sk(sk)->repair)) {
2102 sk->sk_prot->disconnect(sk, 0);
2103 } else if (data_was_unread) {
2104 /* Unread data was tossed, zap the connection. */
2105 NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE);
2106 tcp_set_state(sk, TCP_CLOSE);
2107 tcp_send_active_reset(sk, sk->sk_allocation);
...
(2)在FIN_WAIT2状态下调用close系统调用且socket用TCP_LINGER2选项设置为负值:
2059 void tcp_close(struct sock *sk, long timeout)
2060 {
...
2183 if (sk->sk_state == TCP_FIN_WAIT2) {
2184 struct tcp_sock *tp = tcp_sk(sk);
2185 if (tp->linger2 < 0) {
2186 tcp_set_state(sk, TCP_CLOSE);
2187 tcp_send_active_reset(sk, GFP_ATOMIC);
...
(3)孤儿套接字(orphan sock)的数量超过上限,或发送缓存的内存占用过大(tcp_check_oom返回真),tcp_close和tcp_out_of_resources中都会进行这一检查:
2059 void tcp_close(struct sock *sk, long timeout)
2060 {
...
2202 if (sk->sk_state != TCP_CLOSE) {
2203 sk_mem_reclaim(sk);
2204 if (tcp_check_oom(sk, 0)) {
2205 tcp_set_state(sk, TCP_CLOSE);
2206 tcp_send_active_reset(sk, GFP_ATOMIC);
55 static int tcp_out_of_resources(struct sock *sk, int do_reset)
56 {
57 struct tcp_sock *tp = tcp_sk(sk);
58 int shift = 0;
59
60 /* If peer does not open window for long time, or did not transmit
61 * anything for long time, penalize it. */
62 if ((s32)(tcp_time_stamp - tp->lsndtime) > 2*TCP_RTO_MAX || !do_reset)
63 shift++;
64
65 /* If some dubious ICMP arrived, penalize even more. */
66 if (sk->sk_err_soft)
67 shift++;
68
69 if (tcp_check_oom(sk, shift)) {
70 /* Catch exceptional cases, when connection requires reset.
71 * 1. Last segment was sent recently. */
72 if ((s32)(tcp_time_stamp - tp->lsndtime) <= TCP_TIMEWAIT_LEN ||
73 /* 2. Window is closed. */
74 (!tp->snd_wnd && !tp->packets_out))
75 do_reset = 1;
76 if (do_reset)
77 tcp_send_active_reset(sk, GFP_ATOMIC);
78 tcp_done(sk);
...
(4)调用tcp_disconnect断开连接时,连接原来的状态需要复位(tcp_need_reset为真),或者处于CLOSING/LAST_ACK状态且仍有数据尚未发送(snd_nxt != write_seq):
2240 int tcp_disconnect(struct sock *sk, int flags)
2241 {
...
2256 } else if (tcp_need_reset(old_state) ||
2257 (tp->snd_nxt != tp->write_seq &&
2258 (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) {
2259 /* The last check adjusts for discrepancy of Linux wrt. RFC
2260 * states
2261 */
2262 tcp_send_active_reset(sk, gfp_any());
...
(5)keepalive定时器超时时socket处于TCP_FIN_WAIT2状态且已经是孤儿socket:
558 static void tcp_keepalive_timer (unsigned long data)
559 {
...
578 if (sk->sk_state == TCP_FIN_WAIT2 && sock_flag(sk, SOCK_DEAD)) {
579 if (tp->linger2 >= 0) {
580 const int tmo = tcp_fin_time(sk) - TCP_TIMEWAIT_LEN;
581
582 if (tmo > 0) {
583 tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
584 goto out;
585 }
586 }
587 tcp_send_active_reset(sk, GFP_ATOMIC);
588 goto death;
589 }
...
(6)keepalive定时器超时时,连接空闲时间已超过进程通过TCP_USER_TIMEOUT socket选项设置的超时时间(且已发出过探测),或者在未设置该选项时探测次数达到上限:
558 static void tcp_keepalive_timer (unsigned long data)
559 {
...
600 elapsed = keepalive_time_elapsed(tp);
601
602 if (elapsed >= keepalive_time_when(tp)) {
603 /* If the TCP_USER_TIMEOUT option is enabled, use that
604 * to determine when to timeout instead.
605 */
606 if ((icsk->icsk_user_timeout != 0 &&
607 elapsed >= icsk->icsk_user_timeout &&
608 icsk->icsk_probes_out > 0) ||
609 (icsk->icsk_user_timeout == 0 &&
610 icsk->icsk_probes_out >= keepalive_probes(tp))) {
611 tcp_send_active_reset(sk, GFP_ATOMIC);
...
在收到RST包时,tcp_validate_incoming函数会对其进行处理:
4985 static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
4986 const struct tcphdr *th, int syn_inerr)
4987 {
...
5018 if (th->rst) {
5019 /* RFC 5961 3.2 :
5020 * If sequence number exactly matches RCV.NXT, then
5021 * RESET the connection
5022 * else
5023 * Send a challenge ACK
5024 */
5025 if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt)
5026 tcp_reset(sk);
5027 else
5028 tcp_send_challenge_ack(sk);
5029 goto discard;
5030 }
...
在RST报文的seq合法(恰好等于rcv_nxt)时会调用tcp_reset函数:
/*
 * tcp_reset - record the error for the current state and tear the
 * connection down via tcp_done().
 * @sk: socket to reset
 *
 * sk->sk_err is chosen from sk->sk_state:
 *   TCP_SYN_SENT   -> ECONNREFUSED
 *   TCP_CLOSE_WAIT -> EPIPE
 *   TCP_CLOSE      -> return immediately, nothing is changed
 *   anything else  -> ECONNRESET
 */
3745 void tcp_reset(struct sock *sk)
3746 {
3747 /* We want the right error as BSD sees it (and indeed as we do). */
3748 switch (sk->sk_state) {
3749 case TCP_SYN_SENT:
3750 sk->sk_err = ECONNREFUSED;
3751 break;
3752 case TCP_CLOSE_WAIT:
3753 sk->sk_err = EPIPE;
3754 break;
3755 case TCP_CLOSE:
3756 return;
3757 default:
3758 sk->sk_err = ECONNRESET;
3759 }
3760 /* This barrier is coupled with smp_rmb() in tcp_poll() */
3761 smp_wmb();
3762
/* Report the error only when the socket is not flagged SOCK_DEAD. */
3763 if (!sock_flag(sk, SOCK_DEAD))
3764 sk->sk_error_report(sk); // wake up the process
3765
3766 tcp_done(sk); // close the local end of the TCP connection: clear timers, move to TCP_CLOSE
3767 }
tcp_done函数:
/*
 * tcp_done - move @sk to TCP_CLOSE and release its connection state.
 * @sk: socket being finished
 *
 * Steps, in order: count a failed attempt if the socket was still in
 * SYN_SENT/SYN_RECV, set the state to TCP_CLOSE, clear the transmit timers,
 * remove a pending fastopen request if one is attached, mark both
 * directions shut down, then either notify via sk_state_change() or
 * destroy the socket when it is flagged SOCK_DEAD.
 */
3327 void tcp_done(struct sock *sk)
3328 {
/* Fetched before the state change below; removed further down if non-NULL. */
3329 struct request_sock *req = tcp_sk(sk)->fastopen_rsk;
3330
/* Still in SYN_SENT or SYN_RECV: counted as a failed attempt. */
3331 if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV)
3332 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_ATTEMPTFAILS);
3333
3334 tcp_set_state(sk, TCP_CLOSE);
3335 tcp_clear_xmit_timers(sk);
3336 if (req != NULL)
3337 reqsk_fastopen_remove(sk, req, false);
3338
/* SHUTDOWN_MASK: presumably both receive and send directions -- confirm against its definition. */
3339 sk->sk_shutdown = SHUTDOWN_MASK;
3340
/* Not SOCK_DEAD: notify through sk_state_change(); otherwise destroy the socket here. */
3341 if (!sock_flag(sk, SOCK_DEAD))
3342 sk->sk_state_change(sk);
3343 else
3344 inet_csk_destroy_sock(sk);
3345 }
在TCP收到RST报文后,连接被复位,不再允许读写,进程调用tcp_recvmsg收数据时会产生错误:
1545 int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
1546 size_t len, int nonblock, int flags, int *addr_len)
1547 {
...
1671 if (sk->sk_err) {
1672 copied = sock_error(sk);
1673 break;
1674 }
...
同样,调用tcp_sendmsg发送数据时也会产生错误:
1016 int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
1017 size_t size)
1018 {
...
1074 err = -EPIPE;
1075 if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
1076 goto out_err;
...
可见,使用RST快速复位TCP连接可能会导致数据丢失。TCP连接只有在出现异常时才会使用RST进行关闭。