[gPXE-devel] [PATCHv4 10/10] [tcp] Treat ACKs as sent only when successfully transmitted

Guo-Fu Tseng cooldavid at cooldavid.org
Sat Jul 17 21:26:15 EDT 2010


From: Michael Brown <mcb30 at ipxe.org>

iPXE currently forces sending (i.e. sends a pure ACK even in the
absence of fresh data to send) only in response to packets that
consume sequence space or that lie outside of the receive window.
This ignores the possibility that a previous ACK was not actually sent
(due to, for example, the retransmission timer running).

This does not cause incorrect behaviour, but does cause unnecessary
retransmissions from our peer.  For example:

 1. Peer sends final data packet (ack      106 seq 521..523)
 2. We send FIN                  (seq 106..107 ack      523)
 3. Peer sends FIN               (ack      106 seq 523..524)
 4. We send nothing since retransmission timer is running for our FIN
 5. Peer ACKs our FIN            (ack      107 seq 524..524)
 6. We send nothing since this packet consumes no sequence space
 7. Peer retransmits FIN         (ack      107 seq 523..524)
 8. We ACK peer's FIN            (seq 107..107 ack      524)

What should happen at step (6) is that we should ACK the peer's FIN,
since we can deduce that we have never sent this ACK.

Fix by maintaining an "ACK pending" flag that is set whenever we are
made aware that our peer needs an ACK (whether by consuming sequence
space or by sending a packet that appears out of order), and is
cleared only when the ACK packet has been transmitted.

Reported-by: Piotr Jaroszyński <p.jaroszynski at gmail.com>
Signed-off-by: Michael Brown <mcb30 at ipxe.org>
Merged-by: Guo-Fu Tseng <cooldavid at cooldavid.org>
Signed-off-by: Guo-Fu Tseng <cooldavid at cooldavid.org>
---
 src/net/tcp.c |   49 +++++++++++++++++++++++--------------------------
 1 files changed, 23 insertions(+), 26 deletions(-)

diff --git a/src/net/tcp.c b/src/net/tcp.c
index b5f48c4..230dce0 100644
--- a/src/net/tcp.c
+++ b/src/net/tcp.c
@@ -107,6 +107,8 @@ enum tcp_flags {
 	TCP_TS_ENABLED  = 0x0002,
 	/** Activity started **/
 	TCP_ACT_STARTED = 0x0004,
+	/** TCP acknowledgment is pending */
+	TCP_ACK_PENDING = 0x0008,
 };
 
 /**
@@ -434,15 +436,14 @@ static size_t tcp_process_queue ( struct tcp_connection *tcp, size_t max_len,
  * Transmit any outstanding data
  *
  * @v tcp		TCP connection
- * @v force_send	Force sending of packet
- * 
+ *
  * Transmits any outstanding data on the connection.
  *
  * Note that even if an error is returned, the retransmission timer
  * will have been started if necessary, and so the stack will
  * eventually attempt to retransmit the failed packet.
  */
-static int tcp_xmit ( struct tcp_connection *tcp, int force_send ) {
+static int tcp_xmit ( struct tcp_connection *tcp ) {
 	struct io_buffer *iobuf;
 	struct tcp_header *tcphdr;
 	struct tcp_mss_option *mssopt;
@@ -457,11 +458,9 @@ static int tcp_xmit ( struct tcp_connection *tcp, int force_send ) {
 
 	/*
 	 * If retransmission timer is already running,
-	 * and not force-sending ACKs without any data,
 	 * do nothing
 	 */
-	if ( timer_running ( &tcp->timer ) &&
-	     ! ( list_empty ( &tcp->queue ) && force_send ) )
+	if ( timer_running ( &tcp->timer ) )
 		return 0;
 
 	/* Calculate both the actual (payload) and sequence space
@@ -481,7 +480,7 @@ static int tcp_xmit ( struct tcp_connection *tcp, int force_send ) {
 	tcp->snd_sent = seq_len;
 
 	/* If we have nothing to transmit, stop now */
-	if ( ( seq_len == 0 ) && ! force_send )
+	if ( ( seq_len == 0 ) && ! ( tcp->flags & TCP_ACK_PENDING ) )
 		return 0;
 
 	/* If we are transmitting anything that requires
@@ -567,6 +566,9 @@ static int tcp_xmit ( struct tcp_connection *tcp, int force_send ) {
 	if ( flags & TCP_FIN )
 		tcp->snd_fin_seq = tcp->snd_seq;
 
+	/* Clear ACK-pending flag */
+	tcp->flags &= ~TCP_ACK_PENDING;
+
 	return 0;
 }
 
@@ -602,7 +604,7 @@ static void tcp_expired ( struct retry_timer *timer, int over ) {
 		tcp_xfer_shutdown ( tcp, -ETIMEDOUT );
 	} else {
 		/* Otherwise, retransmit the packet */
-		tcp_xmit ( tcp, 0 );
+		tcp_xmit ( tcp );
 	}
 }
 
@@ -760,6 +762,7 @@ static void tcp_rx_seq ( struct tcp_connection *tcp, uint32_t seq_len ) {
 	} else {
 		tcp->rcv_win = 0;
 	}
+	tcp->flags |= TCP_ACK_PENDING;
 }
 
 /**
@@ -980,7 +983,6 @@ static int tcp_rx ( struct io_buffer *iobuf,
 	struct tcp_options options;
 	size_t hlen;
 	uint16_t csum;
-	uint32_t start_seq;
 	uint32_t seq;
 	uint32_t ack;
 	uint32_t win;
@@ -1017,10 +1019,10 @@ static int tcp_rx ( struct io_buffer *iobuf,
 		rc = -EINVAL;
 		goto discard;
 	}
-	
+
 	/* Parse parameters from header and strip header */
 	tcp = tcp_demux ( tcphdr->dest );
-	start_seq = seq = ntohl ( tcphdr->seq );
+	seq = ntohl ( tcphdr->seq );
 	ack = ntohl ( tcphdr->ack );
 	win = ntohs ( tcphdr->win );
 	flags = tcphdr->flags;
@@ -1055,6 +1057,12 @@ static int tcp_rx ( struct io_buffer *iobuf,
 		}
 	}
 
+	/* Force an ACK if this packet is out of order */
+	if ( ( tcp->tcp_state & TCP_STATE_RCVD ( TCP_SYN ) ) &&
+	     ( seq != tcp->rcv_ack ) ) {
+		tcp->flags |= TCP_ACK_PENDING;
+	}
+
 	/* Handle SYN, if present */
 	if ( flags & TCP_SYN ) {
 		tcp_rx_syn ( tcp, seq, &options );
@@ -1084,19 +1092,8 @@ static int tcp_rx ( struct io_buffer *iobuf,
 	/* Dump out any state change as a result of the received packet */
 	tcp_dump_state ( tcp );
 
-	/* Send out any pending data.  We force sending a reply if either
-	 *
-	 *  a) the peer is expecting an ACK (i.e. consumed sequence space), or
-	 *  b) either end of the packet was outside the receive window
-	 *
-	 * Case (b) enables us to support TCP keepalives using
-	 * zero-length packets, which we would otherwise ignore.  Note
-	 * that for case (b), we need *only* consider zero-length
-	 * packets, since non-zero-length packets will already be
-	 * caught by case (a).
-	 */
-	tcp_xmit ( tcp, ( ( start_seq != seq ) ||
-			  ( ( seq - tcp->rcv_ack ) > tcp->rcv_win ) ) );
+	/* Send out any pending data */
+	tcp_xmit ( tcp );
 
 	/* Deliver data to application, if any */
 	if ( iobuf && ( rc = xfer_deliver_iob ( &tcp->xfer, iobuf ) ) != 0 ) {
@@ -1159,7 +1156,7 @@ static void tcp_xfer_close ( struct xfer_interface *xfer, int rc ) {
 	tcp_xfer_shutdown ( tcp, rc );
 
 	/* Transmit FIN, if possible */
-	tcp_xmit ( tcp, 0 );
+	tcp_xmit ( tcp );
 }
 
 /**
@@ -1201,7 +1198,7 @@ static int tcp_xfer_deliver_iob ( struct xfer_interface *xfer,
 	list_add_tail ( &iobuf->list, &tcp->queue );
 
 	/* Transmit data, if possible */
-	tcp_xmit ( tcp, 0 );
+	tcp_xmit ( tcp );
 
 	return 0;
 }
-- 
1.7.1



More information about the gPXE-devel mailing list