[gPXE git] mainline: 13 commits to master (b764464..b3858e3)

git at etherboot.org git at etherboot.org
Sun Aug 1 16:32:46 EDT 2010


In the Main gPXE repository, branch master has been updated.
      adds  b3858e3 [tcp] Allow out-of-order receive queue to be discarded
      adds  e8af7f9 [malloc] Add cache discard mechanism
      adds  85f92fe [tcp] Fix a 64bit compile time error
      adds  62b461e [tcp] Handle out-of-order received packets
      adds  7c274ba [tcp] Treat ACKs as sent only when successfully transmitted
      adds  7cf46f8 [tcp] Merge boolean flags into a single "flags" field
      adds  2ad0cd4 [tcp] Use a dedicated timer for the TIME_WAIT state
      adds  02e1db5 [hci] Continue processing while prompting for shell banner
      adds  99d3bd6 [cmdline] Fix inconsistent and ugly code formatting in shell_banner()
      adds  419aa5b [tcp] Randomize TCP bind port
      adds  b3e7603 [tcp] Fix typos by changing ntohl() to htonl() where appropriate
      adds  69c0f8a [tcp] Store local port in host byte order
      adds  ea51761 [tcp] Update received sequence number before delivering received data
      from  b764464 [comboot] Run com32 programs with a valid IDT

Summary of changes:
 src/core/getkey.c         |    2 +-
 src/core/malloc.c         |  114 ++++++++------
 src/hci/shell_banner.c    |   21 +--
 src/include/console.h     |    1 +
 src/include/gpxe/list.h   |   12 ++
 src/include/gpxe/malloc.h |   17 ++
 src/include/gpxe/tcp.h    |   31 ++++-
 src/net/tcp.c             |  375 +++++++++++++++++++++++++++++++++------------
 8 files changed, 414 insertions(+), 159 deletions(-)


- Log -----------------------------------------------------------------
------
commit b3858e358b7a6bc921eefa9158cdd19ee91e1a95
Author: Michael Brown <mcb30 at ipxe.org>
Date: Wed Jul 21 12:01:50 2010 +0100
Committer: Marty Connor <mdc at etherboot.org>

[tcp] Allow out-of-order receive queue to be discarded

Allow packets in the receive queue to be discarded in order to free up
memory.  This avoids a potential deadlock condition in which the
missing packet can never be received because the receive queue is
occupying all of the memory available for further RX buffers.

Signed-off-by: Michael Brown <mcb30 at ipxe.org>
Signed-off-by: Marty Connor <mdc at etherboot.org>
---
 src/include/gpxe/list.h |   12 ++++++++++++
 src/net/tcp.c           |   41 ++++++++++++++++++++++++++++++++++++++---
 2 files changed, 50 insertions(+), 3 deletions(-)

diff --git a/src/include/gpxe/list.h b/src/include/gpxe/list.h
index 22ba201..02b0f6e 100644
--- a/src/include/gpxe/list.h
+++ b/src/include/gpxe/list.h
@@ -163,6 +163,18 @@ static inline int list_empty ( const struct list_head *head ) {
 	      pos = list_entry ( pos->member.next, typeof ( *pos ), member ) )
 
 /**
+ * Iterate over entries in a list in reverse order
+ *
+ * @v pos	The type * to use as a loop counter
+ * @v head	The head for your list
+ * @v member	The name of the list_struct within the struct
+ */
+#define list_for_each_entry_reverse( pos, head, member )		      \
+	for ( pos = list_entry ( (head)->prev, typeof ( *pos ), member );     \
+	      &pos->member != (head);					      \
+	      pos = list_entry ( pos->member.prev, typeof ( *pos ), member ) )
+
+/**
  * Iterate over entries in a list, safe against deletion of entries
  *
  * @v pos	The type * to use as a loop counter
diff --git a/src/net/tcp.c b/src/net/tcp.c
index 637bfce..d2b2885 100644
--- a/src/net/tcp.c
+++ b/src/net/tcp.c
@@ -1004,14 +1004,21 @@ static void tcp_rx_enqueue ( struct tcp_connection *tcp, uint32_t seq,
  */
 static void tcp_process_rx_queue ( struct tcp_connection *tcp ) {
 	struct io_buffer *iobuf;
-	struct io_buffer *tmp;
 	struct tcp_rx_queued_header *tcpqhdr;
 	uint32_t seq;
 	unsigned int flags;
 	size_t len;
 
-	/* Process all applicable received buffers */
-	list_for_each_entry_safe ( iobuf, tmp, &tcp->rx_queue, list ) {
+	/* Process all applicable received buffers.  Note that we
+	 * cannot use list_for_each_entry() to iterate over the RX
+	 * queue, since tcp_discard() may remove packets from the RX
+	 * queue while we are processing.
+	 */
+	while ( ! list_empty ( &tcp->rx_queue ) ) {
+		list_for_each_entry ( iobuf, &tcp->rx_queue, list )
+			break;
+
+		/* Stop processing when we hit the first gap */
 		tcpqhdr = iobuf->data;
 		if ( tcp_cmp ( tcpqhdr->seq, tcp->rcv_ack ) > 0 )
 			break;
@@ -1183,6 +1190,34 @@ struct tcpip_protocol tcp_protocol __tcpip_protocol = {
 	.tcpip_proto = IP_TCP,
 };
 
+/**
+ * Discard some cached TCP data
+ *
+ * @ret discarded	Number of cached items discarded
+ */
+static unsigned int tcp_discard ( void ) {
+	struct tcp_connection *tcp;
+	struct io_buffer *iobuf;
+	unsigned int discarded = 0;
+
+	/* Try to drop one queued RX packet from each connection */
+	list_for_each_entry ( tcp, &tcp_conns, list ) {
+		list_for_each_entry_reverse ( iobuf, &tcp->rx_queue, list ) {
+			list_del ( &iobuf->list );
+			free_iob ( iobuf );
+			discarded++;
+			break;
+		}
+	}
+
+	return discarded;
+}
+
+/** TCP cache discarder */
+struct cache_discarder tcp_cache_discarder __cache_discarder = {
+	.discard = tcp_discard,
+};
+
 /***************************************************************************
  *
  * Data transfer interface

------
commit e8af7f9b3df9b3fe69c2c8e139248f9b9ff689fa
Author: Michael Brown <mcb30 at ipxe.org>
Date: Wed Jul 21 11:58:50 2010 +0100
Committer: Marty Connor <mdc at etherboot.org>

[malloc] Add cache discard mechanism

Add a facility allowing cached data to be discarded in order to
satisfy memory allocations that would otherwise fail.

Signed-off-by: Michael Brown <mcb30 at ipxe.org>
Signed-off-by: Marty Connor <mdc at etherboot.org>
---
 src/core/malloc.c         |  114 +++++++++++++++++++++++++++------------------
 src/include/gpxe/malloc.h |   17 +++++++
 2 files changed, 85 insertions(+), 46 deletions(-)

diff --git a/src/core/malloc.c b/src/core/malloc.c
index 8b0bc24..61ea089 100644
--- a/src/core/malloc.c
+++ b/src/core/malloc.c
@@ -86,6 +86,21 @@ size_t freemem;
 static char heap[HEAP_SIZE] __attribute__ (( aligned ( __alignof__(void *) )));
 
 /**
+ * Discard some cached data
+ *
+ * @ret discarded	Number of cached items discarded
+ */
+static unsigned int discard_cache ( void ) {
+	struct cache_discarder *discarder;
+	unsigned int discarded = 0;
+
+	for_each_table_entry ( discarder, CACHE_DISCARDERS ) {
+		discarded += discarder->discard();
+	}
+	return discarded;
+}
+
+/**
  * Allocate a memory block
  *
  * @v size		Requested size
@@ -112,55 +127,62 @@ void * alloc_memblock ( size_t size, size_t align ) {
 	align_mask = ( align - 1 ) | ( MIN_MEMBLOCK_SIZE - 1 );
 
 	DBG ( "Allocating %#zx (aligned %#zx)\n", size, align );
-
-	/* Search through blocks for the first one with enough space */
-	list_for_each_entry ( block, &free_blocks, list ) {
-		pre_size = ( - virt_to_phys ( block ) ) & align_mask;
-		post_size = block->size - pre_size - size;
-		if ( post_size >= 0 ) {
-			/* Split block into pre-block, block, and
-			 * post-block.  After this split, the "pre"
-			 * block is the one currently linked into the
-			 * free list.
-			 */
-			pre   = block;
-			block = ( ( ( void * ) pre   ) + pre_size );
-			post  = ( ( ( void * ) block ) + size     );
-			DBG ( "[%p,%p) -> [%p,%p) + [%p,%p)\n", pre,
-			      ( ( ( void * ) pre ) + pre->size ), pre, block,
-			      post, ( ( ( void * ) pre ) + pre->size ) );
-			/* If there is a "post" block, add it in to
-			 * the free list.  Leak it if it is too small
-			 * (which can happen only at the very end of
-			 * the heap).
-			 */
-			if ( ( size_t ) post_size >= MIN_MEMBLOCK_SIZE ) {
-				post->size = post_size;
-				list_add ( &post->list, &pre->list );
+	while ( 1 ) {
+		/* Search through blocks for the first one with enough space */
+		list_for_each_entry ( block, &free_blocks, list ) {
+			pre_size = ( - virt_to_phys ( block ) ) & align_mask;
+			post_size = block->size - pre_size - size;
+			if ( post_size >= 0 ) {
+				/* Split block into pre-block, block, and
+				 * post-block.  After this split, the "pre"
+				 * block is the one currently linked into the
+				 * free list.
+				 */
+				pre   = block;
+				block = ( ( ( void * ) pre   ) + pre_size );
+				post  = ( ( ( void * ) block ) + size     );
+				DBG ( "[%p,%p) -> [%p,%p) + [%p,%p)\n", pre,
+				      ( ( ( void * ) pre ) + pre->size ),
+				      pre, block, post,
+				      ( ( ( void * ) pre ) + pre->size ) );
+				/* If there is a "post" block, add it in to
+				 * the free list.  Leak it if it is too small
+				 * (which can happen only at the very end of
+				 * the heap).
+				 */
+				if ( (size_t) post_size >= MIN_MEMBLOCK_SIZE ) {
+					post->size = post_size;
+					list_add ( &post->list, &pre->list );
+				}
+				/* Shrink "pre" block, leaving the main block
+				 * isolated and no longer part of the free
+				 * list.
+				 */
+				pre->size = pre_size;
+				/* If there is no "pre" block, remove it from
+				 * the list.  Also remove it (i.e. leak it) if
+				 * it is too small, which can happen only at
+				 * the very start of the heap.
+				 */
+				if ( pre_size < MIN_MEMBLOCK_SIZE )
+					list_del ( &pre->list );
+				/* Update total free memory */
+				freemem -= size;
+				/* Return allocated block */
+				DBG ( "Allocated [%p,%p)\n", block,
+				      ( ( ( void * ) block ) + size ) );
+				return block;
 			}
-			/* Shrink "pre" block, leaving the main block
-			 * isolated and no longer part of the free
-			 * list.
-			 */
-			pre->size = pre_size;
-			/* If there is no "pre" block, remove it from
-			 * the list.  Also remove it (i.e. leak it) if
-			 * it is too small, which can happen only at
-			 * the very start of the heap.
-			 */
-			if ( pre_size < MIN_MEMBLOCK_SIZE )
-				list_del ( &pre->list );
-			/* Update total free memory */
-			freemem -= size;
-			/* Return allocated block */
-			DBG ( "Allocated [%p,%p)\n", block,
-			      ( ( ( void * ) block ) + size ) );
-			return block;
 		}
-	}
 
-	DBG ( "Failed to allocate %#zx (aligned %#zx)\n", size, align );
-	return NULL;
+		/* Try discarding some cached data to free up memory */
+		if ( ! discard_cache() ) {
+			/* Nothing available to discard */
+			DBG ( "Failed to allocate %#zx (aligned %#zx)\n",
+			      size, align );
+			return NULL;
+		}
+	}
 }
 
 /**
diff --git a/src/include/gpxe/malloc.h b/src/include/gpxe/malloc.h
index c02a866..4987905 100644
--- a/src/include/gpxe/malloc.h
+++ b/src/include/gpxe/malloc.h
@@ -18,6 +18,7 @@ FILE_LICENCE ( GPL2_OR_LATER );
  *
  */
 #include <stdlib.h>
+#include <gpxe/tables.h>
 
 extern size_t freemem;
 
@@ -56,4 +57,20 @@ static inline void free_dma ( void *ptr, size_t size ) {
 	free_memblock ( ptr, size );
 }
 
+/** A cache discarder */
+struct cache_discarder {
+	/**
+	 * Discard some cached data
+	 *
+	 * @ret discarded	Number of cached items discarded
+	 */
+	unsigned int ( * discard ) ( void );
+};
+
+/** Cache discarder table */
+#define CACHE_DISCARDERS __table ( struct cache_discarder, "cache_discarders" )
+
+/** Declare a cache discarder */
+#define __cache_discarder __table_entry ( CACHE_DISCARDERS, 01 )
+
 #endif /* _GPXE_MALLOC_H */

------
commit 85f92fe3e1017d90a181c921132ce539d76a7c94
Author: Piotr Jaroszyński <p.jaroszynski at gmail.com>
Date: Thu Jul 22 22:10:40 2010 +0200
Committer: Marty Connor <mdc at etherboot.org>

[tcp] Fix a 64bit compile time error

Signed-off-by: Piotr Jaroszyński <p.jaroszynski at gmail.com>
Signed-off-by: Michael Brown <mcb30 at ipxe.org>
Signed-off-by: Marty Connor <mdc at etherboot.org>
---
 src/net/tcp.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/src/net/tcp.c b/src/net/tcp.c
index 76fa5a3..637bfce 100644
--- a/src/net/tcp.c
+++ b/src/net/tcp.c
@@ -1104,7 +1104,7 @@ static int tcp_rx ( struct io_buffer *iobuf,
 		    ( ( flags & TCP_FIN ) ? 1 : 0 ) );
 
 	/* Dump header */
-	DBGC2 ( tcp, "TCP %p RX %d<-%d           %08x %08x..%08zx %4zd",
+	DBGC2 ( tcp, "TCP %p RX %d<-%d           %08x %08x..%08x %4zd",
 		tcp, ntohs ( tcphdr->dest ), ntohs ( tcphdr->src ),
 		ntohl ( tcphdr->ack ), ntohl ( tcphdr->seq ),
 		( ntohl ( tcphdr->seq ) + seq_len ), len );

------
commit 62b461e43ee39b976a6a4d6b21861b624394e241
Author: Michael Brown <mcb30 at ipxe.org>
Date: Tue Jul 20 23:17:30 2010 +0100
Committer: Marty Connor <mdc at etherboot.org>

[tcp] Handle out-of-order received packets

Maintain a queue of received packets, so that lost packets need not
result in retransmission of the entire TCP window.

Increase the TCP window to 8kB, in order that we can potentially
transmit enough duplicate ACKs to trigger Fast Retransmission at the
sender.

Using a 10MB HTTP download in qemu-kvm with an artificial drop rate of
1 in 64 packets, this reduces the download time from around 26s to
around 4s.

Signed-off-by: Michael Brown <mcb30 at ipxe.org>
Signed-off-by: Marty Connor <mdc at etherboot.org>
---
 src/include/gpxe/tcp.h |   31 ++++++++-
 src/net/tcp.c          |  184 +++++++++++++++++++++++++++++++++++++++---------
 2 files changed, 180 insertions(+), 35 deletions(-)

diff --git a/src/include/gpxe/tcp.h b/src/include/gpxe/tcp.h
index 7ae7eab..be76172 100644
--- a/src/include/gpxe/tcp.h
+++ b/src/include/gpxe/tcp.h
@@ -287,7 +287,7 @@ struct tcp_options {
  * that payloads remain dword-aligned.
  */
 //#define TCP_MAX_WINDOW_SIZE	( 65536 - 4 )
-#define TCP_MAX_WINDOW_SIZE	4096
+#define TCP_MAX_WINDOW_SIZE	8192
 
 /**
  * Path MTU
@@ -313,6 +313,35 @@ struct tcp_options {
  */
 #define TCP_MSL ( 2 * 60 * TICKS_PER_SEC )
 
+/**
+ * Compare TCP sequence numbers
+ *
+ * @v seq1		Sequence number 1
+ * @v seq2		Sequence number 2
+ * @ret diff		Sequence difference
+ *
+ * Analogous to memcmp(), returns an integer less than, equal to, or
+ * greater than zero if @c seq1 is found, respectively, to be before,
+ * equal to, or after @c seq2.
+ */
+static inline __attribute__ (( always_inline )) int32_t
+tcp_cmp ( uint32_t seq1, uint32_t seq2 ) {
+	return ( ( int32_t ) ( seq1 - seq2 ) );
+}
+
+/**
+ * Check if TCP sequence number lies within window
+ *
+ * @v seq		Sequence number
+ * @v start		Start of window
+ * @v len		Length of window
+ * @ret in_window	Sequence number is within window
+ */
+static inline int tcp_in_window ( uint32_t seq, uint32_t start,
+				  uint32_t len ) {
+	return ( ( seq - start ) < len );
+}
+
 extern struct tcpip_protocol tcp_protocol;
 
 #endif /* _GPXE_TCP_H */
diff --git a/src/net/tcp.c b/src/net/tcp.c
index 8df80a8..76fa5a3 100644
--- a/src/net/tcp.c
+++ b/src/net/tcp.c
@@ -80,7 +80,9 @@ struct tcp_connection {
 	uint32_t ts_recent;
 
 	/** Transmit queue */
-	struct list_head queue;
+	struct list_head tx_queue;
+	/** Receive queue */
+	struct list_head rx_queue;
 	/** Retransmission timer */
 	struct retry_timer timer;
 	/** Shutdown (TIME_WAIT) timer */
@@ -97,6 +99,29 @@ enum tcp_flags {
 	TCP_ACK_PENDING = 0x0004,
 };
 
+/** TCP internal header
+ *
+ * This is the header that replaces the TCP header for packets
+ * enqueued on the receive queue.
+ */
+struct tcp_rx_queued_header {
+	/** SEQ value, in host-endian order
+	 *
+	 * This represents the SEQ value at the time the packet is
+	 * enqueued, and so excludes the SYN, if present.
+	 */
+	uint32_t seq;
+	/** Flags
+	 *
+	 * Only FIN is valid within this flags byte; all other flags
+	 * have already been processed by the time the packet is
+	 * enqueued.
+	 */
+	uint8_t flags;
+	/** Reserved */
+	uint8_t reserved[3];
+};
+
 /**
  * List of registered TCP connections
  */
@@ -245,7 +270,8 @@ static int tcp_open ( struct xfer_interface *xfer, struct sockaddr *peer,
 	tcp->tcp_state = TCP_STATE_SENT ( TCP_SYN );
 	tcp_dump_state ( tcp );
 	tcp->snd_seq = random();
-	INIT_LIST_HEAD ( &tcp->queue );
+	INIT_LIST_HEAD ( &tcp->tx_queue );
+	INIT_LIST_HEAD ( &tcp->rx_queue );
 	memcpy ( &tcp->peer, st_peer, sizeof ( tcp->peer ) );
 
 	/* Bind to local port */
@@ -296,8 +322,14 @@ static void tcp_close ( struct tcp_connection *tcp, int rc ) {
 		tcp->tcp_state = TCP_CLOSED;
 		tcp_dump_state ( tcp );
 
+		/* Free any unprocessed I/O buffers */
+		list_for_each_entry_safe ( iobuf, tmp, &tcp->rx_queue, list ) {
+			list_del ( &iobuf->list );
+			free_iob ( iobuf );
+		}
+
 		/* Free any unsent I/O buffers */
-		list_for_each_entry_safe ( iobuf, tmp, &tcp->queue, list ) {
+		list_for_each_entry_safe ( iobuf, tmp, &tcp->tx_queue, list ) {
 			list_del ( &iobuf->list );
 			free_iob ( iobuf );
 		}
@@ -318,7 +350,7 @@ static void tcp_close ( struct tcp_connection *tcp, int rc ) {
 		tcp_rx_ack ( tcp, ( tcp->snd_seq + 1 ), 0 );
 
 	/* If we have no data remaining to send, start sending FIN */
-	if ( list_empty ( &tcp->queue ) ) {
+	if ( list_empty ( &tcp->tx_queue ) ) {
 		tcp->tcp_state |= TCP_STATE_SENT ( TCP_FIN );
 		tcp_dump_state ( tcp );
 	}
@@ -366,14 +398,14 @@ static size_t tcp_xmit_win ( struct tcp_connection *tcp ) {
  * (if provided) and, if @c remove is true, removed from the transmit
  * queue.
  */
-static size_t tcp_process_queue ( struct tcp_connection *tcp, size_t max_len,
-				  struct io_buffer *dest, int remove ) {
+static size_t tcp_process_tx_queue ( struct tcp_connection *tcp, size_t max_len,
+				     struct io_buffer *dest, int remove ) {
 	struct io_buffer *iobuf;
 	struct io_buffer *tmp;
 	size_t frag_len;
 	size_t len = 0;
 
-	list_for_each_entry_safe ( iobuf, tmp, &tcp->queue, list ) {
+	list_for_each_entry_safe ( iobuf, tmp, &tcp->tx_queue, list ) {
 		frag_len = iob_len ( iobuf );
 		if ( frag_len > max_len )
 			frag_len = max_len;
@@ -426,8 +458,8 @@ static int tcp_xmit ( struct tcp_connection *tcp ) {
 	 * lengths that we wish to transmit.
 	 */
 	if ( TCP_CAN_SEND_DATA ( tcp->tcp_state ) ) {
-		len = tcp_process_queue ( tcp, tcp_xmit_win ( tcp ),
-					  NULL, 0 );
+		len = tcp_process_tx_queue ( tcp, tcp_xmit_win ( tcp ),
+					     NULL, 0 );
 	}
 	seq_len = len;
 	flags = TCP_FLAGS_SENDING ( tcp->tcp_state );
@@ -461,7 +493,7 @@ static int tcp_xmit ( struct tcp_connection *tcp ) {
 	iob_reserve ( iobuf, MAX_HDR_LEN );
 
 	/* Fill data payload from transmit queue */
-	tcp_process_queue ( tcp, len, iobuf, 0 );
+	tcp_process_tx_queue ( tcp, len, iobuf, 0 );
 
 	/* Expand receive window if possible */
 	max_rcv_win = ( ( freemem * 3 ) / 4 );
@@ -735,7 +767,7 @@ static int tcp_rx_syn ( struct tcp_connection *tcp, uint32_t seq,
 	}
 
 	/* Ignore duplicate SYN */
-	if ( ( tcp->rcv_ack - seq ) > 0 )
+	if ( seq != tcp->rcv_ack )
 		return 0;
 
 	/* Acknowledge SYN */
@@ -806,14 +838,14 @@ static int tcp_rx_ack ( struct tcp_connection *tcp, uint32_t ack,
 	tcp->snd_win = win;
 
 	/* Remove any acknowledged data from transmit queue */
-	tcp_process_queue ( tcp, len, NULL, 1 );
+	tcp_process_tx_queue ( tcp, len, NULL, 1 );
 		
 	/* Mark SYN/FIN as acknowledged if applicable. */
 	if ( acked_flags )
 		tcp->tcp_state |= TCP_STATE_ACKED ( acked_flags );
 
 	/* Start sending FIN if we've had all possible data ACKed */
-	if ( list_empty ( &tcp->queue ) && ( tcp->flags & TCP_XFER_CLOSED ) )
+	if ( list_empty ( &tcp->tx_queue ) && ( tcp->flags & TCP_XFER_CLOSED ) )
 		tcp->tcp_state |= TCP_STATE_SENT ( TCP_FIN );
 
 	return 0;
@@ -868,7 +900,7 @@ static int tcp_rx_data ( struct tcp_connection *tcp, uint32_t seq,
 static int tcp_rx_fin ( struct tcp_connection *tcp, uint32_t seq ) {
 
 	/* Ignore duplicate or out-of-order FIN */
-	if ( ( tcp->rcv_ack - seq ) > 0 )
+	if ( seq != tcp->rcv_ack )
 		return 0;
 
 	/* Acknowledge FIN */
@@ -898,7 +930,7 @@ static int tcp_rx_rst ( struct tcp_connection *tcp, uint32_t seq ) {
 	 * ACKed.
 	 */
 	if ( tcp->tcp_state & TCP_STATE_RCVD ( TCP_SYN ) ) {
-		if ( ( seq - tcp->rcv_ack ) >= tcp->rcv_win )
+		if ( ! tcp_in_window ( seq, tcp->rcv_ack, tcp->rcv_win ) )
 			return 0;
 	} else {
 		if ( ! ( tcp->tcp_state & TCP_STATE_ACKED ( TCP_SYN ) ) )
@@ -915,6 +947,95 @@ static int tcp_rx_rst ( struct tcp_connection *tcp, uint32_t seq ) {
 }
 
 /**
+ * Enqueue received TCP packet
+ *
+ * @v tcp		TCP connection
+ * @v seq		SEQ value (in host-endian order)
+ * @v flags		TCP flags
+ * @v iobuf		I/O buffer
+ */
+static void tcp_rx_enqueue ( struct tcp_connection *tcp, uint32_t seq,
+			     uint8_t flags, struct io_buffer *iobuf ) {
+	struct tcp_rx_queued_header *tcpqhdr;
+	struct io_buffer *queued;
+	size_t len;
+	uint32_t seq_len;
+
+	/* Calculate remaining flags and sequence length.  Note that
+	 * SYN, if present, has already been processed by this point.
+	 */
+	flags &= TCP_FIN;
+	len = iob_len ( iobuf );
+	seq_len = ( len + ( flags ? 1 : 0 ) );
+
+	/* Discard immediately (to save memory) if:
+	 *
+	 * a) we have not yet received a SYN (and so have no defined
+	 *    receive window), or
+	 * b) the packet lies entirely outside the receive window, or
+	 * c) there is no further content to process.
+	 */
+	if ( ( ! ( tcp->tcp_state & TCP_STATE_RCVD ( TCP_SYN ) ) ) ||
+	     ( tcp_cmp ( seq, tcp->rcv_ack + tcp->rcv_win ) >= 0 ) ||
+	     ( tcp_cmp ( seq + seq_len, tcp->rcv_ack ) < 0 ) ||
+	     ( seq_len == 0 ) ) {
+		free_iob ( iobuf );
+		return;
+	}
+
+	/* Add internal header */
+	tcpqhdr = iob_push ( iobuf, sizeof ( *tcpqhdr ) );
+	tcpqhdr->seq = seq;
+	tcpqhdr->flags = flags;
+
+	/* Add to RX queue */
+	list_for_each_entry ( queued, &tcp->rx_queue, list ) {
+		tcpqhdr = queued->data;
+		if ( tcp_cmp ( seq, tcpqhdr->seq ) < 0 )
+			break;
+	}
+	list_add_tail ( &iobuf->list, &queued->list );
+}
+
+/**
+ * Process receive queue
+ *
+ * @v tcp		TCP connection
+ */
+static void tcp_process_rx_queue ( struct tcp_connection *tcp ) {
+	struct io_buffer *iobuf;
+	struct io_buffer *tmp;
+	struct tcp_rx_queued_header *tcpqhdr;
+	uint32_t seq;
+	unsigned int flags;
+	size_t len;
+
+	/* Process all applicable received buffers */
+	list_for_each_entry_safe ( iobuf, tmp, &tcp->rx_queue, list ) {
+		tcpqhdr = iobuf->data;
+		if ( tcp_cmp ( tcpqhdr->seq, tcp->rcv_ack ) > 0 )
+			break;
+
+		/* Strip internal header and remove from RX queue */
+		list_del ( &iobuf->list );
+		seq = tcpqhdr->seq;
+		flags = tcpqhdr->flags;
+		iob_pull ( iobuf, sizeof ( *tcpqhdr ) );
+		len = iob_len ( iobuf );
+
+		/* Handle new data, if any */
+		tcp_rx_data ( tcp, seq, iob_disown ( iobuf ) );
+		seq += len;
+
+		/* Handle FIN, if present */
+		if ( flags & TCP_FIN ) {
+			tcp_rx_fin ( tcp, seq );
+			seq++;
+		}
+	}
+}
+
+/**
  * Process received packet
  *
  * @v iobuf		I/O buffer
@@ -935,9 +1056,9 @@ static int tcp_rx ( struct io_buffer *iobuf,
 	uint32_t seq;
 	uint32_t ack;
 	uint32_t win;
-	uint32_t ts_recent;
 	unsigned int flags;
 	size_t len;
+	uint32_t seq_len;
 	int rc;
 
 	/* Sanity check packet */
@@ -977,17 +1098,16 @@ static int tcp_rx ( struct io_buffer *iobuf,
 	flags = tcphdr->flags;
 	tcp_rx_opts ( tcp, ( ( ( void * ) tcphdr ) + sizeof ( *tcphdr ) ),
 		      ( hlen - sizeof ( *tcphdr ) ), &options );
-	ts_recent = ( options.tsopt ?
-		      ntohl ( options.tsopt->tsval ) : tcp->ts_recent );
 	iob_pull ( iobuf, hlen );
 	len = iob_len ( iobuf );
+	seq_len = ( len + ( ( flags & TCP_SYN ) ? 1 : 0 ) +
+		    ( ( flags & TCP_FIN ) ? 1 : 0 ) );
 
 	/* Dump header */
 	DBGC2 ( tcp, "TCP %p RX %d<-%d           %08x %08x..%08zx %4zd",
 		tcp, ntohs ( tcphdr->dest ), ntohs ( tcphdr->src ),
 		ntohl ( tcphdr->ack ), ntohl ( tcphdr->seq ),
-		( ntohl ( tcphdr->seq ) + len +
-		  ( ( tcphdr->flags & ( TCP_SYN | TCP_FIN ) ) ? 1 : 0 )), len);
+		( ntohl ( tcphdr->seq ) + seq_len ), len );
 	tcp_dump_flags ( tcp, tcphdr->flags );
 	DBGC2 ( tcp, "\n" );
 
@@ -998,6 +1118,10 @@ static int tcp_rx ( struct io_buffer *iobuf,
 		goto discard;
 	}
 
+	/* Update timestamp, if applicable */
+	if ( options.tsopt && tcp_in_window ( tcp->rcv_ack, seq, seq_len ) )
+		tcp->ts_recent = ntohl ( options.tsopt->tsval );
+
 	/* Handle ACK, if present */
 	if ( flags & TCP_ACK ) {
 		if ( ( rc = tcp_rx_ack ( tcp, ack, win ) ) != 0 ) {
@@ -1024,19 +1148,11 @@ static int tcp_rx ( struct io_buffer *iobuf,
 			goto discard;
 	}
 
-	/* Handle new data, if any */
-	tcp_rx_data ( tcp, seq, iob_disown ( iobuf ) );
-	seq += len;
+	/* Enqueue received data */
+	tcp_rx_enqueue ( tcp, seq, flags, iob_disown ( iobuf ) );
 
-	/* Handle FIN, if present */
-	if ( flags & TCP_FIN ) {
-		tcp_rx_fin ( tcp, seq );
-		seq++;
-	}
-
-	/* Update timestamp, if applicable */
-	if ( seq == tcp->rcv_ack )
-		tcp->ts_recent = ts_recent;
+	/* Process receive queue */
+	tcp_process_rx_queue ( tcp );
 
 	/* Dump out any state change as a result of the received packet */
 	tcp_dump_state ( tcp );
@@ -1105,7 +1221,7 @@ static size_t tcp_xfer_window ( struct xfer_interface *xfer ) {
 	 * of only one unACKed packet in the TX queue at any time; we
 	 * do this to conserve memory usage.
 	 */
-	if ( ! list_empty ( &tcp->queue ) )
+	if ( ! list_empty ( &tcp->tx_queue ) )
 		return 0;
 
 	/* Return TCP window length */
@@ -1127,7 +1243,7 @@ static int tcp_xfer_deliver_iob ( struct xfer_interface *xfer,
 		container_of ( xfer, struct tcp_connection, xfer );
 
 	/* Enqueue packet */
-	list_add_tail ( &iobuf->list, &tcp->queue );
+	list_add_tail ( &iobuf->list, &tcp->tx_queue );
 
 	/* Transmit data, if possible */
 	tcp_xmit ( tcp );

------
commit 7c274bab1b4e753419646b3dcd5867a4baede85d
Author: Michael Brown <mcb30 at ipxe.org>
Date: Thu Jul 15 19:33:46 2010 +0100
Committer: Marty Connor <mdc at etherboot.org>

[tcp] Treat ACKs as sent only when successfully transmitted

gPXE currently forces sending (i.e. sends a pure ACK even in the
absence of fresh data to send) only in response to packets that
consume sequence space or that lie outside of the receive window.
This ignores the possibility that a previous ACK was not actually sent
(due to, for example, the retransmission timer running).

This does not cause incorrect behaviour, but does cause unnecessary
retransmissions from our peer.  For example:

 1. Peer sends final data packet (ack      106 seq 521..523)
 2. We send FIN                  (seq 106..107 ack      523)
 3. Peer sends FIN               (ack      106 seq 523..524)
 4. We send nothing since retransmission timer is running for our FIN
 5. Peer ACKs our FIN            (ack      107 seq 524..524)
 6. We send nothing since this packet consumes no sequence space
 7. Peer retransmits FIN         (ack      107 seq 523..524)
 8. We ACK peer's FIN            (seq 107..107 ack      524)

What should happen at step (6) is that we should ACK the peer's FIN,
since we can deduce that we have never sent this ACK.

Fix by maintaining an "ACK pending" flag that is set whenever we are
made aware that our peer needs an ACK (whether by consuming sequence
space or by sending a packet that appears out of order), and is
cleared only when the ACK packet has been transmitted.

Reported-by: Piotr Jaroszyński <p.jaroszynski at gmail.com>
Signed-off-by: Michael Brown <mcb30 at ipxe.org>
Signed-off-by: Marty Connor <mdc at etherboot.org>
---
 src/net/tcp.c |   41 ++++++++++++++++++++---------------------
 1 files changed, 20 insertions(+), 21 deletions(-)

diff --git a/src/net/tcp.c b/src/net/tcp.c
index 53d102c..8df80a8 100644
--- a/src/net/tcp.c
+++ b/src/net/tcp.c
@@ -93,6 +93,8 @@ enum tcp_flags {
 	TCP_XFER_CLOSED = 0x0001,
 	/** TCP timestamps are enabled */
 	TCP_TS_ENABLED = 0x0002,
+	/** TCP acknowledgement is pending */
+	TCP_ACK_PENDING = 0x0004,
 };
 
 /**
@@ -396,7 +398,6 @@ static size_t tcp_process_queue ( struct tcp_connection *tcp, size_t max_len,
  * Transmit any outstanding data
  *
  * @v tcp		TCP connection
- * @v force_send	Force sending of packet
  * 
  * Transmits any outstanding data on the connection.
  *
@@ -404,7 +405,7 @@ static size_t tcp_process_queue ( struct tcp_connection *tcp, size_t max_len,
  * will have been started if necessary, and so the stack will
  * eventually attempt to retransmit the failed packet.
  */
-static int tcp_xmit ( struct tcp_connection *tcp, int force_send ) {
+static int tcp_xmit ( struct tcp_connection *tcp ) {
 	struct io_buffer *iobuf;
 	struct tcp_header *tcphdr;
 	struct tcp_mss_option *mssopt;
@@ -438,7 +439,7 @@ static int tcp_xmit ( struct tcp_connection *tcp, int force_send ) {
 	tcp->snd_sent = seq_len;
 
 	/* If we have nothing to transmit, stop now */
-	if ( ( seq_len == 0 ) && ! force_send )
+	if ( ( seq_len == 0 ) && ! ( tcp->flags & TCP_ACK_PENDING ) )
 		return 0;
 
 	/* If we are transmitting anything that requires
@@ -519,6 +520,9 @@ static int tcp_xmit ( struct tcp_connection *tcp, int force_send ) {
 		return rc;
 	}
 
+	/* Clear ACK-pending flag */
+	tcp->flags &= ~TCP_ACK_PENDING;
+
 	return 0;
 }
 
@@ -552,7 +556,7 @@ static void tcp_expired ( struct retry_timer *timer, int over ) {
 		tcp_close ( tcp, -ETIMEDOUT );
 	} else {
 		/* Otherwise, retransmit the packet */
-		tcp_xmit ( tcp, 0 );
+		tcp_xmit ( tcp );
 	}
 }
 
@@ -709,6 +713,7 @@ static void tcp_rx_seq ( struct tcp_connection *tcp, uint32_t seq_len ) {
 	} else {
 		tcp->rcv_win = 0;
 	}
+	tcp->flags |= TCP_ACK_PENDING;
 }
 
 /**
@@ -927,7 +932,6 @@ static int tcp_rx ( struct io_buffer *iobuf,
 	struct tcp_options options;
 	size_t hlen;
 	uint16_t csum;
-	uint32_t start_seq;
 	uint32_t seq;
 	uint32_t ack;
 	uint32_t win;
@@ -967,7 +971,7 @@ static int tcp_rx ( struct io_buffer *iobuf,
 	
 	/* Parse parameters from header and strip header */
 	tcp = tcp_demux ( ntohs ( tcphdr->dest ) );
-	start_seq = seq = ntohl ( tcphdr->seq );
+	seq = ntohl ( tcphdr->seq );
 	ack = ntohl ( tcphdr->ack );
 	win = ntohs ( tcphdr->win );
 	flags = tcphdr->flags;
@@ -1002,6 +1006,12 @@ static int tcp_rx ( struct io_buffer *iobuf,
 		}
 	}
 
+	/* Force an ACK if this packet is out of order */
+	if ( ( tcp->tcp_state & TCP_STATE_RCVD ( TCP_SYN ) ) &&
+	     ( seq != tcp->rcv_ack ) ) {
+		tcp->flags |= TCP_ACK_PENDING;
+	}
+
 	/* Handle SYN, if present */
 	if ( flags & TCP_SYN ) {
 		tcp_rx_syn ( tcp, seq, &options );
@@ -1031,19 +1041,8 @@ static int tcp_rx ( struct io_buffer *iobuf,
 	/* Dump out any state change as a result of the received packet */
 	tcp_dump_state ( tcp );
 
-	/* Send out any pending data.  We force sending a reply if either
-	 *
-	 *  a) the peer is expecting an ACK (i.e. consumed sequence space), or
-	 *  b) either end of the packet was outside the receive window
-	 *
-	 * Case (b) enables us to support TCP keepalives using
-	 * zero-length packets, which we would otherwise ignore.  Note
-	 * that for case (b), we need *only* consider zero-length
-	 * packets, since non-zero-length packets will already be
-	 * caught by case (a).
-	 */
-	tcp_xmit ( tcp, ( ( start_seq != seq ) ||
-			  ( ( seq - tcp->rcv_ack ) > tcp->rcv_win ) ) );
+	/* Send out any pending data */
+	tcp_xmit ( tcp );
 
 	/* If this packet was the last we expect to receive, set up
 	 * timer to expire and cause the connection to be freed.
@@ -1089,7 +1088,7 @@ static void tcp_xfer_close ( struct xfer_interface *xfer, int rc ) {
 	tcp_close ( tcp, rc );
 
 	/* Transmit FIN, if possible */
-	tcp_xmit ( tcp, 0 );
+	tcp_xmit ( tcp );
 }
 
 /**
@@ -1131,7 +1130,7 @@ static int tcp_xfer_deliver_iob ( struct xfer_interface *xfer,
 	list_add_tail ( &iobuf->list, &tcp->queue );
 
 	/* Transmit data, if possible */
-	tcp_xmit ( tcp, 0 );
+	tcp_xmit ( tcp );
 
 	return 0;
 }

------
commit 7cf46f8d65da8c7c408e73c31ec706bb1226f8a9
Author: Michael Brown <mcb30 at ipxe.org>
Date: Thu Jul 15 19:15:36 2010 +0100
Committer: Marty Connor <mdc at etherboot.org>

[tcp] Merge boolean flags into a single "flags" field

Signed-off-by: Michael Brown <mcb30 at ipxe.org>
Modified-by: Guo-Fu Tseng <cooldavid at cooldavid.org>
Signed-off-by: Guo-Fu Tseng <cooldavid at cooldavid.org>
Signed-off-by: Marty Connor <mdc at etherboot.org>
---
 src/net/tcp.c |   23 +++++++++++++++--------
 1 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/src/net/tcp.c b/src/net/tcp.c
index d703afa..53d102c 100644
--- a/src/net/tcp.c
+++ b/src/net/tcp.c
@@ -30,10 +30,11 @@ struct tcp_connection {
 	/** List of TCP connections */
 	struct list_head list;
 
+	/** Flags */
+	unsigned int flags;
+
 	/** Data transfer interface */
 	struct xfer_interface xfer;
-	/** Data transfer interface closed flag */
-	int xfer_closed;
 
 	/** Remote socket address */
 	struct sockaddr_tcpip peer;
@@ -77,8 +78,6 @@ struct tcp_connection {
 	 * Equivalent to TS.Recent in RFC 1323 terminology.
 	 */
 	uint32_t ts_recent;
-	/** Timestamps enabled */
-	int timestamps;
 
 	/** Transmit queue */
 	struct list_head queue;
@@ -88,6 +87,14 @@ struct tcp_connection {
 	struct retry_timer wait;
 };
 
+/** TCP flags */
+enum tcp_flags {
+	/** TCP data transfer interface has been closed */
+	TCP_XFER_CLOSED = 0x0001,
+	/** TCP timestamps are enabled */
+	TCP_TS_ENABLED = 0x0002,
+};
+
 /**
  * List of registered TCP connections
  */
@@ -275,7 +282,7 @@ static void tcp_close ( struct tcp_connection *tcp, int rc ) {
 	/* Close data transfer interface */
 	xfer_nullify ( &tcp->xfer );
 	xfer_close ( &tcp->xfer, rc );
-	tcp->xfer_closed = 1;
+	tcp->flags |= TCP_XFER_CLOSED;
 
 	/* If we are in CLOSED, or have otherwise not yet received a
 	 * SYN (i.e. we are in LISTEN or SYN_SENT), just delete the
@@ -474,7 +481,7 @@ static int tcp_xmit ( struct tcp_connection *tcp, int force_send ) {
 		mssopt->length = sizeof ( *mssopt );
 		mssopt->mss = htons ( TCP_MSS );
 	}
-	if ( ( flags & TCP_SYN ) || tcp->timestamps ) {
+	if ( ( flags & TCP_SYN ) || ( tcp->flags & TCP_TS_ENABLED ) ) {
 		tsopt = iob_push ( iobuf, sizeof ( *tsopt ) );
 		memset ( tsopt->nop, TCP_OPTION_NOP, sizeof ( tsopt->nop ) );
 		tsopt->tsopt.kind = TCP_OPTION_TS;
@@ -719,7 +726,7 @@ static int tcp_rx_syn ( struct tcp_connection *tcp, uint32_t seq,
 	if ( ! ( tcp->tcp_state & TCP_STATE_RCVD ( TCP_SYN ) ) ) {
 		tcp->rcv_ack = seq;
 		if ( options->tsopt )
-			tcp->timestamps = 1;
+			tcp->flags |= TCP_TS_ENABLED;
 	}
 
 	/* Ignore duplicate SYN */
@@ -801,7 +808,7 @@ static int tcp_rx_ack ( struct tcp_connection *tcp, uint32_t ack,
 		tcp->tcp_state |= TCP_STATE_ACKED ( acked_flags );
 
 	/* Start sending FIN if we've had all possible data ACKed */
-	if ( list_empty ( &tcp->queue ) && tcp->xfer_closed )
+	if ( list_empty ( &tcp->queue ) && ( tcp->flags & TCP_XFER_CLOSED ) )
 		tcp->tcp_state |= TCP_STATE_SENT ( TCP_FIN );
 
 	return 0;

------
commit 2ad0cd4863f9e5abb520365ed7a4c1111627f8b4
Author: Michael Brown <mcb30 at ipxe.org>
Date: Thu Jul 15 18:57:34 2010 +0100
Committer: Marty Connor <mdc at etherboot.org>

[tcp] Use a dedicated timer for the TIME_WAIT state

gPXE currently repurposes the retransmission timer to hold the TCP
connection in the TIME_WAIT state (i.e. waiting for up to 2*MSL in
case we are required to re-ACK our peer's FIN due to a lost ACK).
However, the fact that this timer is running will prevent such an ACK
from ever being sent, since the logic in tcp_xmit() assumes that a
running timer indicates that we ourselves are waiting for an ACK and
so blocks the transmission.  (We always wait for an ACK before sending
our next packet, to keep our transmit data path as simple as
possible.)

Fix by using an entirely separate timer for the TIME_WAIT state, so
that packets can still be sent.

Reported-by: Piotr Jaroszyński <p.jaroszynski at gmail.com>
Signed-off-by: Michael Brown <mcb30 at ipxe.org>
Signed-off-by: Marty Connor <mdc at etherboot.org>
---
 src/net/tcp.c |   41 ++++++++++++++++++++++++++++++++---------
 1 files changed, 32 insertions(+), 9 deletions(-)

diff --git a/src/net/tcp.c b/src/net/tcp.c
index 513c527..d703afa 100644
--- a/src/net/tcp.c
+++ b/src/net/tcp.c
@@ -84,6 +84,8 @@ struct tcp_connection {
 	struct list_head queue;
 	/** Retransmission timer */
 	struct retry_timer timer;
+	/** Shutdown (TIME_WAIT) timer */
+	struct retry_timer wait;
 };
 
 /**
@@ -94,6 +96,7 @@ static LIST_HEAD ( tcp_conns );
 /* Forward declarations */
 static struct xfer_interface_operations tcp_xfer_operations;
 static void tcp_expired ( struct retry_timer *timer, int over );
+static void tcp_wait_expired ( struct retry_timer *timer, int over );
 static int tcp_rx_ack ( struct tcp_connection *tcp, uint32_t ack,
 			uint32_t win );
 
@@ -228,6 +231,7 @@ static int tcp_open ( struct xfer_interface *xfer, struct sockaddr *peer,
 	ref_init ( &tcp->refcnt, NULL );
 	xfer_init ( &tcp->xfer, &tcp_xfer_operations, &tcp->refcnt );
 	timer_init ( &tcp->timer, tcp_expired );
+	timer_init ( &tcp->wait, tcp_wait_expired );
 	tcp->prev_tcp_state = TCP_CLOSED;
 	tcp->tcp_state = TCP_STATE_SENT ( TCP_SYN );
 	tcp_dump_state ( tcp );
@@ -514,13 +518,12 @@ static int tcp_xmit ( struct tcp_connection *tcp, int force_send ) {
 /**
  * Retransmission timer expired
  *
- * @v timer	Retry timer
- * @v over	Failure indicator
+ * @v timer		Retransmission timer
+ * @v over		Failure indicator
  */
 static void tcp_expired ( struct retry_timer *timer, int over ) {
 	struct tcp_connection *tcp =
 		container_of ( timer, struct tcp_connection, timer );
-	int graceful_close = TCP_CLOSED_GRACEFULLY ( tcp->tcp_state );
 
 	DBGC ( tcp, "TCP %p timer %s in %s for %08x..%08x %08x\n", tcp,
 	       ( over ? "expired" : "fired" ), tcp_state ( tcp->tcp_state ),
@@ -530,14 +533,12 @@ static void tcp_expired ( struct retry_timer *timer, int over ) {
 		 ( tcp->tcp_state == TCP_SYN_RCVD ) ||
 		 ( tcp->tcp_state == TCP_ESTABLISHED ) ||
 		 ( tcp->tcp_state == TCP_FIN_WAIT_1 ) ||
-		 ( tcp->tcp_state == TCP_TIME_WAIT ) ||
 		 ( tcp->tcp_state == TCP_CLOSE_WAIT ) ||
 		 ( tcp->tcp_state == TCP_CLOSING_OR_LAST_ACK ) );
 
-	if ( over || graceful_close ) {
-		/* If we have finally timed out and given up, or if
-		 * this is the result of a graceful close, terminate
-		 * the connection
+	if ( over ) {
+		/* If we have finally timed out and given up,
+		 * terminate the connection
 		 */
 		tcp->tcp_state = TCP_CLOSED;
 		tcp_dump_state ( tcp );
@@ -549,6 +550,27 @@ static void tcp_expired ( struct retry_timer *timer, int over ) {
 }
 
 /**
+ * Shutdown timer expired
+ *
+ * @v timer		Shutdown timer
+ * @v over		Failure indicator
+ */
+static void tcp_wait_expired ( struct retry_timer *timer, int over __unused ) {
+	struct tcp_connection *tcp =
+		container_of ( timer, struct tcp_connection, wait );
+
+	assert ( tcp->tcp_state == TCP_TIME_WAIT );
+
+	DBGC ( tcp, "TCP %p wait complete in %s for %08x..%08x %08x\n", tcp,
+	       tcp_state ( tcp->tcp_state ), tcp->snd_seq,
+	       ( tcp->snd_seq + tcp->snd_sent ), tcp->rcv_ack );
+
+	tcp->tcp_state = TCP_CLOSED;
+	tcp_dump_state ( tcp );
+	tcp_close ( tcp, 0 );
+}
+
+/**
  * Send RST response to incoming packet
  *
  * @v in_tcphdr		TCP header of incoming packet
@@ -1020,7 +1042,8 @@ static int tcp_rx ( struct io_buffer *iobuf,
 	 * timer to expire and cause the connection to be freed.
 	 */
 	if ( TCP_CLOSED_GRACEFULLY ( tcp->tcp_state ) ) {
-		start_timer_fixed ( &tcp->timer, ( 2 * TCP_MSL ) );
+		stop_timer ( &tcp->wait );
+		start_timer_fixed ( &tcp->wait, ( 2 * TCP_MSL ) );
 	}
 
 	return 0;

------
commit 02e1db51140858113a9a6ffca4c775139b4a470c
Author: Guo-Fu Tseng <cooldavid at cooldavid.org>
Date: Tue Jul 13 21:18:47 2010 +0800
Committer: Marty Connor <mdc at etherboot.org>

[hci] Continue processing while prompting for shell banner

Currently we do nothing while polling for user input in shell_banner.
This commit modifies shell_bannder to do something similar to what is
done in shell, where we schedule processes while waiting for user
input.

This approach provides two potential improvements:

First:
	We increase key responsiveness in shell_banner. It is much more
	likely to successfully get into shell than before. Before this
	change I sometimes pressed the CTRL_B, but failed to get into
	shell. It may have been caused by waiting too long in
	mdelay(100).

Second:
	When using scripts, if the downloaded image is not bootable,
	gPXE exits before it has the opportunity to gracefully close
	the TCP connection. This can leave the TCP state of a remote
	server as not closed when it should have been.

	gPXE waits for user to input for 2 seconds by default before
	returning control to BIOS. This commit lets TCP have the
	opportunity to gracefully close while waiting for the user to
	hit keys in shell_banner.

Referenced: Michael Brown <mcb30 at ipxe.org>
Signed-off-by: Guo-Fu Tseng <cooldavid at cooldavid.org>
Commit-message-modified-by: Marty Connor <mdc at etherboot.org>
Signed-off-by: Marty Connor <mdc at etherboot.org>
---
 src/core/getkey.c      |    2 +-
 src/hci/shell_banner.c |   15 +++------------
 src/include/console.h  |    1 +
 3 files changed, 5 insertions(+), 13 deletions(-)

diff --git a/src/core/getkey.c b/src/core/getkey.c
index dbd074c..15c57e5 100644
--- a/src/core/getkey.c
+++ b/src/core/getkey.c
@@ -38,7 +38,7 @@ FILE_LICENCE ( GPL2_OR_LATER );
  * @v timeout		Timeout period, in ticks
  * @ret character	Character read from console
  */
-static int getchar_timeout ( unsigned long timeout ) {
+int getchar_timeout ( unsigned long timeout ) {
 	unsigned long expiry = ( currticks() + timeout );
 
 	while ( currticks() < expiry ) {
diff --git a/src/hci/shell_banner.c b/src/hci/shell_banner.c
index 91633eb..6153d85 100644
--- a/src/hci/shell_banner.c
+++ b/src/hci/shell_banner.c
@@ -37,10 +37,9 @@ FILE_LICENCE ( GPL2_OR_LATER );
  * @ret	enter_shell		User wants to enter shell
  */
 int shell_banner ( void ) {
-	int enter_shell = 0;
-	int wait_count;
 	int key;
 
+	/* Skip prompt if timeout is zero */
 	if ( BANNER_TIMEOUT <= 0 )
 		return 0;
 
@@ -48,18 +47,10 @@ int shell_banner ( void ) {
 	printf ( "\nPress Ctrl-B for the gPXE command line..." );
 
 	/* Wait for key */
-	for ( wait_count = 0 ; wait_count < BANNER_TIMEOUT ; wait_count++ ) {
-		if ( iskey() ) {
-			key = getchar();
-			if ( key == CTRL_B )
-				enter_shell = 1;
-			break;
-		}
-		mdelay(100);
-	}
+	key = getchar_timeout ( ( BANNER_TIMEOUT * TICKS_PER_SEC ) / 10 );
 
 	/* Clear the "Press Ctrl-B" line */
 	printf ( "\r                                         \r" );
 
-	return enter_shell;
+	return ( key == CTRL_B );
 }
diff --git a/src/include/console.h b/src/include/console.h
index 62fedf5..49a898b 100644
--- a/src/include/console.h
+++ b/src/include/console.h
@@ -112,6 +112,7 @@ struct console_driver {
 /* Function prototypes */
 
 extern void putchar ( int character );
+extern int getchar_timeout ( unsigned long timeout );
 extern int getchar ( void );
 extern int iskey ( void );
 extern int getkey ( void );

------
commit 99d3bd67d65464a7935092ebe5ec562e3dd21bd7
Author: Michael Brown <mcb30 at ipxe.org>
Date: Wed Jul 14 11:17:26 2010 +0100
Committer: Marty Connor <mdc at etherboot.org>

[cmdline] Fix inconsistent and ugly code formatting in shell_banner()

Signed-off-by: Michael Brown <mcb30 at ipxe.org>
Modified-by: Guo-Fu Tseng <cooldavid at cooldavid.org>
Signed-off-by: Guo-Fu Tseng <cooldavid at cooldavid.org>
Signed-off-by: Marty Connor <mdc at etherboot.org>
---
 src/hci/shell_banner.c |    6 +++---
 1 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/hci/shell_banner.c b/src/hci/shell_banner.c
index dbe3767..91633eb 100644
--- a/src/hci/shell_banner.c
+++ b/src/hci/shell_banner.c
@@ -41,10 +41,10 @@ int shell_banner ( void ) {
 	int wait_count;
 	int key;
 
-	if ( BANNER_TIMEOUT <=  0 ) {
-		return enter_shell;
-	}
+	if ( BANNER_TIMEOUT <= 0 )
+		return 0;
 
+	/* Display prompt */
 	printf ( "\nPress Ctrl-B for the gPXE command line..." );
 
 	/* Wait for key */

------
commit 419aa5b534c0b99e4c2a7acdd72ddc2492cbc94f
Author: Guo-Fu Tseng <cooldavid at cooldavid.org>
Date: Tue Jul 13 21:16:26 2010 +0800
Committer: Marty Connor <mdc at etherboot.org>

[tcp] Randomize TCP bind port

This changes is intended to reduce possible TCP port collisions.

Reviewed-by: Michael Brown <mcb30 at ipxe.org>
Signed-off-by: Marty Connor <mdc at etherboot.org>
---
 src/net/tcp.c |   11 ++++++-----
 1 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/src/net/tcp.c b/src/net/tcp.c
index 9dbc3f6..513c527 100644
--- a/src/net/tcp.c
+++ b/src/net/tcp.c
@@ -174,13 +174,14 @@ tcp_dump_flags ( struct tcp_connection *tcp, unsigned int flags ) {
  */
 static int tcp_bind ( struct tcp_connection *tcp, unsigned int port ) {
 	struct tcp_connection *existing;
-	static uint16_t try_port = 1023;
+	uint16_t try_port;
+	int i;
 
-	/* If no port specified, find the first available port */
+	/* If no port specified, find an available port */
 	if ( ! port ) {
-		while ( try_port ) {
-			try_port++;
-			if ( try_port < 1024 )
+		try_port = ( random() % 64512 ) + 1023;
+		for ( i = 0 ; i < 65536 ; ++i ) {
+			if ( ++try_port < 1024 )
 				continue;
 			if ( tcp_bind ( tcp, try_port ) == 0 )
 				return 0;

------
commit b3e7603286e77797f8fa460aaa2880636ec53b32
Author: Michael Brown <mcb30 at ipxe.org>
Date: Tue Jul 13 17:18:48 2010 +0100
Committer: Marty Connor <mdc at etherboot.org>

[tcp] Fix typos by changing ntohl() to htonl() where appropriate

Signed-off-by: Michael Brown <mcb30 at ipxe.org>
Signed-off-by: Marty Connor <mdc at etherboot.org>
---
 src/net/tcp.c |    4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/net/tcp.c b/src/net/tcp.c
index a8cd185..9dbc3f6 100644
--- a/src/net/tcp.c
+++ b/src/net/tcp.c
@@ -474,8 +474,8 @@ static int tcp_xmit ( struct tcp_connection *tcp, int force_send ) {
 		memset ( tsopt->nop, TCP_OPTION_NOP, sizeof ( tsopt->nop ) );
 		tsopt->tsopt.kind = TCP_OPTION_TS;
 		tsopt->tsopt.length = sizeof ( tsopt->tsopt );
-		tsopt->tsopt.tsval = ntohl ( currticks() );
-		tsopt->tsopt.tsecr = ntohl ( tcp->ts_recent );
+		tsopt->tsopt.tsval = htonl ( currticks() );
+		tsopt->tsopt.tsecr = htonl ( tcp->ts_recent );
 	}
 	if ( ! ( flags & TCP_SYN ) )
 		flags |= TCP_PSH;

------
commit 69c0f8aec2e73255d0feef5eaaed458d0bd92a9d
Author: Michael Brown <mcb30 at ipxe.org>
Date: Tue Jul 13 17:15:57 2010 +0100
Committer: Marty Connor <mdc at etherboot.org>

[tcp] Store local port in host byte order

Every other scalar integer value in struct tcp_connection is in host
byte order; change the definition of local_port to match.

Signed-off-by: Michael Brown <mcb30 at ipxe.org>
Signed-off-by: Marty Connor <mdc at etherboot.org>
---
 src/net/tcp.c |   18 +++++++++---------
 1 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/net/tcp.c b/src/net/tcp.c
index 0a9d213..a8cd185 100644
--- a/src/net/tcp.c
+++ b/src/net/tcp.c
@@ -37,7 +37,7 @@ struct tcp_connection {
 
 	/** Remote socket address */
 	struct sockaddr_tcpip peer;
-	/** Local port, in network byte order */
+	/** Local port */
 	unsigned int local_port;
 
 	/** Current TCP state */
@@ -166,7 +166,7 @@ tcp_dump_flags ( struct tcp_connection *tcp, unsigned int flags ) {
  * Bind TCP connection to local port
  *
  * @v tcp		TCP connection
- * @v port		Local port number, in network-endian order
+ * @v port		Local port number
  * @ret rc		Return status code
  *
  * If the port is 0, the connection is assigned an available port
@@ -182,7 +182,7 @@ static int tcp_bind ( struct tcp_connection *tcp, unsigned int port ) {
 			try_port++;
 			if ( try_port < 1024 )
 				continue;
-			if ( tcp_bind ( tcp, htons ( try_port ) ) == 0 )
+			if ( tcp_bind ( tcp, try_port ) == 0 )
 				return 0;
 		}
 		DBGC ( tcp, "TCP %p could not bind: no free ports\n", tcp );
@@ -193,13 +193,13 @@ static int tcp_bind ( struct tcp_connection *tcp, unsigned int port ) {
 	list_for_each_entry ( existing, &tcp_conns, list ) {
 		if ( existing->local_port == port ) {
 			DBGC ( tcp, "TCP %p could not bind: port %d in use\n",
-			       tcp, ntohs ( port ) );
+			       tcp, port );
 			return -EADDRINUSE;
 		}
 	}
 	tcp->local_port = port;
 
-	DBGC ( tcp, "TCP %p bound to port %d\n", tcp, ntohs ( port ) );
+	DBGC ( tcp, "TCP %p bound to port %d\n", tcp, port );
 	return 0;
 }
 
@@ -235,7 +235,7 @@ static int tcp_open ( struct xfer_interface *xfer, struct sockaddr *peer,
 	memcpy ( &tcp->peer, st_peer, sizeof ( tcp->peer ) );
 
 	/* Bind to local port */
-	bind_port = ( st_local ? st_local->st_port : 0 );
+	bind_port = ( st_local ? ntohs ( st_local->st_port ) : 0 );
 	if ( ( rc = tcp_bind ( tcp, bind_port ) ) != 0 )
 		goto err;
 
@@ -481,7 +481,7 @@ static int tcp_xmit ( struct tcp_connection *tcp, int force_send ) {
 		flags |= TCP_PSH;
 	tcphdr = iob_push ( iobuf, sizeof ( *tcphdr ) );
 	memset ( tcphdr, 0, sizeof ( *tcphdr ) );
-	tcphdr->src = tcp->local_port;
+	tcphdr->src = htons ( tcp->local_port );
 	tcphdr->dest = tcp->peer.st_port;
 	tcphdr->seq = htonl ( tcp->snd_seq );
 	tcphdr->ack = htonl ( tcp->rcv_ack );
@@ -613,7 +613,7 @@ static int tcp_xmit_reset ( struct tcp_connection *tcp,
 /**
  * Identify TCP connection by local port number
  *
- * @v local_port	Local port (in network-endian order)
+ * @v local_port	Local port
  * @ret tcp		TCP connection, or NULL
  */
 static struct tcp_connection * tcp_demux ( unsigned int local_port ) {
@@ -936,7 +936,7 @@ static int tcp_rx ( struct io_buffer *iobuf,
 	}
 	
 	/* Parse parameters from header and strip header */
-	tcp = tcp_demux ( tcphdr->dest );
+	tcp = tcp_demux ( ntohs ( tcphdr->dest ) );
 	start_seq = seq = ntohl ( tcphdr->seq );
 	ack = ntohl ( tcphdr->ack );
 	win = ntohs ( tcphdr->win );

------
commit ea51761c6cdab3b501a942cf8ec3db115ae2ef5a
Author: Michael Brown <mcb30 at ipxe.org>
Date: Sat May 22 00:45:49 2010 +0100
Committer: Marty Connor <mdc at etherboot.org>

[tcp] Update received sequence number before delivering received data

gPXE currently updates the TCP sequence number after delivering the
data to the application via xfer_deliver_iob().  If the application
responds to the received data by transmitting more data, this would
result in a stale ACK number appearing in the transmitted packet,
which potentially causes retransmissions and also gives the
undesirable appearance of violating causality (by sending a response
to a message that we claim not to have yet received).

Reported-by: Guo-Fu Tseng <cooldavid at cooldavid.org>
Signed-off-by: Michael Brown <mcb30 at ipxe.org>
Signed-off-by: Marty Connor <mdc at etherboot.org>
---
 src/net/tcp.c |   18 ++++++++++--------
 1 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/src/net/tcp.c b/src/net/tcp.c
index f9fd409..0a9d213 100644
--- a/src/net/tcp.c
+++ b/src/net/tcp.c
@@ -703,13 +703,13 @@ static int tcp_rx_syn ( struct tcp_connection *tcp, uint32_t seq,
 	if ( ( tcp->rcv_ack - seq ) > 0 )
 		return 0;
 
+	/* Acknowledge SYN */
+	tcp_rx_seq ( tcp, 1 );
+
 	/* Mark SYN as received and start sending ACKs with each packet */
 	tcp->tcp_state |= ( TCP_STATE_SENT ( TCP_ACK ) |
 			    TCP_STATE_RCVD ( TCP_SYN ) );
 
-	/* Acknowledge SYN */
-	tcp_rx_seq ( tcp, 1 );
-
 	return 0;
 }
 
@@ -810,6 +810,9 @@ static int tcp_rx_data ( struct tcp_connection *tcp, uint32_t seq,
 	iob_pull ( iobuf, already_rcvd );
 	len -= already_rcvd;
 
+	/* Acknowledge new data */
+	tcp_rx_seq ( tcp, len );
+
 	/* Deliver data to application */
 	if ( ( rc = xfer_deliver_iob ( &tcp->xfer, iobuf ) ) != 0 ) {
 		DBGC ( tcp, "TCP %p could not deliver %08x..%08x: %s\n",
@@ -817,9 +820,6 @@ static int tcp_rx_data ( struct tcp_connection *tcp, uint32_t seq,
 		return rc;
 	}
 
-	/* Acknowledge new data */
-	tcp_rx_seq ( tcp, len );
-
 	return 0;
 }
 
@@ -836,10 +836,12 @@ static int tcp_rx_fin ( struct tcp_connection *tcp, uint32_t seq ) {
 	if ( ( tcp->rcv_ack - seq ) > 0 )
 		return 0;
 
-	/* Mark FIN as received and acknowledge it */
-	tcp->tcp_state |= TCP_STATE_RCVD ( TCP_FIN );
+	/* Acknowledge FIN */
 	tcp_rx_seq ( tcp, 1 );
 
+	/* Mark FIN as received */
+	tcp->tcp_state |= TCP_STATE_RCVD ( TCP_FIN );
+
 	/* Close connection */
 	tcp_close ( tcp, 0 );
 
-----------------------------------------------------------------------


-- 
Main gPXE repository


More information about the gPXE-commits mailing list