[PATCH 2/2] [myri10ge] New native driver

Glenn Brown glenn at myri.com
Mon Dec 21 20:01:39 EST 2009


This driver supports all current Myricom 10 gigabit Ethernet NICs.
It was written from scratch for gPXE by Glenn Brown <glenn at myri.com>,
referencing Myricom's Linux and EFI drivers, with permission.

Signed-off-by: Glenn Brown <glenn at myri.com>
---
 src/drivers/net/myri10ge.c     | 1041 ++++++++++++++++++++++++++++++++++++++++
 src/drivers/net/myri10ge_mcp.h |  514 ++++++++++++++++++++
 src/include/gpxe/errfile.h     |    1 +
 src/include/gpxe/pci_ids.h     |    1 +
 4 files changed, 1557 insertions(+), 0 deletions(-)
 create mode 100644 src/drivers/net/myri10ge.c
 create mode 100644 src/drivers/net/myri10ge_mcp.h

diff --git a/src/drivers/net/myri10ge.c b/src/drivers/net/myri10ge.c
new file mode 100644
index 0000000..ac2e124
--- /dev/null
+++ b/src/drivers/net/myri10ge.c
@@ -0,0 +1,1041 @@
+/************************************************* -*- linux-c -*-
+ * Myricom 10Gb Network Interface Card Software
+ * Copyright 2009, Myricom, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ ****************************************************************/
+
+FILE_LICENCE ( GPL2_ONLY );
+
+/*
+ * Author: Glenn Brown <glenn at myri.com>
+ */
+
+/*
+ * General Theory of Operation
+ *
+ * This is a minimal Myricom 10 gigabit Ethernet driver for network
+ * boot.
+ *
+ * Initialization
+ *
+ * myri10ge_pci_probe() is called by gPXE during initialization.
+ * Minimal NIC initialization is performed to minimize resources
+ * consumed when the driver is resident but unused.
+ *
+ * Network Boot
+ *
+ * myri10ge_net_open() is called by gPXE before attempting to network
+ * boot from the card.  Packet buffers are allocated and the NIC
+ * interface is initialized.
+ *
+ * Transmit
+ *
+ * myri10ge_net_transmit() enqueues frames for transmission by writing
+ * descriptors to the NIC's tx ring.  For simplicity and to avoid
+ * copies, we always have the NIC DMA up the packet.  The sent I/O
+ * buffer is released once the NIC signals myri10ge_interrupt_handler()
+ * that the send has completed.
+ *
+ * Receive
+ *
+ * Receives are posted to the NIC's receive ring.  The NIC fills a
+ * DMAable receive_completion ring with completion notifications.
+ * myri10ge_net_poll() polls for these receive notifications, posts
+ * replacement receive buffers to the NIC, and passes received frames
+ * to netdev_rx().
+ */
+
+/*
+ * Debugging levels:
+ *	- DBG() is for any errors, i.e. failed alloc_iob(), malloc_dma(),
+ *	  TX overflow, corrupted packets, ...
+ *	- DBG2() is for successful events, like packet received,
+ *	  packet transmitted, and other general notifications.
+ *	- DBGP() prints the name of each called function on entry
+ */
+
+#include <stdint.h>
+
+#include <byteswap.h>
+#include <errno.h>
+#include <gpxe/ethernet.h>
+#include <gpxe/if_ether.h>
+#include <gpxe/iobuf.h>
+#include <gpxe/malloc.h>
+#include <gpxe/netdevice.h>
+#include <gpxe/pci.h>
+#include <gpxe/timer.h>
+
+#include "myri10ge_mcp.h"
+
+/****************************************************************
+ * Forward declarations
+ ****************************************************************/
+
+/* PCI driver entry points, installed in myri10ge_driver at the end of
+   this file. */
+
+static int	myri10ge_pci_probe ( struct pci_device*,
+				     const struct pci_device_id* );
+static void	myri10ge_pci_remove ( struct pci_device* );
+
+/* Network device operations, collected into the myri10ge_operations
+   table inside myri10ge_pci_probe(). */
+
+static void	myri10ge_net_close ( struct net_device* );
+static void	myri10ge_net_irq ( struct net_device*, int enable );
+static int	myri10ge_net_open ( struct net_device* );
+static void	myri10ge_net_poll ( struct net_device* );
+static int	myri10ge_net_transmit ( struct net_device*, struct io_buffer* );
+
+/****************************************************************
+ * Constants
+ ****************************************************************/
+
+/* Maximum ring indices, used to wrap ring indices.  These must be 2**N-1
+   so that "counter & WRAP" maps a free-running counter onto a ring slot.
+   Each host-side array sized from these constants holds WRAP+1 entries. */
+
+#define MYRI10GE_TRANSMIT_WRAP                  1U	/* transmit_iob[] */
+#define MYRI10GE_RECEIVE_WRAP                   7U	/* receive_iob[] */
+#define MYRI10GE_RECEIVE_COMPLETION_WRAP        31U	/* receive_completion[] */
+
+/****************************************************************
+ * Driver internal data types.
+ ****************************************************************/
+
+/* Structure holding all DMA buffers for a NIC, which we will
+   allocate as contiguous read/write DMAable memory when the NIC is
+   initialized.  myri10ge_net_open() obtains it with malloc_dma() and
+   clears it; myri10ge_net_close() releases it with free_dma(). */
+
+struct myri10ge_dma_buffers
+{
+	/* The NIC DMAs receive completion notifications into this ring.
+	   myri10ge_net_poll() treats a nonzero .length as a pending
+	   receive and writes it back to zero once the frame has been
+	   passed up. */
+
+	mcp_slot_t receive_completion[1+MYRI10GE_RECEIVE_COMPLETION_WRAP];
+
+	/* Interrupt details are DMAd here before interrupting.
+	   myri10ge_interrupt_handler() reads them. */
+
+	mcp_irq_data_t irq_data; /* 64B */
+
+	/* NIC command completion status is DMAd here.
+	   myri10ge_command() polls it. */
+
+	mcp_cmd_response_t command_response; /* 8B */
+};
+
+struct myri10ge_private
+{
+	/* Per-NIC driver state, reached through myri10ge_priv().  The
+	   whole structure is erased with memset() by
+	   myri10ge_net_close() and by a failed myri10ge_net_open().
+
+	   Interrupt support.  Both pointers are computed in
+	   myri10ge_net_open() from offsets returned by the firmware. */
+
+	uint32	*irq_claim;	/* in NIC SRAM */
+	uint32	*irq_deassert;	/* in NIC SRAM */
+
+	/* DMA buffers.  Valid only between a successful open and close. */
+
+	struct myri10ge_dma_buffers	*dma;
+
+	/*
+	 * Transmit state.
+	 *
+	 * The counts here are uint32 for easy comparison with
+	 * priv->dma->irq_data.send_done_count and with each other.
+	 *
+	 * transmits_posted and transmits_done are free-running counters;
+	 * they are masked with transmit_ring_wrap to index the NIC ring
+	 * and with MYRI10GE_TRANSMIT_WRAP to index transmit_iob[].
+	 */
+
+	mcp_kreq_ether_send_t	*transmit_ring;	/* in NIC SRAM */
+	uint32                   transmit_ring_wrap;	/* NIC ring entries - 1 */
+	uint32                   transmits_posted;	/* descriptors written */
+	uint32                   transmits_done;	/* completions reported */
+	struct io_buffer	*transmit_iob[1 + MYRI10GE_TRANSMIT_WRAP];
+
+	/*
+	 * Receive state.
+	 *
+	 * receives_posted and receives_done are free-running counters;
+	 * they are masked with receive_post_ring_wrap to index the NIC
+	 * ring and with MYRI10GE_RECEIVE_WRAP to index receive_iob[].
+	 */
+
+	mcp_kreq_ether_recv_t	*receive_post_ring;	/* in NIC SRAM */
+	unsigned int             receive_post_ring_wrap;	/* NIC ring entries - 1 */
+	unsigned int             receives_posted;	/* buffers given to NIC */
+	unsigned int             receives_done;	/* frames passed up */
+	struct io_buffer	*receive_iob[1 + MYRI10GE_RECEIVE_WRAP];
+
+	/* Address for writing commands to the firmware.
+	   BEWARE: the value must be written 32 bits at a time. */
+
+	mcp_cmd_t	*command;
+};
+
+/****************************************************************
+ * Driver internal functions.
+ ****************************************************************/
+
+/* Print ring status when debugging.  Use this only after a printed
+   value changes.  The output is the done/posted counters of the
+   transmit ring, then of the receive ring, all in hexadecimal,
+   followed by the name of the calling function.  "priv" is expanded
+   four times, so pass a side-effect-free expression. */
+
+#define DBG2_RINGS( priv ) 						\
+	DBG2 ( "tx %x/%x rx %x/%x in %s() \n",				\
+	       ( priv ) ->transmits_done, ( priv ) -> transmits_posted,	\
+	       ( priv ) ->receives_done, ( priv ) -> receives_posted,	\
+	       __FUNCTION__ )
+
+/*
+ * Locate the driver private data that belongs to a network device.
+ *
+ * @v nd	Network device created by this driver.
+ * @ret priv	The corresponding driver private data.
+ */
+static inline struct myri10ge_private *myri10ge_priv ( struct net_device *nd )
+{
+	/* myri10ge_pci_probe() asks alloc_etherdev() for room for one
+	   struct myri10ge_private; this driver relies on that storage
+	   starting directly behind the net_device structure. */
+
+	struct net_device *past_netdev = &nd[1];
+
+	return ( struct myri10ge_private * ) past_netdev;
+}
+
+/*
+ * Pass a receive buffer to the NIC to be filled.
+ *
+ * @v priv	The network device to receive the buffer.
+ * @v iob	The I/O buffer to fill.
+ *
+ * Receive buffers are filled in FIFO order.
+ *
+ * The buffer is remembered in priv->receive_iob[], its bus address is
+ * written (converted with htonl()) into the next slot of the NIC's
+ * receive post ring, and priv->receives_posted is advanced by one.
+ */
+static void myri10ge_post_receive ( struct myri10ge_private *priv,
+				    struct io_buffer *iob )
+{
+	unsigned int		 receives_posted;
+	mcp_kreq_ether_recv_t	*request;
+
+	/* Record the posted I/O buffer, to be passed to netdev_rx() on
+	   receive.  The host-side array is indexed with the driver's own
+	   MYRI10GE_RECEIVE_WRAP rather than with the NIC ring size. */
+
+	receives_posted = priv->receives_posted;
+	priv->receive_iob[receives_posted & MYRI10GE_RECEIVE_WRAP] = iob;
+
+	/* Post the receive.  The high word is written first and a write
+	   barrier keeps it ahead of the low word.
+	   NOTE(review): presumably the addr_low write is what makes the
+	   descriptor visible to the NIC -- confirm against the firmware
+	   interface documentation. */
+
+	request = &priv->receive_post_ring[receives_posted
+					   & priv->receive_post_ring_wrap];
+	request->addr_high = 0;
+	wmb();
+	request->addr_low = htonl ( virt_to_bus ( iob->data ) );
+	priv->receives_posted = ++receives_posted;
+}
+
+/*
+ * Execute a command on the NIC.
+ *
+ * @v priv	NIC to perform the command.
+ * @v cmd	The command to perform.
+ * @v data	I/O copy buffer for parameters/results
+ * @ret rc	0 on success, else an error code.
+ *
+ * All three words of data[] are always sent to the NIC, so callers
+ * should initialize the whole array.  On success data[0] is
+ * overwritten with the response data; data[1] and data[2] are left
+ * unchanged.  Returns -EIO if the NIC reports a nonzero result and
+ * -ETIMEDOUT if no result arrives within about 2 seconds.
+ */
+static int myri10ge_command ( struct myri10ge_private *priv,
+			      uint32 cmd,
+			      uint32 data[3] )
+{
+	int				 i;
+	mcp_cmd_t			*command;
+	uint32				 result;
+	unsigned int			 slept_ms;
+	volatile mcp_cmd_response_t	*response;
+
+	DBGP ( "myri10ge_command ( ,%d, ) \n", cmd );
+	command = priv->command;
+	response = &priv->dma->command_response;
+
+	/* Mark the command as incomplete.  0xFFFFFFFF is the sentinel
+	   the polling loop below recognizes as "no answer yet". */
+
+	response->result = 0xFFFFFFFF;
+
+	/* Pass the command to the NIC.  Every field is converted with
+	   htonl() and stored as a separate 32-bit write. */
+
+	command->cmd		    = htonl ( cmd );
+	command->data0		    = htonl ( data[0] );
+	command->data1		    = htonl ( data[1] );
+	command->data2		    = htonl ( data[2] );
+	command->response_addr.high = 0;
+	command->response_addr.low
+		= htonl ( virt_to_bus ( &priv->dma->command_response ) );
+	for ( i=0; i<36; i+=4 )
+		* ( uint32 * ) &command->pad[i] = 0;
+	wmb();
+	* ( uint32 * ) &command->pad[36] = 0;
+
+	/* The final pad word above is deliberately written after the
+	   barrier, i.e. after everything else.
+	   NOTE(review): presumably that last write is what triggers
+	   command execution -- confirm against the firmware interface.
+
+	   Wait up to 2 seconds for a response.  The result is compared
+	   without byte swapping, which is safe because both 0 and
+	   0xFFFFFFFF read the same in either byte order. */
+
+	for ( slept_ms=0; slept_ms<2000; slept_ms++ ) {
+		result = response->result;
+		if ( result == 0 ) {
+			data[0] = ntohl ( response->data );
+			return 0;
+		} else if ( result != 0xFFFFFFFF ) {
+			DBG ( "cmd%d:0x%x\n",
+			      cmd,
+			      ntohl ( response->result ) );
+			return -EIO;
+		}
+		udelay ( 1000 );
+		rmb();
+	}
+	DBG ( "cmd%d:timed out\n", cmd );
+	return -ETIMEDOUT;
+}
+
+/*
+ * Handle any pending interrupt.
+ *
+ * @v netdev		Device being polled for interrupts.
+ *
+ * This is called periodically to let the driver check for interrupts.
+ *
+ * It is invoked from myri10ge_net_poll().  "Pending" is detected by
+ * reading the irq_data block that the NIC DMAs into host memory.
+ * Transmit completions are reported to gPXE here; received frames are
+ * passed up by myri10ge_net_poll() itself.
+ */
+static void myri10ge_interrupt_handler ( struct net_device *netdev )
+{
+	struct myri10ge_private *priv;
+	mcp_irq_data_t		*irq_data;
+	uint8			 valid;
+
+	priv = myri10ge_priv ( netdev );
+	irq_data = &priv->dma->irq_data;
+
+	/* Return if there was no interrupt.  The value of "valid" is
+	   sampled once; bit 0 and bit 1 of that sample are tested
+	   below. */
+
+	rmb();
+	valid = irq_data->valid;
+	if ( !valid )
+		return;
+	DBG2 ( "irq " );
+
+	/* Tell the NIC to deassert the interrupt and clear
+	   irq_data->valid.*/
+
+	*priv->irq_deassert = 0;	/* any value is OK. */
+	mb();
+
+	/* Handle any new receives. */
+
+	if ( valid & 1 ) {
+
+		/* Pass the receive interrupt token back to the NIC. */
+
+		DBG2 ( "rx " );
+		*priv->irq_claim = htonl ( 3 );
+		wmb();
+	}
+
+	/* Handle any sent packet by freeing its I/O buffer, now that
+	   we know it has been DMAd.  Every send between our own
+	   transmits_done counter and the NIC's send_done_count is
+	   completed, oldest first. */
+
+	if ( valid & 2 ) {
+		unsigned int nic_done_count;
+
+		DBG2 ( "snt " );
+		nic_done_count = ntohl ( priv->dma->irq_data.send_done_count );
+		while ( priv->transmits_done != nic_done_count ) {
+			struct io_buffer *iob;
+
+			iob = priv->transmit_iob [priv->transmits_done
+						  & MYRI10GE_TRANSMIT_WRAP];
+			DBG2 ( "%p ", iob );
+			netdev_tx_complete ( netdev, iob );
+			++priv->transmits_done;
+		}
+	}
+
+	/* Record any statistics update. */
+
+	if ( irq_data->stats_updated ) {
+
+		/* Update the link status. */
+
+		DBG2 ( "stats " );
+		if ( ntohl ( irq_data->link_up ) == MXGEFW_LINK_UP )
+			netdev_link_up ( netdev );
+		else
+			netdev_link_down ( netdev );
+
+		/* Ignore all error counters from the NIC. */
+	}
+
+	/* Wait for the interrupt to be deasserted, as indicated by
+	   irq_data->valid, which is set by the NIC after the deassert.
+	   NOTE(review): this loop has no timeout; it spins for as long
+	   as the NIC leaves irq_data->valid nonzero. */
+
+	DBG2 ( "wait " );
+	do {
+		mb();
+	} while ( irq_data->valid );
+
+	/* Claim the interrupt to enable future interrupt generation.
+	   Note this writes the word after the one written in the
+	   receive case above. */
+
+	DBG2 ( "claim\n" );
+	* ( priv->irq_claim + 1 ) = htonl ( 3 );
+	mb();
+}
+
+/* Constants for reading the STRING_SPECS via the Myricom
+   Vendor Specific PCI configuration space capability.
+
+   VS_ADDR, VS_DATA and VS_MODE are configuration-space offsets
+   relative to a variable named "vs", which must be in scope where they
+   are used and hold the capability's offset.  The indented names are
+   values written to the register listed above them. */
+
+#define VS_ADDR ( vs + 0x18 )
+#define VS_DATA ( vs + 0x14 )
+#define VS_MODE ( vs + 0x10 )
+#define 	VS_MODE_READ32 0x3
+#define 	VS_MODE_LOCATE 0x8
+#define 		VS_LOCATE_STRING_SPECS 0x3
+
+/*
+ * Read MAC address from its 'string specs' via the vendor-specific
+ * capability.  (This capability allows NIC SRAM and ROM to be read
+ * before it is mapped.)
+ *
+ * The string specs are parsed as a run of NUL-terminated strings that
+ * ends at an empty string or at the end of the data.  Only the string
+ * starting with "MAC=" is used: six hexadecimal bytes, each followed
+ * by one separator character.
+ *
+ * @v pci		The device.
+ * @v mac		Buffer to store the MAC address.
+ * @ret rc		Returns 0 on success, else an error code.
+ */
+static int mac_address_from_string_specs ( struct pci_device *pci,
+						   uint8 mac[ETH_ALEN] )
+{
+	/* One spare byte lets us NUL-terminate whatever the NIC returns,
+	   so "%s" and strtoul() can never run off the end. */
+	char string_specs[256 + 1];
+	char *ptr, *limit;
+	char *to = string_specs;
+	uint32 addr;
+	uint32 len;
+	unsigned int vs;
+	int mac_set = 0;
+
+	/* Find the "vendor specific" capability (capability ID 9). */
+
+	vs = pci_find_capability ( pci, 9 );
+	if ( vs == 0 ) {
+		DBG ( "no VS\n" );
+		return -ENOTSUP;
+	}
+
+	/* Locate the String specs in LANai SRAM. */
+
+	pci_write_config_byte ( pci, VS_MODE, VS_MODE_LOCATE );
+	pci_write_config_dword ( pci, VS_ADDR, VS_LOCATE_STRING_SPECS );
+	pci_read_config_dword ( pci, VS_ADDR, &addr );
+	pci_read_config_dword ( pci, VS_DATA, &len );
+	DBG2 ( "ss@%x,%x\n", addr, len );
+
+	/* Copy in the string specs.  Use 32-bit reads for performance.
+	   The length must fit the buffer (less the terminator) and be a
+	   whole number of 32-bit words. */
+
+	if ( len > ( sizeof ( string_specs ) - 1 ) || ( len & 3 ) ) {
+		/* Restore mode 0, exactly as the success path does, rather
+		   than leaving the capability in LOCATE mode. */
+		pci_write_config_byte ( pci, VS_MODE, 0 );
+		DBG ( "SS too big\n" );
+		return -ENOTSUP;
+	}
+
+	pci_write_config_byte ( pci, VS_MODE, VS_MODE_READ32 );
+	while ( len >= 4 ) {
+		uint32 tmp;
+
+		/* The read address is a full 32-bit value, so write all of
+		   it; a byte-wide write would only update its low 8 bits. */
+		pci_write_config_dword ( pci, VS_ADDR, addr );
+		pci_read_config_dword ( pci, VS_DATA, &tmp );
+		tmp = ntohl ( tmp );
+		memcpy ( to, &tmp, 4 );
+		to += 4;
+		addr += 4;
+		len -= 4;
+	}
+	pci_write_config_byte ( pci, VS_MODE, 0 );
+	*to = '\0';
+
+	/* Parse the string specs.  Only the bytes actually copied from
+	   the NIC, i.e. [string_specs, limit), are examined; the rest of
+	   the buffer was never initialized. */
+
+	DBG2 ( "STRING_SPECS:\n" );
+	ptr = string_specs;
+	limit = to;
+	while ( ptr < limit && *ptr != '\0' ) {
+		DBG2 ( "%s\n", ptr );
+		if ( ( limit - ptr ) >= 4
+		     && memcmp ( ptr, "MAC=", 4 ) == 0 ) {
+			unsigned int i;
+
+			ptr += 4;
+			for ( i=0; i<6; i++ ) {
+				/* Need two hex digits for each byte. */
+				if ( ( limit - ptr ) < 2 ) {
+					DBG ( "bad MAC addr\n" );
+					return -ENOTSUP;
+				}
+				mac[i] = strtoul ( ptr, &ptr, 16 );
+				/* Skip the separator (or, after the last
+				   byte, the string terminator). */
+				ptr += 1;
+			}
+			mac_set = 1;
+		}
+		else
+			/* Skip to just past this string's terminator. */
+			while ( ptr < limit && *ptr++ );
+	}
+
+	/* Verify we parsed all we need. */
+
+	if ( !mac_set ) {
+		DBG ( "no MAC addr\n" );
+		return -ENOTSUP;
+	}
+
+	DBG2 ( "MAC %02x:%02x:%02x:%02x:%02x:%02x\n",
+	       mac[0], mac[1], mac[2], mac[3], mac[4], mac[5] );
+
+	return 0;
+}
+
+/****************************************************************
+ * gPXE PCI Device Driver API functions
+ ****************************************************************/
+
+/*
+ * Initialize the PCI device.
+ *
+ * @v pci 		The device's associated pci_device structure.
+ * @v id  		The PCI device + vendor id.
+ * @ret rc		Returns zero if successfully initialized.
+ *
+ * This function is called very early on, while gPXE is initializing.
+ * This is a gPXE PCI Device Driver API function.
+ *
+ * It allocates the net_device together with the driver private data,
+ * reads the MAC address into netdev->hw_addr and registers the
+ * device.  No DMA memory or packet buffers are allocated here; that
+ * happens in myri10ge_net_open().  On failure the net_device is
+ * released again and the error code is returned.
+ */
+static int myri10ge_pci_probe ( struct pci_device *pci,
+				const struct pci_device_id *id __unused )
+{
+	static struct net_device_operations myri10ge_operations = {
+		.open     = myri10ge_net_open,
+		.close    = myri10ge_net_close,
+		.transmit = myri10ge_net_transmit,
+		.poll     = myri10ge_net_poll,
+		.irq      = myri10ge_net_irq
+	};
+
+	const char *dbg;	/* names the failed step in the error message */
+	int rc;
+	struct net_device *netdev;
+	struct myri10ge_private *priv;
+
+	DBGP ( "myri10ge_pci_probe: " );
+
+	netdev = alloc_etherdev ( sizeof ( *priv ) );
+	if ( !netdev ) {
+		rc = -ENOMEM;
+		dbg = "alloc_etherdev";
+		goto abort_with_nothing;
+	}
+
+	netdev_init ( netdev, &myri10ge_operations );
+	priv = myri10ge_priv ( netdev );
+
+	pci_set_drvdata ( pci, netdev );
+	netdev->dev = &pci->dev;
+
+	/* Make sure interrupts are disabled.  netdev->dev must already
+	   be set, because myri10ge_net_irq() reaches the PCI device
+	   through it. */
+
+	myri10ge_net_irq ( netdev, 0 );
+
+	/* Read the NIC HW address. */
+
+	rc = mac_address_from_string_specs ( pci, netdev->hw_addr );
+	if ( rc ) {
+		dbg = "mac_from_ss";
+		goto abort_with_netdev_init;
+	}
+	DBGP ( "mac " );
+
+	/* Enable bus master, etc. */
+
+	adjust_pci_device ( pci );
+	DBGP ( "pci " );
+
+	/* Register the initialized network device. */
+
+	rc = register_netdev ( netdev );
+	if ( rc ) {
+		dbg = "register_netdev";
+		goto abort_with_netdev_init;
+	}
+
+	DBGP ( "done\n" );
+
+	return 0;
+
+abort_with_netdev_init:
+	netdev_nullify ( netdev );
+	netdev_put ( netdev );
+abort_with_nothing:
+	DBG ( "%s:%s\n", dbg, strerror ( rc ) );
+	return rc;
+}
+
+/*
+ * Detach this driver from a PCI device.
+ *
+ * @v pci		PCI device to remove.
+ *
+ * This is a PCI Device Driver API function.
+ */
+static void myri10ge_pci_remove ( struct pci_device *pci )
+{
+	/* myri10ge_pci_probe() stored the net_device as the driver data. */
+	struct net_device	*nd = pci_get_drvdata ( pci );
+
+	DBGP ( "myri10ge_pci_remove\n" );
+
+	/* Undo the probe: unregister first, then give up our reference
+	   to the net_device. */
+
+	unregister_netdev ( nd );
+	netdev_nullify ( nd );
+	netdev_put ( nd );
+}
+
+/****************************************************************
+ * gPXE Network Device Driver Operations
+ ****************************************************************/
+
+/*
+ * Close a network device.
+ *
+ * @v netdev		Device to close.
+ *
+ * This is a gPXE Network Device Driver API function.
+ *
+ * It undoes myri10ge_net_open(): the NIC is reset, receive buffers
+ * still posted to it are freed, the DMA block is released and the
+ * private state is cleared.
+ */
+static void myri10ge_net_close ( struct net_device *netdev )
+{
+	struct myri10ge_private *priv;
+	/* myri10ge_command() sends all three words to the NIC, so give
+	   them defined values instead of stack garbage. */
+	uint32			 data[3] = { 0, 0, 0 };
+
+	DBGP ( "myri10ge_net_close\n" );
+	priv = myri10ge_priv ( netdev );
+
+	/* disable interrupts */
+
+	myri10ge_net_irq ( netdev, 0 );
+
+	/* Reset the NIC interface, so we won't get any more events from
+	   the NIC.  A failure is deliberately ignored: there is nothing
+	   more a close can do about it. */
+
+	myri10ge_command ( priv, MXGEFW_CMD_RESET, data );
+
+	/* Free receive buffers that were never filled. */
+
+	while ( priv->receives_done != priv->receives_posted ) {
+		free_iob ( priv->receive_iob[priv->receives_done
+					     & MYRI10GE_RECEIVE_WRAP] );
+		++priv->receives_done;
+	}
+
+	/* Release DMAable memory. */
+
+	free_dma ( priv->dma, sizeof ( *priv->dma ) );
+
+	/* Erase all state from the open. */
+
+	memset ( priv, 0, sizeof ( *priv ) );
+
+	/* After the memset this prints all-zero counters. */
+
+	DBG2_RINGS ( priv );
+}
+
+/*
+ * Mask or unmask the device's interrupt.
+ *
+ * @v netdev		Device to control.
+ * @v enable		Zero to mask off IRQ, non-zero to enable IRQ.
+ *
+ * This is a gPXE Network Driver API function.
+ */
+static void myri10ge_net_irq ( struct net_device *netdev, int enable )
+{
+	struct pci_device	*pci = ( struct pci_device * ) netdev->dev;
+	uint16			 command;
+
+	DBGP ( "myri10ge_net_irq\n" );
+
+	/* Read-modify-write the PCI Command register: the Interrupt
+	   Disable bit is cleared to enable and set to mask. */
+
+	pci_read_config_word ( pci, PCI_COMMAND, &command );
+	command = ( enable
+		    ? ( command & ~PCI_COMMAND_INTX_DISABLE )
+		    : ( command | PCI_COMMAND_INTX_DISABLE ) );
+	pci_write_config_word ( pci, PCI_COMMAND, command );
+}
+
+/*
+ * Opens a network device.
+ *
+ * @v netdev		Device to be opened.
+ * @ret rc  		Non-zero if failed to open.
+ *
+ * This enables tx and rx on the device.
+ * This is a gPXE Network Device Driver API function.
+ *
+ * On any failure everything acquired so far (posted receive buffers
+ * and the DMA block) is released, the private state is zeroed and an
+ * error code is returned.
+ */
+static int myri10ge_net_open ( struct net_device *netdev )
+{
+	const char		*dbg;	/* printed upon error return */
+	int			 rc;
+	struct io_buffer	*iob;
+	struct myri10ge_private *priv;
+	/* myri10ge_command() sends all three words with every command,
+	   so start from defined values instead of stack garbage. */
+	uint32			 data[3] = { 0, 0, 0 };
+	struct pci_device	*pci_dev;
+	void			*membase;
+
+	DBGP ( "myri10ge_net_open\n" );
+	priv	= myri10ge_priv ( netdev );
+	pci_dev = ( struct pci_device * ) netdev->dev;
+	membase = phys_to_virt ( pci_dev->membase );
+
+	/* Compute address for passing commands to the firmware. */
+
+	priv->command = membase + MXGEFW_ETH_CMD;
+
+	/* Ensure interrupts are disabled. */
+
+	myri10ge_net_irq ( netdev, 0 );
+
+	/* Allocate cleared DMAable buffers. */
+
+	priv->dma = malloc_dma ( sizeof ( *priv->dma ) , 128 );
+	if ( !priv->dma ) {
+		rc = -ENOMEM;
+		dbg = "DMA";
+		goto abort_with_nothing;
+	}
+	memset ( priv->dma, 0, sizeof ( *priv->dma ) );
+
+	/* Simplify following code.  TRY() runs the firmware command
+	   MXGEFW_<prefix><base><suffix> with data[] and, on failure,
+	   jumps to abort_with_dma with "dbg" naming the command.  It
+	   must therefore only be used while the DMA block is the only
+	   resource held. */
+
+#define TRY( prefix, base, suffix ) do {		\
+		rc = myri10ge_command ( priv,		\
+					MXGEFW_		\
+					## prefix	\
+					## base		\
+					## suffix,	\
+					data );		\
+		if ( rc ) {				\
+			dbg = #base;			\
+			goto abort_with_dma;		\
+		}					\
+	} while ( 0 )
+
+	/* Send a reset command to the card to see if it is alive,
+	   and to reset its queue state. */
+
+	TRY ( CMD_, RESET , );
+
+	/* Set the interrupt queue size. */
+
+	data[0] = ( sizeof ( priv->dma->receive_completion )
+		    | MXGEFW_CMD_SET_INTRQ_SIZE_FLAG_NO_STRICT_SIZE_CHECK );
+	TRY ( CMD_SET_ , INTRQ_SIZE , );
+
+	/* Set the interrupt queue DMA address. */
+
+	data[0] = virt_to_bus ( &priv->dma->receive_completion );
+	data[1] = 0;
+	TRY ( CMD_SET_, INTRQ_DMA, );
+
+	/* Get the NIC interrupt claim address. */
+
+	TRY ( CMD_GET_, IRQ_ACK, _OFFSET );
+	priv->irq_claim = membase + data[0];
+
+	/* Get the NIC interrupt deassert address. */
+
+	TRY ( CMD_GET_, IRQ_DEASSERT, _OFFSET );
+	priv->irq_deassert = membase + data[0];
+
+	/* Disable interrupt coalescing, which is inappropriate for the
+	   minimal buffering we provide. */
+
+	TRY ( CMD_GET_, INTR_COAL, _DELAY_OFFSET );
+	* ( ( uint32 * ) ( membase + data[0] ) ) = 0;
+
+	/* Set the NIC mac address.  The bytes are widened to uint32
+	   before shifting so that a first byte >= 0x80 is not shifted
+	   into the sign bit of a promoted int. */
+
+	data[0] = ( ( uint32 ) netdev->ll_addr[0] << 24
+		    | ( uint32 ) netdev->ll_addr[1] << 16
+		    | ( uint32 ) netdev->ll_addr[2] << 8
+		    | ( uint32 ) netdev->ll_addr[3] );
+	data[1] = ( ( ( uint32 ) netdev->ll_addr[4] << 8 )
+		     | ( uint32 ) netdev->ll_addr[5] );
+	TRY ( SET_ , MAC_ADDRESS , );
+
+	/* Enable multicast receives, because some gPXE clients don't work
+	   without multicast. */
+
+	TRY ( ENABLE_ , ALLMULTI , );
+
+	/* Disable Ethernet flow control, so the NIC cannot deadlock the
+	   network under any circumstances. */
+
+	TRY ( DISABLE_ , FLOW , _CONTROL );
+
+	/* Compute transmit ring sizes.  The entry count must be a power
+	   of two so that "count - 1" can be used as an index mask. */
+
+	data[0] = 0;		/* slice 0 */
+	TRY ( CMD_GET_, SEND_RING, _SIZE );
+	priv->transmit_ring_wrap
+		= data[0] / sizeof ( mcp_kreq_ether_send_t ) - 1;
+	if ( priv->transmit_ring_wrap
+	     & ( priv->transmit_ring_wrap + 1 ) ) {
+		rc = -EPROTO;
+		dbg = "TX_RING";
+		goto abort_with_dma;
+	}
+
+	/* Compute receive ring sizes, with the same power-of-two check. */
+
+	data[0] = 0;		/* slice 0 */
+	TRY ( CMD_GET_ , RX_RING , _SIZE );
+	priv->receive_post_ring_wrap = data[0] / sizeof ( mcp_dma_addr_t ) - 1;
+	if ( priv->receive_post_ring_wrap
+	     & ( priv->receive_post_ring_wrap + 1 ) ) {
+		rc = -EPROTO;
+		dbg = "RX_RING";
+		goto abort_with_dma;
+	}
+
+	/* Get NIC transmit ring address. */
+
+	data[0] = 0;		/* slice 0. */
+	TRY ( CMD_GET_, SEND, _OFFSET );
+	priv->transmit_ring = membase + data[0];
+
+	/* Get the NIC receive ring address. */
+
+	data[0] = 0;		/* slice 0. */
+	TRY ( CMD_GET_, SMALL_RX, _OFFSET );
+	priv->receive_post_ring = membase + data[0];
+
+	/* Set the Nic MTU. */
+
+	data[0] = ETH_FRAME_LEN;
+	TRY ( CMD_SET_, MTU, );
+
+	/* Tell the NIC our buffer sizes. ( We use only small buffers, so we
+	   set both buffer sizes to the same value, which will force all
+	   received frames to use small buffers. ) */
+
+	data[0] = MXGEFW_PAD + ETH_FRAME_LEN;
+	TRY ( CMD_SET_, SMALL_BUFFER, _SIZE );
+	data[0] = MXGEFW_PAD + ETH_FRAME_LEN;
+	TRY ( CMD_SET_, BIG_BUFFER, _SIZE );
+
+	/* Tell firmware where to DMA IRQ data */
+
+	data[0] = virt_to_bus ( &priv->dma->irq_data );
+	data[1] = 0;
+	data[2] = sizeof ( priv->dma->irq_data );
+	TRY ( CMD_SET_, STATS_DMA_V2, );
+
+	/* Post receives. */
+
+	while ( priv->receives_posted <= MYRI10GE_RECEIVE_WRAP ) {
+
+		/* Reserve 2 extra bytes at the start of packets, since
+		   the firmware always skips the first 2 bytes of the buffer
+		   so TCP headers will be aligned. */
+
+		iob = alloc_iob ( MXGEFW_PAD + ETH_FRAME_LEN );
+		if ( !iob ) {
+			rc = -ENOMEM;
+			dbg = "alloc_iob";
+			goto abort_with_receives_posted;
+		}
+		iob_reserve ( iob, MXGEFW_PAD );
+		myri10ge_post_receive ( priv, iob );
+	}
+
+	/* Bring up the link.  TRY() is not used here: receive buffers
+	   are posted by now, so a failure must release them as well
+	   instead of jumping straight to abort_with_dma, which would
+	   leak every one of them.
+	   NOTE(review): if this command times out the link state is
+	   unknown; consider resetting the NIC before the buffers are
+	   freed. */
+
+	rc = myri10ge_command ( priv, MXGEFW_CMD_ETHERNET_UP, data );
+	if ( rc ) {
+		dbg = "ETHERNET_UP";
+		goto abort_with_receives_posted;
+	}
+
+	DBG2_RINGS ( priv );
+	return 0;
+
+abort_with_receives_posted:
+	/* At most MYRI10GE_RECEIVE_WRAP + 1 buffers were posted, so the
+	   counter can index receive_iob[] directly here. */
+	while ( priv->receives_posted-- )
+		free_iob ( priv->receive_iob[priv->receives_posted] );
+abort_with_dma:
+	/* Because the link is not up, we don't have to reset the NIC here. */
+	free_dma ( priv->dma, sizeof ( *priv->dma ) );
+abort_with_nothing:
+	/* Erase all signs of the failed open. */
+	memset ( priv, 0, sizeof ( *priv ) );
+	DBG ( "%s: %s\n", dbg, strerror ( rc ) );
+	return ( rc );
+}
+
+/*
+ * This function allows a driver to process events during operation.
+ *
+ * @v netdev		Device being polled.
+ *
+ * This is called periodically by gPXE to let the driver check the status of
+ * transmitted packets and to allow the driver to check for received packets.
+ * This is a gPXE Network Device Driver API function.
+ *
+ * Transmit completions are handled inside myri10ge_interrupt_handler().
+ * Received frames are found by scanning the receive completion ring in
+ * host memory; each one is handed to netdev_rx() and replaced with a
+ * freshly allocated buffer.
+ */
+static void myri10ge_net_poll ( struct net_device *netdev )
+{
+	struct io_buffer		*iob;
+	struct io_buffer		*replacement;
+	struct myri10ge_dma_buffers	*dma;
+	struct myri10ge_private		*priv;
+	unsigned int			 length;
+	unsigned int			 orig_receives_posted;
+
+	DBGP ( "myri10ge_net_poll\n" );
+	priv = myri10ge_priv ( netdev );
+	dma  = priv->dma;
+
+	/* Process any pending interrupt. */
+
+	myri10ge_interrupt_handler ( netdev );
+
+	/* Pass up received frames, but limit ourselves to receives posted
+	   before this function was called, so we cannot livelock if
+	   receives are arriving faster than we process them. */
+
+	orig_receives_posted = priv->receives_posted;
+	while ( priv->receives_done != orig_receives_posted ) {
+
+		/* Stop if there is no pending receive.  A completion slot
+		   whose length is zero has not been filled in yet. */
+
+		length = ntohs ( dma->receive_completion
+				 [priv->receives_done
+				  & MYRI10GE_RECEIVE_COMPLETION_WRAP]
+				 .length );
+		if ( length == 0 )
+			break;
+
+		/* Allocate a replacement buffer.  If none is available,
+		   stop passing up packets until a buffer is available.
+		   The completion slot is left untouched in that case, so
+		   the same frame is seen again on the next poll.
+
+		   Reserve 2 extra bytes at the start of packets, since
+		   the firmware always skips the first 2 bytes of the buffer
+		   so TCP headers will be aligned. */
+
+		replacement = alloc_iob ( MXGEFW_PAD + ETH_FRAME_LEN );
+		if ( !replacement ) {
+			DBG ( "NO RX BUF\n" );
+			break;
+		}
+		iob_reserve ( replacement, MXGEFW_PAD );
+
+		/* Pass up the received frame.  The length reported by the
+		   NIC is used as is. */
+
+		iob = priv->receive_iob[priv->receives_done
+					& MYRI10GE_RECEIVE_WRAP];
+		iob_put ( iob, length );
+		netdev_rx ( netdev, iob );
+
+		/* We have consumed the packet, so clear the receive
+		   notification. */
+
+		dma->receive_completion [priv->receives_done
+					 & MYRI10GE_RECEIVE_COMPLETION_WRAP]
+			.length = 0;
+		wmb();
+
+		/* Replace the passed-up I/O buffer. */
+
+		myri10ge_post_receive ( priv, replacement );
+		++priv->receives_done;
+		DBG2_RINGS ( priv );
+	}
+}
+
+/*
+ * This transmits a packet.
+ *
+ * @v netdev		Device to transmit from.
+ * @v iobuf 		Data to transmit.
+ * @ret rc  		Non-zero if failed to transmit.
+ *
+ * This is a gPXE Network Driver API function.
+ *
+ * Returns -ENOBUFS when MYRI10GE_TRANSMIT_WRAP + 1 sends are already
+ * outstanding.  Otherwise the buffer is remembered in
+ * priv->transmit_iob[] until myri10ge_interrupt_handler() reports its
+ * completion with netdev_tx_complete().
+ */
+static int myri10ge_net_transmit ( struct net_device *netdev,
+				   struct io_buffer *iobuf )
+{
+	mcp_kreq_ether_send_t	*kreq;
+	size_t			 len;
+	struct myri10ge_private *priv;
+	uint32			 transmits_posted;
+
+	DBGP ( "myri10ge_net_transmit\n" );
+	priv = myri10ge_priv ( netdev );
+
+	/* Confirm space in the send ring.  The limit is the size of
+	   the host-side transmit_iob[] array, not of the NIC ring. */
+
+	transmits_posted = priv->transmits_posted;
+	if ( transmits_posted - priv->transmits_done
+	     > MYRI10GE_TRANSMIT_WRAP ) {
+		DBG ( "TX ring full\n" );
+		return -ENOBUFS;
+	}
+
+	DBG2 ( "TX %p+%d ", iobuf->data, iob_len ( iobuf ) );
+	DBG2_HD ( iobuf->data, 14 );
+
+	/* Record the packet being transmitted, so we can later report
+	   send completion. */
+
+	priv->transmit_iob[transmits_posted & MYRI10GE_TRANSMIT_WRAP] = iobuf;
+
+	/* Copy and pad undersized frames, because the NIC does not pad,
+	   and we would rather copy small frames than do a gather. */
+
+	len = iob_len ( iobuf );
+	if ( len < ETH_ZLEN ) {
+		iob_pad ( iobuf, ETH_ZLEN );
+		len = ETH_ZLEN;
+	}
+
+	/* Enqueue the packet by writing a descriptor to the NIC.
+	   This is a bit tricky because the HW requires 32-bit writes,
+	   but the structure has smaller fields.  Words 2 and 3 of the
+	   descriptor are therefore assembled by hand and stored whole;
+	   word 3, which carries the flags, is written last, after a
+	   write barrier.
+	   NOTE(review): presumably the flags word is what makes the
+	   descriptor valid to the NIC -- confirm against the firmware
+	   interface documentation. */
+
+	kreq = &priv->transmit_ring[transmits_posted
+				    & priv->transmit_ring_wrap];
+	kreq->addr_high = 0;
+	kreq->addr_low = htonl ( virt_to_bus ( iobuf->data ) );
+	( ( uint32 * ) kreq ) [2] = htonl (
+		0x0000 << 16	 /* pseudo_header_offset */
+		| ( len & 0xFFFF ) /* length */
+		);
+	wmb();
+	( ( uint32 * ) kreq ) [3] = htonl (
+		0x00 << 24	/* pad */
+		| 0x01 << 16	/* rdma_count */
+		| 0x00 << 8	/* cksum_offset */
+		| ( MXGEFW_FLAGS_SMALL
+		    | MXGEFW_FLAGS_FIRST
+		    | MXGEFW_FLAGS_NO_TSO ) /* flags */
+		);
+	wmb();
+
+	/* Mark the slot as consumed and return. */
+
+	priv->transmits_posted = ++transmits_posted;
+	DBG2_RINGS ( priv );
+	return 0;
+}
+
+static struct pci_device_id myri10ge_nics[] = {	/* PCI IDs handled by this driver */
+	/* Each of these macros must be a single line to satisfy a script.
+	   The one entry below is vendor 0x14c1, device 0x0008. */
+	PCI_ROM ( 0x14c1, 0x0008, "myri10ge", "Myricom 10Gb Ethernet Adapter", 0 ) ,
+};
+
+struct pci_driver myri10ge_driver __pci_driver = {	/* ties the ID table to the entry points */
+	.ids      = myri10ge_nics,
+	.id_count = ( sizeof ( myri10ge_nics ) / sizeof ( myri10ge_nics[0] ) ) ,	/* number of table entries */
+	.probe    = myri10ge_pci_probe,
+	.remove   = myri10ge_pci_remove
+};
+
+/*
+ * Local variables:
+ *  c-basic-offset: 8
+ *  c-indent-level: 8
+ *  tab-width: 8
+ * End:
+ */
diff --git a/src/drivers/net/myri10ge_mcp.h b/src/drivers/net/myri10ge_mcp.h
new file mode 100644
index 0000000..397f8b0
--- /dev/null
+++ b/src/drivers/net/myri10ge_mcp.h
@@ -0,0 +1,514 @@
+/************************************************* -*- linux-c -*-
+ * Myricom 10Gb Network Interface Card Software
+ * Copyright 2005-2010, Myricom, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ ****************************************************************/
+
+FILE_LICENCE ( GPL2_ONLY );
+
+#ifndef _myri10ge_mcp_h
+#define _myri10ge_mcp_h
+
+#define MXGEFW_VERSION_MAJOR	1
+#define MXGEFW_VERSION_MINOR	4
+
+#ifdef MXGEFW	/* fallback fixed-width typedefs, compiled only when MXGEFW is defined... */
+#ifndef _stdint_h_	/* ...and only when _stdint_h_ is not already defined */
+typedef signed char          int8_t;
+typedef signed short        int16_t;
+typedef signed int          int32_t;
+typedef signed long long    int64_t;
+typedef unsigned char       uint8_t;
+typedef unsigned short     uint16_t;
+typedef unsigned int       uint32_t;
+typedef unsigned long long uint64_t;
+#endif /* _stdint_h_ */
+#endif /* MXGEFW */
+
+/* 8 Bytes: two 32-bit words; presumably the upper and lower halves of one 64-bit DMA address (confirm) */
+struct mcp_dma_addr {
+  uint32_t high;
+  uint32_t low;
+};
+typedef struct mcp_dma_addr mcp_dma_addr_t;
+
+/* 4 Bytes: the default slot format (type 0 of MXGEFW_CMD_SET_RSS_MCP_SLOT_TYPE) */
+struct mcp_slot {
+  uint16_t checksum;
+  uint16_t length;
+};
+typedef struct mcp_slot mcp_slot_t;
+
+#ifdef MXGEFW_NDIS
+/* 8-byte descriptor, exclusively used by NDIS drivers (slot type 1 of MXGEFW_CMD_SET_RSS_MCP_SLOT_TYPE). */
+struct mcp_slot_8 {
+  /* Place hash value at the top so it gets written before length.
+   * The driver polls length.
+   */
+  uint32_t hash;
+  uint16_t checksum;
+  uint16_t length;
+};
+typedef struct mcp_slot_8 mcp_slot_8_t;
+
+/* Two bits of length in mcp_slot are used to indicate hash type. */
+#define MXGEFW_RSS_HASH_NULL (0 << 14) /* bit 15:14 = 00 */
+#define MXGEFW_RSS_HASH_IPV4 (1 << 14) /* bit 15:14 = 01 */
+#define MXGEFW_RSS_HASH_TCP_IPV4 (2 << 14) /* bit 15:14 = 10 */
+#define MXGEFW_RSS_HASH_MASK (3 << 14) /* bit 15:14 = 11, i.e. both hash-type bits */
+#endif /* MXGEFW_NDIS */
+
+/* 64 Bytes: 4 x 32-bit words, an 8-byte mcp_dma_addr, and 40 bytes of padding */
+struct mcp_cmd {
+  uint32_t cmd;
+  uint32_t data0;	/* will be low portion if data > 32 bits */
+  /* 8 */
+  uint32_t data1;	/* will be high portion if data > 32 bits */
+  uint32_t data2;	/* per-command argument; see the data2 notes in enum myri10ge_mcp_cmd_type */
+  /* 16 */
+  struct mcp_dma_addr response_addr;
+  /* 24 */
+  uint8_t pad[40];	/* brings the structure up to 64 bytes */
+};
+typedef struct mcp_cmd mcp_cmd_t;
+
+/* 8 Bytes: two 32-bit words */
+struct mcp_cmd_response {
+  uint32_t data;	/* presumably the "return data" described per command -- confirm */
+  uint32_t result;	/* presumably an enum myri10ge_mcp_cmd_status value -- confirm */
+};
+typedef struct mcp_cmd_response mcp_cmd_response_t;
+
+
+
+/*
+   flags used in mcp_kreq_ether_send_t:
+
+   The SMALL flag is only needed in the first segment. It is raised
+   for packets whose total length is <= 512 bytes (NOTE: MXGEFW_SEND_SMALL_SIZE is 1520; confirm which applies).
+
+   The CKSUM flag must be set in all segments.
+
+   The PADDED flag is set if the packet needs to be padded, and it
+   must be set for all segments.
+
+   The MXGEFW_FLAGS_ALIGN_ODD flag must be set if the cumulative
+   length of all previous segments was odd.
+*/
+
+
+#define MXGEFW_FLAGS_SMALL      0x1
+#define MXGEFW_FLAGS_TSO_HDR    0x1	/* same bit value as MXGEFW_FLAGS_SMALL */
+#define MXGEFW_FLAGS_FIRST      0x2
+#define MXGEFW_FLAGS_ALIGN_ODD  0x4
+#define MXGEFW_FLAGS_CKSUM      0x8
+#define MXGEFW_FLAGS_TSO_LAST   0x8	/* same bit value as MXGEFW_FLAGS_CKSUM */
+#define MXGEFW_FLAGS_NO_TSO     0x10
+#define MXGEFW_FLAGS_TSO_CHOP   0x10	/* same bit value as MXGEFW_FLAGS_NO_TSO */
+#define MXGEFW_FLAGS_TSO_PLD    0x20
+
+#define MXGEFW_SEND_SMALL_SIZE  1520
+#define MXGEFW_MAX_MTU          9400
+
+union mcp_pso_or_cumlen {	/* one 16-bit slot viewed under either name */
+  uint16_t pseudo_hdr_offset;
+  uint16_t cum_len;
+};
+typedef union mcp_pso_or_cumlen mcp_pso_or_cumlen_t;
+
+#define	MXGEFW_MAX_SEND_DESC 12
+#define MXGEFW_PAD	    2
+
+/* 16 Bytes: send descriptor; myri10ge.c fills the last two 32-bit words with whole-word htonl() stores */
+struct mcp_kreq_ether_send {
+  uint32_t addr_high;
+  uint32_t addr_low;
+  uint16_t pseudo_hdr_offset;	/* upper 16 bits of 32-bit word 2 as written by myri10ge.c */
+  uint16_t length;	/* lower 16 bits of 32-bit word 2 */
+  uint8_t  pad;
+  uint8_t  rdma_count;
+  uint8_t  cksum_offset; 	/* where to start computing cksum */
+  uint8_t  flags;	       	/* MXGEFW_FLAGS_* values, as defined above */
+};
+typedef struct mcp_kreq_ether_send mcp_kreq_ether_send_t;
+
+/* 8 Bytes: two address words only; no length or flags fields */
+struct mcp_kreq_ether_recv {
+  uint32_t addr_high;
+  uint32_t addr_low;
+};
+typedef struct mcp_kreq_ether_recv mcp_kreq_ether_recv_t;
+
+
+/* Commands */
+
+#define	MXGEFW_BOOT_HANDOFF	0xfc0000
+#define	MXGEFW_BOOT_DUMMY_RDMA	0xfc01c0
+
+#define	MXGEFW_ETH_CMD		0xf80000
+#define	MXGEFW_ETH_SEND_4	0x200000	/* == MXGEFW_ETH_SEND(0) and MXGEFW_ETH_SEND(4) */
+#define	MXGEFW_ETH_SEND_1	0x240000	/* == MXGEFW_ETH_SEND(1) */
+#define	MXGEFW_ETH_SEND_2	0x280000	/* == MXGEFW_ETH_SEND(2) */
+#define	MXGEFW_ETH_SEND_3	0x2c0000	/* == MXGEFW_ETH_SEND(3) */
+#define	MXGEFW_ETH_RECV_SMALL	0x300000
+#define	MXGEFW_ETH_RECV_BIG	0x340000
+#define	MXGEFW_ETH_SEND_GO	0x380000
+#define	MXGEFW_ETH_SEND_STOP	0x3C0000
+
+#define	MXGEFW_ETH_SEND(n)		(MXGEFW_ETH_SEND_4 + (((n) & 0x03) * 0x40000))	/* n taken modulo 4 */
+#define	MXGEFW_ETH_SEND_OFFSET(n)	(MXGEFW_ETH_SEND(n) - MXGEFW_ETH_SEND_4)	/* distance from the SEND_4 base */
+
+enum myri10ge_mcp_cmd_type {	/* command codes; argument notes below refer to data0..data2 of struct mcp_cmd */
+  MXGEFW_CMD_NONE = 0,
+  /* Reset the mcp, it is left in a safe state, waiting
+     for the driver to set all its parameters */
+  MXGEFW_CMD_RESET = 1,
+
+  /* get the version number of the current firmware
+     (may be available in the eeprom strings?) */
+  MXGEFW_GET_MCP_VERSION = 2,
+
+
+  /* Parameters which must be set by the driver before it can
+     issue MXGEFW_CMD_ETHERNET_UP. They persist until the next
+     MXGEFW_CMD_RESET is issued */
+
+  MXGEFW_CMD_SET_INTRQ_DMA = 3,
+  /* data0 = LSW of the host address
+   * data1 = MSW of the host address
+   * data2 = slice number if multiple slices are used
+   */
+
+  MXGEFW_CMD_SET_BIG_BUFFER_SIZE = 4,	/* in bytes, power of 2 */
+  MXGEFW_CMD_SET_SMALL_BUFFER_SIZE = 5,	/* in bytes */
+
+
+  /* Parameters which refer to lanai SRAM addresses where the
+     driver must issue PIO writes for various things */
+
+  MXGEFW_CMD_GET_SEND_OFFSET = 6,
+  MXGEFW_CMD_GET_SMALL_RX_OFFSET = 7,
+  MXGEFW_CMD_GET_BIG_RX_OFFSET = 8,
+  /* data0 = slice number if multiple slices are used */
+
+  MXGEFW_CMD_GET_IRQ_ACK_OFFSET = 9,
+  MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET = 10,
+
+  /* Parameters which refer to rings stored on the MCP,
+     and whose size is controlled by the mcp */
+
+  MXGEFW_CMD_GET_SEND_RING_SIZE = 11,	/* in bytes */
+  MXGEFW_CMD_GET_RX_RING_SIZE = 12,	/* in bytes */
+
+  /* Parameters which refer to rings stored in the host,
+     and whose size is controlled by the host.  Note that
+     all must be physically contiguous and must contain
+     a power of 2 number of entries.  */
+
+  MXGEFW_CMD_SET_INTRQ_SIZE = 13, 	/* in bytes */
+#define MXGEFW_CMD_SET_INTRQ_SIZE_FLAG_NO_STRICT_SIZE_CHECK  (1U << 31)	/* unsigned: 1 << 31 would overflow a 32-bit int */
+
+  /* command to bring ethernet interface up.  Above parameters
+     (plus mtu & mac address) must have been exchanged prior
+     to issuing this command  */
+  MXGEFW_CMD_ETHERNET_UP = 14,
+
+  /* command to bring ethernet interface down.  No further sends
+     or receives may be processed until an MXGEFW_CMD_ETHERNET_UP
+     is issued, and all interrupt queues must be flushed prior
+     to ack'ing this command */
+
+  MXGEFW_CMD_ETHERNET_DOWN = 15,
+
+  /* commands the driver may issue live, without resetting
+     the nic.  Note that increasing the mtu "live" should
+     only be done if the driver has already supplied buffers
+     sufficiently large to handle the new mtu.  Decreasing
+     the mtu live is safe */
+
+  MXGEFW_CMD_SET_MTU = 16,
+  MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET = 17,  /* in microseconds */
+  MXGEFW_CMD_SET_STATS_INTERVAL = 18,   /* in microseconds */
+  MXGEFW_CMD_SET_STATS_DMA_OBSOLETE = 19, /* replaced by SET_STATS_DMA_V2 */
+
+  MXGEFW_ENABLE_PROMISC = 20,
+  MXGEFW_DISABLE_PROMISC = 21,
+  MXGEFW_SET_MAC_ADDRESS = 22,
+
+  MXGEFW_ENABLE_FLOW_CONTROL = 23,
+  MXGEFW_DISABLE_FLOW_CONTROL = 24,
+
+  /* do a DMA test
+     data0,data1 = DMA address
+     data2       = RDMA length (MSH), WDMA length (LSH)
+     command return data = repetitions (MSH), 0.5-ms ticks (LSH)
+  */
+  MXGEFW_DMA_TEST = 25,
+
+  MXGEFW_ENABLE_ALLMULTI = 26,
+  MXGEFW_DISABLE_ALLMULTI = 27,
+
+  /* returns MXGEFW_CMD_ERROR_MULTICAST
+     if there is no room in the cache
+     data0,MSH(data1) = multicast group address */
+  MXGEFW_JOIN_MULTICAST_GROUP = 28,
+  /* returns MXGEFW_CMD_ERROR_MULTICAST
+     if the address is not in the cache,
+     or is equal to FF-FF-FF-FF-FF-FF
+     data0,MSH(data1) = multicast group address */
+  MXGEFW_LEAVE_MULTICAST_GROUP = 29,
+  MXGEFW_LEAVE_ALL_MULTICAST_GROUPS = 30,
+
+  MXGEFW_CMD_SET_STATS_DMA_V2 = 31,
+  /* data0, data1 = bus addr,
+   * data2 = sizeof(struct mcp_irq_data) from driver point of view, allows
+   * adding new stuff to mcp_irq_data without changing the ABI
+   *
+   * If multiple slices are used, data2 contains both the size of the
+   * structure (in the lower 16 bits) and the slice number
+   * (in the upper 16 bits).
+   */
+
+  MXGEFW_CMD_UNALIGNED_TEST = 32,
+  /* same as MXGEFW_DMA_TEST (same args) but abort with UNALIGNED on unaligned
+     chipset */
+
+  MXGEFW_CMD_UNALIGNED_STATUS = 33,
+  /* return data = boolean, true if the chipset is known to be unaligned */
+
+  MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS = 34,
+  /* data0 = number of big buffers to use.  It must be 0 or a power of 2.
+   * 0 indicates that the NIC consumes as many buffers as are required
+   * for each packet. This is the default behavior.
+   * A power of 2 number indicates that the NIC always uses the specified
+   * number of buffers for each big receive packet.
+   * It is up to the driver to ensure that this value is big enough for
+   * the NIC to be able to receive maximum-sized packets.
+   */
+
+  MXGEFW_CMD_GET_MAX_RSS_QUEUES = 35,
+  MXGEFW_CMD_ENABLE_RSS_QUEUES = 36,
+  /* data0 = number of slices n (0, 1, ..., n-1) to enable
+   * data1 = interrupt mode | use of multiple transmit queues.
+   * 0=share one INTx/MSI.
+   * 1=use one MSI-X per queue.
+   * If all queues share one interrupt, the driver must have set
+   * RSS_SHARED_INTERRUPT_DMA before enabling queues.
+   * 2=enable both receive and send queues.
+   * Without this bit set, only one send queue (slice 0's send queue)
+   * is enabled.  The receive queues are always enabled.
+   */
+#define MXGEFW_SLICE_INTR_MODE_SHARED          0x0
+#define MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE   0x1
+#define MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES 0x2
+
+  MXGEFW_CMD_GET_RSS_SHARED_INTERRUPT_MASK_OFFSET = 37,
+  MXGEFW_CMD_SET_RSS_SHARED_INTERRUPT_DMA = 38,
+  /* data0, data1 = bus address lsw, msw */
+  MXGEFW_CMD_GET_RSS_TABLE_OFFSET = 39,
+  /* get the offset of the indirection table */
+  MXGEFW_CMD_SET_RSS_TABLE_SIZE = 40,
+  /* set the size of the indirection table */
+  MXGEFW_CMD_GET_RSS_KEY_OFFSET = 41,
+  /* get the offset of the secret key */
+  MXGEFW_CMD_RSS_KEY_UPDATED = 42,
+  /* tell nic that the secret key's been updated */
+  MXGEFW_CMD_SET_RSS_ENABLE = 43,
+  /* data0 = enable/disable rss
+   * 0: disable rss.  nic does not distribute receive packets.
+   * 1: enable rss.  nic distributes receive packets among queues.
+   * data1 = hash type
+   * 1: IPV4            (required by RSS)
+   * 2: TCP_IPV4        (required by RSS)
+   * 3: IPV4 | TCP_IPV4 (required by RSS)
+   * 4: source port
+   * 5: source port + destination port
+   */
+#define MXGEFW_RSS_HASH_TYPE_IPV4      0x1
+#define MXGEFW_RSS_HASH_TYPE_TCP_IPV4  0x2
+#define MXGEFW_RSS_HASH_TYPE_SRC_PORT  0x4
+#define MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT 0x5
+#define MXGEFW_RSS_HASH_TYPE_MAX 0x5
+
+  MXGEFW_CMD_GET_MAX_TSO6_HDR_SIZE = 44,
+  /* Return data = the max. size of the entire headers of an IPv6 TSO packet.
+   * If the header size of an IPv6 TSO packet is larger than the specified
+   * value, then the driver must not use TSO.
+   * This size restriction only applies to IPv6 TSO.
+   * For IPv4 TSO, the maximum size of the headers is fixed, and the NIC
+   * always has enough header buffer to store maximum-sized headers.
+   */
+
+  MXGEFW_CMD_SET_TSO_MODE = 45,
+  /* data0 = TSO mode.
+   * 0: Linux/FreeBSD style (NIC default)
+   * 1: NDIS/NetBSD style
+   */
+#define MXGEFW_TSO_MODE_LINUX  0
+#define MXGEFW_TSO_MODE_NDIS   1
+
+  MXGEFW_CMD_MDIO_READ = 46,
+  /* data0 = dev_addr (PMA/PMD or PCS ...), data1 = register/addr */
+  MXGEFW_CMD_MDIO_WRITE = 47,
+  /* data0 = dev_addr,  data1 = register/addr, data2 = value  */
+
+  MXGEFW_CMD_I2C_READ = 48,
+  /* Starts to get a fresh copy of one byte or of the module i2c table, the
+   * obtained data is cached inside the xaui-xfi chip :
+   *   data0 :  0 => get one byte, 1=> get 256 bytes
+   *   data1 :  If data0 == 0: location to refresh
+   *               bit 7:0  register location
+   *               bit 15:8 is the i2c slave addr (0 is interpreted as 0xA1)
+   *               bit 23:16 is the i2c bus number (for multi-port NICs)
+   *            If data0 == 1: unused
+   * The operation might take ~1ms for a single byte or ~65ms when refreshing all 256 bytes
+   * During the i2c operation,  MXGEFW_CMD_I2C_READ or MXGEFW_CMD_I2C_BYTE attempts
+   *  will return MXGEFW_CMD_ERROR_BUSY
+   */
+  MXGEFW_CMD_I2C_BYTE = 49,
+  /* Return the last obtained copy of a given byte in the xfp i2c table
+   * (copy cached during the last relevant MXGEFW_CMD_I2C_READ)
+   *   data0 : index of the desired table entry
+   *  Return data = the byte stored at the requested index in the table
+   */
+
+  MXGEFW_CMD_GET_VPUMP_OFFSET = 50,
+  /* Return data = NIC memory offset of mcp_vpump_public_global */
+  MXGEFW_CMD_RESET_VPUMP = 51,
+  /* Resets the VPUMP state */
+
+  MXGEFW_CMD_SET_RSS_MCP_SLOT_TYPE = 52,
+  /* data0 = mcp_slot type to use.
+   * 0 = the default 4B mcp_slot
+   * 1 = 8B mcp_slot_8
+   */
+#define MXGEFW_RSS_MCP_SLOT_TYPE_MIN        0
+#define MXGEFW_RSS_MCP_SLOT_TYPE_WITH_HASH  1
+
+  MXGEFW_CMD_SET_THROTTLE_FACTOR = 53,
+  /* set the throttle factor for ethp_z8e
+     data0 = throttle_factor
+     throttle_factor = 256 * pcie-raw-speed / tx_speed
+     tx_speed = 256 * pcie-raw-speed / throttle_factor
+
+     For PCI-E x8: pcie-raw-speed == 16Gb/s
+     For PCI-E x4: pcie-raw-speed == 8Gb/s
+
+     ex1: throttle_factor == 0x1a0 (416), tx_speed == 1.23GB/s == 9.846 Gb/s
+     ex2: throttle_factor == 0x200 (512), tx_speed == 1.0GB/s == 8 Gb/s
+
+     with tx_boundary == 2048, max-throttle-factor == 8191 => min-speed == 500Mb/s
+     with tx_boundary == 4096, max-throttle-factor == 4095 => min-speed == 1Gb/s
+  */
+
+  MXGEFW_CMD_VPUMP_UP = 54,
+  /* Allocates VPump Connection, Send Request and Zero copy buffer address tables */
+  MXGEFW_CMD_GET_VPUMP_CLK = 55,
+  /* Get the lanai clock */
+
+  MXGEFW_CMD_GET_DCA_OFFSET = 56,
+  /* offset of dca control for WDMAs */
+
+  /* VMWare NetQueue commands */
+  MXGEFW_CMD_NETQ_GET_FILTERS_PER_QUEUE = 57,
+  MXGEFW_CMD_NETQ_ADD_FILTER = 58,
+  /* data0 = filter_id << 16 | queue << 8 | type */
+  /* data1 = MS4 of MAC Addr */
+  /* data2 = LS2_MAC << 16 | VLAN_tag */
+  MXGEFW_CMD_NETQ_DEL_FILTER = 59,
+  /* data0 = filter_id */
+  MXGEFW_CMD_NETQ_QUERY1 = 60,
+  MXGEFW_CMD_NETQ_QUERY2 = 61,
+  MXGEFW_CMD_NETQ_QUERY3 = 62,
+  MXGEFW_CMD_NETQ_QUERY4 = 63,
+
+  MXGEFW_CMD_RELAX_RXBUFFER_ALIGNMENT = 64,
+  /* When set, small receive buffers can cross page boundaries.
+   * Both small and big receive buffers may start at any address.
+   * This option has performance implications, so use with caution.
+   */
+};
+typedef enum myri10ge_mcp_cmd_type myri10ge_mcp_cmd_type_t;
+
+
+enum myri10ge_mcp_cmd_status {	/* status codes; the command notes say e.g. "returns MXGEFW_CMD_ERROR_MULTICAST" */
+  MXGEFW_CMD_OK = 0,
+  MXGEFW_CMD_UNKNOWN = 1,
+  MXGEFW_CMD_ERROR_RANGE = 2,
+  MXGEFW_CMD_ERROR_BUSY = 3,	/* e.g. returned while an i2c operation is in progress */
+  MXGEFW_CMD_ERROR_EMPTY = 4,
+  MXGEFW_CMD_ERROR_CLOSED = 5,
+  MXGEFW_CMD_ERROR_HASH_ERROR = 6,
+  MXGEFW_CMD_ERROR_BAD_PORT = 7,
+  MXGEFW_CMD_ERROR_RESOURCES = 8,
+  MXGEFW_CMD_ERROR_MULTICAST = 9,	/* multicast join/leave failures */
+  MXGEFW_CMD_ERROR_UNALIGNED = 10,
+  MXGEFW_CMD_ERROR_NO_MDIO = 11,
+  MXGEFW_CMD_ERROR_I2C_FAILURE = 12,
+  MXGEFW_CMD_ERROR_I2C_ABSENT = 13,
+  MXGEFW_CMD_ERROR_BAD_PCIE_LINK = 14
+};
+typedef enum myri10ge_mcp_cmd_status myri10ge_mcp_cmd_status_t;
+
+
+#define MXGEFW_OLD_IRQ_DATA_LEN 40
+
+struct mcp_irq_data {
+  /* add new counters at the beginning */
+  uint32_t future_use[1];
+  uint32_t dropped_pause;
+  uint32_t dropped_unicast_filtered;
+  uint32_t dropped_bad_crc32;
+  uint32_t dropped_bad_phy;
+  uint32_t dropped_multicast_filtered;
+/* 40 Bytes: send_done_count through valid (9 x 4 + 4 x 1) add up to MXGEFW_OLD_IRQ_DATA_LEN */
+  uint32_t send_done_count;
+
+#define MXGEFW_LINK_DOWN 0
+#define MXGEFW_LINK_UP 1
+#define MXGEFW_LINK_MYRINET 2
+#define MXGEFW_LINK_UNKNOWN 3
+  uint32_t link_up;	/* presumably one of the MXGEFW_LINK_* values above -- confirm */
+  uint32_t dropped_link_overflow;
+  uint32_t dropped_link_error_or_filtered;
+  uint32_t dropped_runt;
+  uint32_t dropped_overrun;
+  uint32_t dropped_no_small_buffer;
+  uint32_t dropped_no_big_buffer;
+  uint32_t rdma_tags_available;
+
+  uint8_t tx_stopped;
+  uint8_t link_down;
+  uint8_t stats_updated;
+  uint8_t valid;
+};
+typedef struct mcp_irq_data mcp_irq_data_t;
+
+#ifdef MXGEFW_NDIS
+/* Exclusively used by NDIS drivers; 4 bytes in total */
+struct mcp_rss_shared_interrupt {
+  uint8_t pad[2];
+  uint8_t queue;
+  uint8_t valid;
+};
+#endif /* MXGEFW_NDIS */
+
+/* definitions for NETQ filter type (presumably the "type" field of MXGEFW_CMD_NETQ_ADD_FILTER data0 -- confirm) */
+#define MXGEFW_NETQ_FILTERTYPE_NONE 0
+#define MXGEFW_NETQ_FILTERTYPE_MACADDR 1
+#define MXGEFW_NETQ_FILTERTYPE_VLAN 2
+#define MXGEFW_NETQ_FILTERTYPE_VLANMACADDR 3	/* numerically MACADDR | VLAN */
+
+#endif /* _myri10ge_mcp_h */
diff --git a/src/include/gpxe/errfile.h b/src/include/gpxe/errfile.h
index 2e7d7fa..03234f4 100644
--- a/src/include/gpxe/errfile.h
+++ b/src/include/gpxe/errfile.h
@@ -115,6 +115,7 @@ FILE_LICENCE ( GPL2_OR_LATER );
 #define ERRFILE_ath5k		     ( ERRFILE_DRIVER | 0x00500000 )
 #define ERRFILE_atl1e		     ( ERRFILE_DRIVER | 0x00510000 )
 #define ERRFILE_sis190		     ( ERRFILE_DRIVER | 0x00520000 )
+#define ERRFILE_myri10ge	     ( ERRFILE_DRIVER | 0x00530000 )
 
 #define ERRFILE_scsi		     ( ERRFILE_DRIVER | 0x00700000 )
 #define ERRFILE_arbel		     ( ERRFILE_DRIVER | 0x00710000 )
diff --git a/src/include/gpxe/pci_ids.h b/src/include/gpxe/pci_ids.h
index f570c08..4207013 100644
--- a/src/include/gpxe/pci_ids.h
+++ b/src/include/gpxe/pci_ids.h
@@ -317,6 +317,7 @@ FILE_LICENCE ( GPL2_ONLY );
 #define PCI_VENDOR_ID_TIMEDIA		0x1409
 #define PCI_VENDOR_ID_OXSEMI		0x1415
 #define PCI_VENDOR_ID_AIRONET		0x14b9
+#define PCI_VENDOR_ID_MYRICOM		0x14c1
 #define PCI_VENDOR_ID_TITAN		0x14D2
 #define PCI_VENDOR_ID_PANACOM		0x14d4
 #define PCI_VENDOR_ID_BROADCOM		0x14e4
-- 
1.6.5.7


--------------090605050802070601060608--


More information about the gPXE mailing list