Diffstat (limited to 'sys/dev')
-rw-r--r--  sys/dev/netmap/if_em_netmap.h     236
-rw-r--r--  sys/dev/netmap/if_igb_netmap.h    277
-rw-r--r--  sys/dev/netmap/if_lem_netmap.h    269
-rw-r--r--  sys/dev/netmap/if_re_netmap.h     293
-rw-r--r--  sys/dev/netmap/ixgbe_netmap.h     463
-rw-r--r--  sys/dev/netmap/netmap.c          3193
-rw-r--r--  sys/dev/netmap/netmap_freebsd.c   410
-rw-r--r--  sys/dev/netmap/netmap_generic.c   818
-rw-r--r--  sys/dev/netmap/netmap_kern.h      596
-rw-r--r--  sys/dev/netmap/netmap_mbq.c       152
-rw-r--r--  sys/dev/netmap/netmap_mbq.h        78
-rw-r--r--  sys/dev/netmap/netmap_mem2.c      292
-rw-r--r--  sys/dev/netmap/netmap_mem2.h       15
-rw-r--r--  sys/dev/netmap/netmap_vale.c     1983
14 files changed, 5600 insertions, 3475 deletions
diff --git a/sys/dev/netmap/if_em_netmap.h b/sys/dev/netmap/if_em_netmap.h
index 1ea11238aaaf..dbbee4222407 100644
--- a/sys/dev/netmap/if_em_netmap.h
+++ b/sys/dev/netmap/if_em_netmap.h
@@ -26,7 +26,7 @@
/*
* $FreeBSD$
*
- * netmap support for em.
+ * netmap support for: em.
*
* For more details on netmap support please see ixgbe_netmap.h
*/
@@ -39,10 +39,6 @@
#include <dev/netmap/netmap_kern.h>
-static void em_netmap_block_tasks(struct adapter *);
-static void em_netmap_unblock_tasks(struct adapter *);
-
-
// XXX do we need to block/unblock the tasks ?
static void
em_netmap_block_tasks(struct adapter *adapter)
@@ -85,45 +81,31 @@ em_netmap_unblock_tasks(struct adapter *adapter)
/*
- * Register/unregister routine
+ * Register/unregister. We are already under netmap lock.
*/
static int
-em_netmap_reg(struct ifnet *ifp, int onoff)
+em_netmap_reg(struct netmap_adapter *na, int onoff)
{
+ struct ifnet *ifp = na->ifp;
struct adapter *adapter = ifp->if_softc;
- struct netmap_adapter *na = NA(ifp);
- int error = 0;
-
- if (na == NULL)
- return EINVAL; /* no netmap support here */
+ EM_CORE_LOCK(adapter);
em_disable_intr(adapter);
/* Tell the stack that the interface is no longer active */
ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
em_netmap_block_tasks(adapter);
-
+ /* enable or disable flags and callbacks in na and ifp */
if (onoff) {
- ifp->if_capenable |= IFCAP_NETMAP;
-
- na->if_transmit = ifp->if_transmit;
- ifp->if_transmit = netmap_transmit;
-
- em_init_locked(adapter);
- if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) == 0) {
- error = ENOMEM;
- goto fail;
- }
+ nm_set_native_flags(na);
} else {
-fail:
- /* return to non-netmap mode */
- ifp->if_transmit = na->if_transmit;
- ifp->if_capenable &= ~IFCAP_NETMAP;
- em_init_locked(adapter); /* also enable intr */
+ nm_clear_native_flags(na);
}
+ em_init_locked(adapter); /* also enable intr */
em_netmap_unblock_tasks(adapter);
- return (error);
+ EM_CORE_UNLOCK(adapter);
+ return (ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1);
}
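
Each driver's register hook in this patch follows the same shape: quiesce the NIC, flip netmap's native-mode state with nm_set_native_flags()/nm_clear_native_flags(), reinitialize, and report whether the interface came back up. A minimal stand-alone sketch of that shape, with simplified stand-in types (not the real netmap_kern.h definitions):

/*
 * Minimal sketch of the reg-callback shape used throughout this patch.
 * All names below are simplified stand-ins, not the real netmap(4) API.
 */
#include <stdio.h>

struct netmap_adapter {
	int native_on;		/* stands in for NAF_NATIVE_ON in na->na_flags */
	int ifp_running;	/* stands in for IFF_DRV_RUNNING on na->ifp */
};

static void nm_set_native_flags(struct netmap_adapter *na)   { na->native_on = 1; }
static void nm_clear_native_flags(struct netmap_adapter *na) { na->native_on = 0; }

/* driver stop/reinit: here they just toggle the "running" flag */
static void drv_stop(struct netmap_adapter *na)        { na->ifp_running = 0; }
static void drv_init_locked(struct netmap_adapter *na) { na->ifp_running = 1; }

static int
drv_netmap_reg(struct netmap_adapter *na, int onoff)
{
	drv_stop(na);			/* interface no longer active */
	if (onoff)
		nm_set_native_flags(na);	/* switch rings/callbacks to netmap mode */
	else
		nm_clear_native_flags(na);	/* back to normal operation */
	drv_init_locked(na);		/* reinit, also re-enables interrupts */
	return na->ifp_running ? 0 : 1;	/* same return convention as the patch */
}

int
main(void)
{
	struct netmap_adapter na = { 0, 0 };
	printf("reg on  -> %d (native=%d)\n", drv_netmap_reg(&na, 1), na.native_on);
	printf("reg off -> %d (native=%d)\n", drv_netmap_reg(&na, 0), na.native_on);
	return 0;
}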
@@ -131,93 +113,103 @@ fail:
* Reconcile kernel and user view of the transmit ring.
*/
static int
-em_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int flags)
+em_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
{
- struct adapter *adapter = ifp->if_softc;
- struct tx_ring *txr = &adapter->tx_rings[ring_nr];
- struct netmap_adapter *na = NA(ifp);
+ struct ifnet *ifp = na->ifp;
struct netmap_kring *kring = &na->tx_rings[ring_nr];
struct netmap_ring *ring = kring->ring;
- u_int j, k, l, n = 0, lim = kring->nkr_num_slots - 1;
-
+ u_int nm_i; /* index into the netmap ring */
+ u_int nic_i; /* index into the NIC ring */
+ u_int n, new_slots;
+ u_int const lim = kring->nkr_num_slots - 1;
+ u_int const cur = nm_txsync_prologue(kring, &new_slots);
/* generate an interrupt approximately every half ring */
u_int report_frequency = kring->nkr_num_slots >> 1;
- k = ring->cur;
- if (k > lim)
+ /* device-specific */
+ struct adapter *adapter = ifp->if_softc;
+ struct tx_ring *txr = &adapter->tx_rings[ring_nr];
+
+ if (cur > lim) /* error checking in nm_txsync_prologue() */
return netmap_ring_reinit(kring);
bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
BUS_DMASYNC_POSTREAD);
/*
- * Process new packets to send. j is the current index in the
- * netmap ring, l is the corresponding index in the NIC ring.
+ * First part: process new packets to send.
*/
- j = kring->nr_hwcur;
- if (j != k) { /* we have new packets to send */
- l = netmap_idx_k2n(kring, j);
- for (n = 0; j != k; n++) {
- /* slot is the current slot in the netmap ring */
- struct netmap_slot *slot = &ring->slot[j];
- /* curr is the current slot in the nic ring */
- struct e1000_tx_desc *curr = &txr->tx_base[l];
- struct em_buffer *txbuf = &txr->tx_buffers[l];
- int flags = ((slot->flags & NS_REPORT) ||
- j == 0 || j == report_frequency) ?
- E1000_TXD_CMD_RS : 0;
+
+ nm_i = kring->nr_hwcur;
+ if (nm_i != cur) { /* we have new packets to send */
+ nic_i = netmap_idx_k2n(kring, nm_i);
+ for (n = 0; nm_i != cur; n++) {
+ struct netmap_slot *slot = &ring->slot[nm_i];
+ u_int len = slot->len;
uint64_t paddr;
void *addr = PNMB(slot, &paddr);
- u_int len = slot->len;
- if (addr == netmap_buffer_base || len > NETMAP_BUF_SIZE) {
- return netmap_ring_reinit(kring);
- }
+ /* device-specific */
+ struct e1000_tx_desc *curr = &txr->tx_base[nic_i];
+ struct em_buffer *txbuf = &txr->tx_buffers[nic_i];
+ int flags = (slot->flags & NS_REPORT ||
+ nic_i == 0 || nic_i == report_frequency) ?
+ E1000_TXD_CMD_RS : 0;
+
+ NM_CHECK_ADDR_LEN(addr, len);
- slot->flags &= ~NS_REPORT;
if (slot->flags & NS_BUF_CHANGED) {
curr->buffer_addr = htole64(paddr);
/* buffer has changed, reload map */
netmap_reload_map(txr->txtag, txbuf->map, addr);
- slot->flags &= ~NS_BUF_CHANGED;
}
+ slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
+
+ /* Fill the slot in the NIC ring. */
curr->upper.data = 0;
curr->lower.data = htole32(adapter->txd_cmd | len |
(E1000_TXD_CMD_EOP | flags) );
bus_dmamap_sync(txr->txtag, txbuf->map,
BUS_DMASYNC_PREWRITE);
- j = (j == lim) ? 0 : j + 1;
- l = (l == lim) ? 0 : l + 1;
+
+ nm_i = nm_next(nm_i, lim);
+ nic_i = nm_next(nic_i, lim);
}
- kring->nr_hwcur = k; /* the saved ring->cur */
- kring->nr_hwavail -= n;
+ kring->nr_hwcur = cur; /* the saved ring->cur */
+ /* decrease avail by # of packets sent minus previous ones */
+ kring->nr_hwavail -= new_slots;
+ /* synchronize the NIC ring */
bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
- BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
+ BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
- E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), l);
+ /* (re)start the tx unit up to slot nic_i (excluded) */
+ E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), nic_i);
}
- if (n == 0 || kring->nr_hwavail < 1) {
+ /*
+ * Second part: reclaim buffers for completed transmissions.
+ */
+ if (flags & NAF_FORCE_RECLAIM || kring->nr_hwavail < 1) {
int delta;
/* record completed transmissions using TDH */
- l = E1000_READ_REG(&adapter->hw, E1000_TDH(ring_nr));
- if (l >= kring->nkr_num_slots) { /* XXX can it happen ? */
- D("TDH wrap %d", l);
- l -= kring->nkr_num_slots;
+ nic_i = E1000_READ_REG(&adapter->hw, E1000_TDH(ring_nr));
+ if (nic_i >= kring->nkr_num_slots) { /* XXX can it happen ? */
+ D("TDH wrap %d", nic_i);
+ nic_i -= kring->nkr_num_slots;
}
- delta = l - txr->next_to_clean;
+ delta = nic_i - txr->next_to_clean;
if (delta) {
/* some completed, increment hwavail. */
if (delta < 0)
delta += kring->nkr_num_slots;
- txr->next_to_clean = l;
+ txr->next_to_clean = nic_i;
kring->nr_hwavail += delta;
}
}
- /* update avail to what the kernel knows */
- ring->avail = kring->nr_hwavail;
+
+ nm_txsync_finalize(kring, cur);
return 0;
}
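
The converted sync routines walk two indices in lockstep: nm_i in the netmap ring and nic_i in the NIC ring, advancing both with nm_next() instead of the open-coded (i == lim) ? 0 : i + 1. A small self-contained illustration of the helper and of the netmap_idx_k2n()-style offset translation described in the ixgbe comments below (ring size and offset here are arbitrary):

/* Self-contained illustration; nm_next() matches the diff, the rest is simplified. */
#include <stdint.h>
#include <stdio.h>

static inline uint32_t
nm_next(uint32_t i, uint32_t lim)	/* circular increment, lim == num_slots - 1 */
{
	return (i == lim) ? 0 : i + 1;
}

int
main(void)
{
	const uint32_t num_slots = 8, lim = num_slots - 1;
	const uint32_t hwofs = 3;	/* offset between netmap and NIC ring */
	uint32_t nm_i = 5;		/* kring->nr_hwcur */
	uint32_t cur = 1;		/* ring->cur as returned by the prologue */
	/* netmap_idx_k2n()-style mapping: nm_i == (nic_i + hwofs) % num_slots */
	uint32_t nic_i = (nm_i + num_slots - hwofs) % num_slots;

	while (nm_i != cur) {		/* "we have new packets to send" */
		printf("slot nm_i=%u maps to nic_i=%u\n",
		    (unsigned)nm_i, (unsigned)nic_i);
		nm_i = nm_next(nm_i, lim);
		nic_i = nm_next(nic_i, lim);
	}
	return 0;
}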
@@ -227,19 +219,23 @@ em_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int flags)
* Reconcile kernel and user view of the receive ring.
*/
static int
-em_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int flags)
+em_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
{
- struct adapter *adapter = ifp->if_softc;
- struct rx_ring *rxr = &adapter->rx_rings[ring_nr];
- struct netmap_adapter *na = NA(ifp);
+ struct ifnet *ifp = na->ifp;
struct netmap_kring *kring = &na->rx_rings[ring_nr];
struct netmap_ring *ring = kring->ring;
- u_int j, l, n, lim = kring->nkr_num_slots - 1;
+ u_int nm_i; /* index into the netmap ring */
+ u_int nic_i; /* index into the NIC ring */
+ u_int n, resvd;
+ u_int const lim = kring->nkr_num_slots - 1;
+ u_int const cur = nm_rxsync_prologue(kring, &resvd); /* cur + res */
int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
- u_int k = ring->cur, resvd = ring->reserved;
- k = ring->cur;
- if (k > lim)
+ /* device-specific */
+ struct adapter *adapter = ifp->if_softc;
+ struct rx_ring *rxr = &adapter->rx_rings[ring_nr];
+
+ if (cur > lim)
return netmap_ring_reinit(kring);
/* XXX check sync modes */
@@ -247,84 +243,85 @@ em_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int flags)
BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
/*
- * Import newly received packets into the netmap ring.
- * j is an index in the netmap ring, l in the NIC ring.
+ * First part: import newly received packets.
*/
- l = rxr->next_to_check;
- j = netmap_idx_n2k(kring, l);
if (netmap_no_pendintr || force_update) {
uint16_t slot_flags = kring->nkr_slot_flags;
+ nic_i = rxr->next_to_check;
+ nm_i = netmap_idx_n2k(kring, nic_i);
+
for (n = 0; ; n++) {
- struct e1000_rx_desc *curr = &rxr->rx_base[l];
+ struct e1000_rx_desc *curr = &rxr->rx_base[nic_i];
uint32_t staterr = le32toh(curr->status);
if ((staterr & E1000_RXD_STAT_DD) == 0)
break;
- ring->slot[j].len = le16toh(curr->length);
- ring->slot[j].flags = slot_flags;
- bus_dmamap_sync(rxr->rxtag, rxr->rx_buffers[l].map,
+ ring->slot[nm_i].len = le16toh(curr->length);
+ ring->slot[nm_i].flags = slot_flags;
+ bus_dmamap_sync(rxr->rxtag, rxr->rx_buffers[nic_i].map,
BUS_DMASYNC_POSTREAD);
- j = (j == lim) ? 0 : j + 1;
+ nm_i = nm_next(nm_i, lim);
/* make sure next_to_refresh follows next_to_check */
- rxr->next_to_refresh = l; // XXX
- l = (l == lim) ? 0 : l + 1;
+ rxr->next_to_refresh = nic_i; // XXX
+ nic_i = nm_next(nic_i, lim);
}
if (n) { /* update the state variables */
- rxr->next_to_check = l;
+ rxr->next_to_check = nic_i;
kring->nr_hwavail += n;
}
kring->nr_kflags &= ~NKR_PENDINTR;
}
- /* skip past packets that userspace has released */
- j = kring->nr_hwcur; /* netmap ring index */
- if (resvd > 0) {
- if (resvd + ring->avail >= lim + 1) {
- D("XXX invalid reserve/avail %d %d", resvd, ring->avail);
- ring->reserved = resvd = 0; // XXX panic...
- }
- k = (k >= resvd) ? k - resvd : k + lim + 1 - resvd;
- }
- if (j != k) { /* userspace has released some packets. */
- l = netmap_idx_k2n(kring, j); /* NIC ring index */
- for (n = 0; j != k; n++) {
- struct netmap_slot *slot = &ring->slot[j];
- struct e1000_rx_desc *curr = &rxr->rx_base[l];
- struct em_buffer *rxbuf = &rxr->rx_buffers[l];
+ /*
+ * Second part: skip past packets that userspace has released.
+ */
+ nm_i = kring->nr_hwcur;
+ if (nm_i != cur) {
+ nic_i = netmap_idx_k2n(kring, nm_i);
+ for (n = 0; nm_i != cur; n++) {
+ struct netmap_slot *slot = &ring->slot[nm_i];
uint64_t paddr;
void *addr = PNMB(slot, &paddr);
- if (addr == netmap_buffer_base) { /* bad buf */
- return netmap_ring_reinit(kring);
- }
+ struct e1000_rx_desc *curr = &rxr->rx_base[nic_i];
+ struct em_buffer *rxbuf = &rxr->rx_buffers[nic_i];
+
+ if (addr == netmap_buffer_base) /* bad buf */
+ goto ring_reset;
if (slot->flags & NS_BUF_CHANGED) {
- curr->buffer_addr = htole64(paddr);
/* buffer has changed, reload map */
+ curr->buffer_addr = htole64(paddr);
netmap_reload_map(rxr->rxtag, rxbuf->map, addr);
slot->flags &= ~NS_BUF_CHANGED;
}
curr->status = 0;
bus_dmamap_sync(rxr->rxtag, rxbuf->map,
BUS_DMASYNC_PREREAD);
- j = (j == lim) ? 0 : j + 1;
- l = (l == lim) ? 0 : l + 1;
+ nm_i = nm_next(nm_i, lim);
+ nic_i = nm_next(nic_i, lim);
}
kring->nr_hwavail -= n;
- kring->nr_hwcur = k;
+ kring->nr_hwcur = cur;
+
bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
/*
* IMPORTANT: we must leave one free slot in the ring,
- * so move l back by one unit
+ * so move nic_i back by one unit
*/
- l = (l == 0) ? lim : l - 1;
- E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), l);
+ nic_i = (nic_i == 0) ? lim : nic_i - 1;
+ E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), nic_i);
}
- /* tell userspace that there are new packets */
+
+ /* tell userspace that there might be new packets */
ring->avail = kring->nr_hwavail - resvd;
+
return 0;
+
+ring_reset:
+ return netmap_ring_reinit(kring);
}
@@ -342,7 +339,8 @@ em_netmap_attach(struct adapter *adapter)
na.nm_txsync = em_netmap_txsync;
na.nm_rxsync = em_netmap_rxsync;
na.nm_register = em_netmap_reg;
- netmap_attach(&na, adapter->num_queues);
+ na.num_tx_rings = na.num_rx_rings = adapter->num_queues;
+ netmap_attach(&na);
}
/* end of file */
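
The reclaim step shared by em, igb and lem reads the hardware TDH pointer and credits completed slots back to nr_hwavail, handling the case where TDH has wrapped behind next_to_clean. A stand-alone sketch of that arithmetic, with a fake register read standing in for E1000_READ_REG():

/*
 * Stand-alone illustration of the TDH-based reclaim used by em/igb/lem.
 * read_tdh() is a fake hardware read; the arithmetic mirrors the diff.
 */
#include <stdio.h>

#define NUM_SLOTS 8

static unsigned int fake_tdh = 2;	/* pretend the NIC consumed up to slot 2 */
static unsigned int read_tdh(void) { return fake_tdh; }

int
main(void)
{
	unsigned int next_to_clean = 6;	/* last slot already accounted for */
	unsigned int hwavail = 1;	/* slots currently available to userspace */
	unsigned int nic_i = read_tdh();
	int delta;

	if (nic_i >= NUM_SLOTS)		/* defensive, mirrors the "TDH wrap" check */
		nic_i -= NUM_SLOTS;

	delta = (int)nic_i - (int)next_to_clean;
	if (delta) {
		if (delta < 0)		/* hardware pointer wrapped past the end */
			delta += NUM_SLOTS;
		next_to_clean = nic_i;
		hwavail += delta;	/* completed slots go back to the pool */
	}
	printf("reclaimed %d slots, hwavail now %u\n", delta, hwavail);
	return 0;
}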
diff --git a/sys/dev/netmap/if_igb_netmap.h b/sys/dev/netmap/if_igb_netmap.h
index 10d94b5faa38..b91d0baba06f 100644
--- a/sys/dev/netmap/if_igb_netmap.h
+++ b/sys/dev/netmap/if_igb_netmap.h
@@ -37,44 +37,43 @@
#include <vm/pmap.h> /* vtophys ? */
#include <dev/netmap/netmap_kern.h>
+/*
+ * Adaptation to different versions of the driver.
+ */
+
+#ifndef IGB_MEDIA_RESET
+/* at the same time as IGB_MEDIA_RESET was defined, the
+ * tx buffer descriptor was renamed, so use this to revert
+ * back to the old name.
+ */
+#define igb_tx_buf igb_tx_buffer
+#endif
+
/*
- * register-unregister routine
+ * Register/unregister. We are already under netmap lock.
*/
static int
-igb_netmap_reg(struct ifnet *ifp, int onoff)
+igb_netmap_reg(struct netmap_adapter *na, int onoff)
{
+ struct ifnet *ifp = na->ifp;
struct adapter *adapter = ifp->if_softc;
- struct netmap_adapter *na = NA(ifp);
- int error = 0;
-
- if (na == NULL)
- return EINVAL; /* no netmap support here */
+ IGB_CORE_LOCK(adapter);
igb_disable_intr(adapter);
/* Tell the stack that the interface is no longer active */
ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
+ /* enable or disable flags and callbacks in na and ifp */
if (onoff) {
- ifp->if_capenable |= IFCAP_NETMAP;
-
- na->if_transmit = ifp->if_transmit;
- ifp->if_transmit = netmap_transmit;
-
- igb_init_locked(adapter);
- if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) == 0) {
- error = ENOMEM;
- goto fail;
- }
+ nm_set_native_flags(na);
} else {
-fail:
- /* restore if_transmit */
- ifp->if_transmit = na->if_transmit;
- ifp->if_capenable &= ~IFCAP_NETMAP;
- igb_init_locked(adapter); /* also enable intr */
+ nm_clear_native_flags(na);
}
- return (error);
+ igb_init_locked(adapter); /* also enable intr */
+ IGB_CORE_UNLOCK(adapter);
+ return (ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1);
}
@@ -82,68 +81,62 @@ fail:
* Reconcile kernel and user view of the transmit ring.
*/
static int
-igb_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int flags)
+igb_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
{
- struct adapter *adapter = ifp->if_softc;
- struct tx_ring *txr = &adapter->tx_rings[ring_nr];
- struct netmap_adapter *na = NA(ifp);
+ struct ifnet *ifp = na->ifp;
struct netmap_kring *kring = &na->tx_rings[ring_nr];
struct netmap_ring *ring = kring->ring;
- u_int j, k, l, n = 0, lim = kring->nkr_num_slots - 1;
-
+ u_int nm_i; /* index into the netmap ring */
+ u_int nic_i; /* index into the NIC ring */
+ u_int n, new_slots;
+ u_int const lim = kring->nkr_num_slots - 1;
+ u_int const cur = nm_txsync_prologue(kring, &new_slots);
/* generate an interrupt approximately every half ring */
u_int report_frequency = kring->nkr_num_slots >> 1;
- k = ring->cur;
- if (k > lim)
+ /* device-specific */
+ struct adapter *adapter = ifp->if_softc;
+ struct tx_ring *txr = &adapter->tx_rings[ring_nr];
+ /* 82575 needs the queue index added */
+ u32 olinfo_status =
+ (adapter->hw.mac.type == e1000_82575) ? (txr->me << 4) : 0;
+
+ if (cur > lim) /* error checking in nm_txsync_prologue() */
return netmap_ring_reinit(kring);
bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
- BUS_DMASYNC_POSTREAD);
+ BUS_DMASYNC_POSTREAD);
- /* check for new packets to send.
- * j indexes the netmap ring, l indexes the nic ring, and
- * j = kring->nr_hwcur, l = E1000_TDT (not tracked),
- * j == (l + kring->nkr_hwofs) % ring_size
+ /*
+ * First part: process new packets to send.
*/
- j = kring->nr_hwcur;
- if (j != k) { /* we have new packets to send */
- /* 82575 needs the queue index added */
- u32 olinfo_status =
- (adapter->hw.mac.type == e1000_82575) ? (txr->me << 4) : 0;
-
- l = netmap_idx_k2n(kring, j);
- for (n = 0; j != k; n++) {
- /* slot is the current slot in the netmap ring */
- struct netmap_slot *slot = &ring->slot[j];
- /* curr is the current slot in the nic ring */
- union e1000_adv_tx_desc *curr =
- (union e1000_adv_tx_desc *)&txr->tx_base[l];
-#ifndef IGB_MEDIA_RESET
-/* at the same time as IGB_MEDIA_RESET was defined, the
- * tx buffer descriptor was renamed, so use this to revert
- * back to the old name.
- */
-#define igb_tx_buf igb_tx_buffer
-#endif
- struct igb_tx_buf *txbuf = &txr->tx_buffers[l];
- int flags = ((slot->flags & NS_REPORT) ||
- j == 0 || j == report_frequency) ?
- E1000_ADVTXD_DCMD_RS : 0;
+
+ nm_i = kring->nr_hwcur;
+ if (nm_i != cur) { /* we have new packets to send */
+ nic_i = netmap_idx_k2n(kring, nm_i);
+ for (n = 0; nm_i != cur; n++) {
+ struct netmap_slot *slot = &ring->slot[nm_i];
+ u_int len = slot->len;
uint64_t paddr;
void *addr = PNMB(slot, &paddr);
- u_int len = slot->len;
- if (addr == netmap_buffer_base || len > NETMAP_BUF_SIZE) {
- return netmap_ring_reinit(kring);
- }
+ /* device-specific */
+ union e1000_adv_tx_desc *curr =
+ (union e1000_adv_tx_desc *)&txr->tx_base[nic_i];
+ struct igb_tx_buf *txbuf = &txr->tx_buffers[nic_i];
+ int flags = (slot->flags & NS_REPORT ||
+ nic_i == 0 || nic_i == report_frequency) ?
+ E1000_ADVTXD_DCMD_RS : 0;
+
+ NM_CHECK_ADDR_LEN(addr, len);
- slot->flags &= ~NS_REPORT;
if (slot->flags & NS_BUF_CHANGED) {
/* buffer has changed, reload map */
netmap_reload_map(txr->txtag, txbuf->map, addr);
- slot->flags &= ~NS_BUF_CHANGED;
}
+ slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
+
+ /* Fill the slot in the NIC ring. */
curr->read.buffer_addr = htole64(paddr);
// XXX check olinfo and cmd_type_len
curr->read.olinfo_status =
@@ -151,48 +144,56 @@ igb_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int flags)
(len<< E1000_ADVTXD_PAYLEN_SHIFT));
curr->read.cmd_type_len =
htole32(len | E1000_ADVTXD_DTYP_DATA |
- E1000_ADVTXD_DCMD_IFCS |
- E1000_ADVTXD_DCMD_DEXT |
- E1000_ADVTXD_DCMD_EOP | flags);
+ E1000_ADVTXD_DCMD_IFCS |
+ E1000_ADVTXD_DCMD_DEXT |
+ E1000_ADVTXD_DCMD_EOP | flags);
+ /* make sure changes to the buffer are synced */
bus_dmamap_sync(txr->txtag, txbuf->map,
BUS_DMASYNC_PREWRITE);
- j = (j == lim) ? 0 : j + 1;
- l = (l == lim) ? 0 : l + 1;
+
+ nm_i = nm_next(nm_i, lim);
+ nic_i = nm_next(nic_i, lim);
}
- kring->nr_hwcur = k; /* the saved ring->cur */
- kring->nr_hwavail -= n;
+ kring->nr_hwcur = cur; /* the saved ring->cur */
+ /* decrease avail by # of packets sent minus previous ones */
+ kring->nr_hwavail -= new_slots;
/* Set the watchdog XXX ? */
txr->queue_status = IGB_QUEUE_WORKING;
txr->watchdog_time = ticks;
+ /* synchronize the NIC ring */
bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
- BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
+ BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
- E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), l);
+ /* (re)start the tx unit up to slot nic_i (excluded) */
+ E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), nic_i);
}
- if (n == 0 || kring->nr_hwavail < 1) {
+ /*
+ * Second part: reclaim buffers for completed transmissions.
+ */
+ if (flags & NAF_FORCE_RECLAIM || kring->nr_hwavail < 1) {
int delta;
/* record completed transmissions using TDH */
- l = E1000_READ_REG(&adapter->hw, E1000_TDH(ring_nr));
- if (l >= kring->nkr_num_slots) { /* XXX can it happen ? */
- D("TDH wrap %d", l);
- l -= kring->nkr_num_slots;
+ nic_i = E1000_READ_REG(&adapter->hw, E1000_TDH(ring_nr));
+ if (nic_i >= kring->nkr_num_slots) { /* XXX can it happen ? */
+ D("TDH wrap %d", nic_i);
+ nic_i -= kring->nkr_num_slots;
}
- delta = l - txr->next_to_clean;
+ delta = nic_i - txr->next_to_clean;
if (delta) {
/* some completed, increment hwavail. */
if (delta < 0)
delta += kring->nkr_num_slots;
- txr->next_to_clean = l;
+ txr->next_to_clean = nic_i;
kring->nr_hwavail += delta;
}
}
- /* update avail to what the kernel knows */
- ring->avail = kring->nr_hwavail;
+
+ nm_txsync_finalize(kring, cur);
return 0;
}
@@ -202,101 +203,107 @@ igb_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int flags)
* Reconcile kernel and user view of the receive ring.
*/
static int
-igb_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int flags)
+igb_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
{
- struct adapter *adapter = ifp->if_softc;
- struct rx_ring *rxr = &adapter->rx_rings[ring_nr];
- struct netmap_adapter *na = NA(ifp);
+ struct ifnet *ifp = na->ifp;
struct netmap_kring *kring = &na->rx_rings[ring_nr];
struct netmap_ring *ring = kring->ring;
- u_int j, l, n, lim = kring->nkr_num_slots - 1;
+ u_int nm_i; /* index into the netmap ring */
+ u_int nic_i; /* index into the NIC ring */
+ u_int n, resvd;
+ u_int const lim = kring->nkr_num_slots - 1;
+ u_int const cur = nm_rxsync_prologue(kring, &resvd); /* cur + res */
int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
- u_int k = ring->cur, resvd = ring->reserved;
- k = ring->cur;
- if (k > lim)
+ /* device-specific */
+ struct adapter *adapter = ifp->if_softc;
+ struct rx_ring *rxr = &adapter->rx_rings[ring_nr];
+
+ if (cur > lim)
return netmap_ring_reinit(kring);
/* XXX check sync modes */
bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
- BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
+ BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
/*
- * import newly received packets into the netmap ring.
- * j is an index in the netmap ring, l in the NIC ring.
+ * First part: import newly received packets.
*/
- l = rxr->next_to_check;
- j = netmap_idx_n2k(kring, l);
if (netmap_no_pendintr || force_update) {
uint16_t slot_flags = kring->nkr_slot_flags;
+ nic_i = rxr->next_to_check;
+ nm_i = netmap_idx_n2k(kring, nic_i);
+
for (n = 0; ; n++) {
- union e1000_adv_rx_desc *curr = &rxr->rx_base[l];
+ union e1000_adv_rx_desc *curr = &rxr->rx_base[nic_i];
uint32_t staterr = le32toh(curr->wb.upper.status_error);
if ((staterr & E1000_RXD_STAT_DD) == 0)
break;
- ring->slot[j].len = le16toh(curr->wb.upper.length);
- ring->slot[j].flags = slot_flags;
+ ring->slot[nm_i].len = le16toh(curr->wb.upper.length);
+ ring->slot[nm_i].flags = slot_flags;
bus_dmamap_sync(rxr->ptag,
- rxr->rx_buffers[l].pmap, BUS_DMASYNC_POSTREAD);
- j = (j == lim) ? 0 : j + 1;
- l = (l == lim) ? 0 : l + 1;
+ rxr->rx_buffers[nic_i].pmap, BUS_DMASYNC_POSTREAD);
+ nm_i = nm_next(nm_i, lim);
+ nic_i = nm_next(nic_i, lim);
}
if (n) { /* update the state variables */
- rxr->next_to_check = l;
+ rxr->next_to_check = nic_i;
kring->nr_hwavail += n;
}
kring->nr_kflags &= ~NKR_PENDINTR;
}
- /* skip past packets that userspace has released */
- j = kring->nr_hwcur; /* netmap ring index */
- if (resvd > 0) {
- if (resvd + ring->avail >= lim + 1) {
- D("XXX invalid reserve/avail %d %d", resvd, ring->avail);
- ring->reserved = resvd = 0; // XXX panic...
- }
- k = (k >= resvd) ? k - resvd : k + lim + 1 - resvd;
- }
- if (j != k) { /* userspace has released some packets. */
- l = netmap_idx_k2n(kring, j);
- for (n = 0; j != k; n++) {
- struct netmap_slot *slot = ring->slot + j;
- union e1000_adv_rx_desc *curr = &rxr->rx_base[l];
- struct igb_rx_buf *rxbuf = rxr->rx_buffers + l;
+ /*
+ * Second part: skip past packets that userspace has released.
+ */
+ nm_i = kring->nr_hwcur;
+ if (nm_i != cur) {
+ nic_i = netmap_idx_k2n(kring, nm_i);
+ for (n = 0; nm_i != cur; n++) {
+ struct netmap_slot *slot = &ring->slot[nm_i];
uint64_t paddr;
void *addr = PNMB(slot, &paddr);
- if (addr == netmap_buffer_base) { /* bad buf */
- return netmap_ring_reinit(kring);
- }
+ union e1000_adv_rx_desc *curr = &rxr->rx_base[nic_i];
+ struct igb_rx_buf *rxbuf = &rxr->rx_buffers[nic_i];
+
+ if (addr == netmap_buffer_base) /* bad buf */
+ goto ring_reset;
if (slot->flags & NS_BUF_CHANGED) {
+ /* buffer has changed, reload map */
netmap_reload_map(rxr->ptag, rxbuf->pmap, addr);
slot->flags &= ~NS_BUF_CHANGED;
}
- curr->read.pkt_addr = htole64(paddr);
curr->wb.upper.status_error = 0;
+ curr->read.pkt_addr = htole64(paddr);
bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
- BUS_DMASYNC_PREREAD);
- j = (j == lim) ? 0 : j + 1;
- l = (l == lim) ? 0 : l + 1;
+ BUS_DMASYNC_PREREAD);
+ nm_i = nm_next(nm_i, lim);
+ nic_i = nm_next(nic_i, lim);
}
kring->nr_hwavail -= n;
- kring->nr_hwcur = k;
+ kring->nr_hwcur = cur;
+
bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
- BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
+ BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
/*
* IMPORTANT: we must leave one free slot in the ring,
- * so move l back by one unit
+ * so move nic_i back by one unit
*/
- l = (l == 0) ? lim : l - 1;
- E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), l);
+ nic_i = (nic_i == 0) ? lim : nic_i - 1;
+ E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), nic_i);
}
- /* tell userspace that there are new packets */
+
+ /* tell userspace that there might be new packets */
ring->avail = kring->nr_hwavail - resvd;
+
return 0;
+
+ring_reset:
+ return netmap_ring_reinit(kring);
}
@@ -314,6 +321,8 @@ igb_netmap_attach(struct adapter *adapter)
na.nm_txsync = igb_netmap_txsync;
na.nm_rxsync = igb_netmap_rxsync;
na.nm_register = igb_netmap_reg;
- netmap_attach(&na, adapter->num_queues);
-}
+ na.num_tx_rings = na.num_rx_rings = adapter->num_queues;
+ netmap_attach(&na);
+}
+
/* end of file */
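
The per-slot buffer checks that used to be open-coded (addr == netmap_buffer_base || len > NETMAP_BUF_SIZE) are now hidden behind NM_CHECK_ADDR_LEN(). The macro itself lives in netmap_kern.h and is not part of this diff; the following is a hypothetical stand-in reconstructed from the checks it replaces, assuming it bails out through the ring_reset label used elsewhere in these functions:

/*
 * Hypothetical stand-in for NM_CHECK_ADDR_LEN(), reconstructed from the
 * open-coded checks it replaces; the real macro is defined in netmap_kern.h.
 */
#include <stdio.h>

#define NETMAP_BUF_SIZE 2048
static char fake_pool[NETMAP_BUF_SIZE];
static void *netmap_buffer_base = fake_pool;	/* "bad buffer" sentinel */

#define NM_CHECK_ADDR_LEN(_a, _l)				\
	do {							\
		if ((_a) == netmap_buffer_base ||		\
		    (_l) > NETMAP_BUF_SIZE)			\
			goto ring_reset;			\
	} while (0)

int
main(void)
{
	void *addr = fake_pool;		/* deliberately the sentinel address */
	unsigned int len = 60;

	NM_CHECK_ADDR_LEN(addr, len);
	printf("slot ok: %u bytes\n", len);
	return 0;

ring_reset:
	printf("bad slot, would call netmap_ring_reinit()\n");
	return 1;
}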
diff --git a/sys/dev/netmap/if_lem_netmap.h b/sys/dev/netmap/if_lem_netmap.h
index 25e5c7c27e3e..8ad3b7a2a352 100644
--- a/sys/dev/netmap/if_lem_netmap.h
+++ b/sys/dev/netmap/if_lem_netmap.h
@@ -27,11 +27,12 @@
/*
* $FreeBSD$
*
- * netmap support for "lem"
+ * netmap support for: lem
*
* For details on netmap support please see ixgbe_netmap.h
*/
+
#include <net/netmap.h>
#include <sys/selinfo.h>
#include <vm/vm.h>
@@ -40,17 +41,13 @@
/*
- * Register/unregister
+ * Register/unregister. We are already under netmap lock.
*/
static int
-lem_netmap_reg(struct ifnet *ifp, int onoff)
+lem_netmap_reg(struct netmap_adapter *na, int onoff)
{
+ struct ifnet *ifp = na->ifp;
struct adapter *adapter = ifp->if_softc;
- struct netmap_adapter *na = NA(ifp);
- int error = 0;
-
- if (na == NULL)
- return EINVAL;
EM_CORE_LOCK(adapter);
@@ -64,24 +61,14 @@ lem_netmap_reg(struct ifnet *ifp, int onoff)
taskqueue_drain(adapter->tq, &adapter->rxtx_task);
taskqueue_drain(adapter->tq, &adapter->link_task);
#endif /* !EM_LEGCY_IRQ */
- if (onoff) {
- ifp->if_capenable |= IFCAP_NETMAP;
- na->if_transmit = ifp->if_transmit;
- ifp->if_transmit = netmap_transmit;
-
- lem_init_locked(adapter);
- if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) == 0) {
- error = ENOMEM;
- goto fail;
- }
+ /* enable or disable flags and callbacks in na and ifp */
+ if (onoff) {
+ nm_set_native_flags(na);
} else {
-fail:
- /* return to non-netmap mode */
- ifp->if_transmit = na->if_transmit;
- ifp->if_capenable &= ~IFCAP_NETMAP;
- lem_init_locked(adapter); /* also enable intr */
+ nm_clear_native_flags(na);
}
+ lem_init_locked(adapter); /* also enable intr */
#ifndef EM_LEGACY_IRQ
taskqueue_unblock(adapter->tq); // XXX do we need this ?
@@ -89,7 +76,7 @@ fail:
EM_CORE_UNLOCK(adapter);
- return (error);
+ return (ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1);
}
@@ -97,108 +84,102 @@ fail:
* Reconcile kernel and user view of the transmit ring.
*/
static int
-lem_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int flags)
+lem_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
{
- struct adapter *adapter = ifp->if_softc;
- struct netmap_adapter *na = NA(ifp);
+ struct ifnet *ifp = na->ifp;
struct netmap_kring *kring = &na->tx_rings[ring_nr];
struct netmap_ring *ring = kring->ring;
- u_int j, k, l, n = 0, lim = kring->nkr_num_slots - 1;
-
+ u_int nm_i; /* index into the netmap ring */
+ u_int nic_i; /* index into the NIC ring */
+ u_int n, new_slots;
+ u_int const lim = kring->nkr_num_slots - 1;
+ u_int const cur = nm_txsync_prologue(kring, &new_slots);
/* generate an interrupt approximately every half ring */
- int report_frequency = kring->nkr_num_slots >> 1;
-
- ND("%s: hwofs %d, hwcur %d hwavail %d lease %d cur %d avail %d",
- ifp->if_xname,
- kring->nkr_hwofs, kring->nr_hwcur, kring->nr_hwavail,
- kring->nkr_hwlease,
- ring->cur, ring->avail);
- /* take a copy of ring->cur now, and never read it again */
- k = ring->cur;
- if (k > lim)
+ u_int report_frequency = kring->nkr_num_slots >> 1;
+
+ /* device-specific */
+ struct adapter *adapter = ifp->if_softc;
+
+ if (cur > lim) /* error checking in nm_txsync_prologue() */
return netmap_ring_reinit(kring);
bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
BUS_DMASYNC_POSTREAD);
+
/*
- * Process new packets to send. j is the current index in the
- * netmap ring, l is the corresponding index in the NIC ring.
+ * First part: process new packets to send.
*/
- j = kring->nr_hwcur;
- if (netmap_verbose > 255)
- RD(5, "device %s send %d->%d", ifp->if_xname, j, k);
- if (j != k) { /* we have new packets to send */
- l = netmap_idx_k2n(kring, j);
- for (n = 0; j != k; n++) {
- /* slot is the current slot in the netmap ring */
- struct netmap_slot *slot = &ring->slot[j];
- /* curr is the current slot in the nic ring */
- struct e1000_tx_desc *curr = &adapter->tx_desc_base[l];
- struct em_buffer *txbuf = &adapter->tx_buffer_area[l];
- int flags = ((slot->flags & NS_REPORT) ||
- j == 0 || j == report_frequency) ?
- E1000_TXD_CMD_RS : 0;
+
+ nm_i = kring->nr_hwcur;
+ if (nm_i != cur) { /* we have new packets to send */
+ nic_i = netmap_idx_k2n(kring, nm_i);
+ for (n = 0; nm_i != cur; n++) {
+ struct netmap_slot *slot = &ring->slot[nm_i];
+ u_int len = slot->len;
uint64_t paddr;
void *addr = PNMB(slot, &paddr);
- u_int len = slot->len;
- if (addr == netmap_buffer_base || len > NETMAP_BUF_SIZE) {
- return netmap_ring_reinit(kring);
- }
- ND("slot %d NIC %d %s", j, l, nm_dump_buf(addr, len, 128, NULL));
+ /* device-specific */
+ struct e1000_tx_desc *curr = &adapter->tx_desc_base[nic_i];
+ struct em_buffer *txbuf = &adapter->tx_buffer_area[nic_i];
+ int flags = (slot->flags & NS_REPORT ||
+ nic_i == 0 || nic_i == report_frequency) ?
+ E1000_TXD_CMD_RS : 0;
+
+ NM_CHECK_ADDR_LEN(addr, len);
- slot->flags &= ~NS_REPORT;
- if (1 || slot->flags & NS_BUF_CHANGED) {
+ if (slot->flags & NS_BUF_CHANGED) {
/* buffer has changed, reload map */
- netmap_reload_map(adapter->txtag, txbuf->map, addr);
curr->buffer_addr = htole64(paddr);
- slot->flags &= ~NS_BUF_CHANGED;
+ netmap_reload_map(adapter->txtag, txbuf->map, addr);
}
+ slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
+
+ /* Fill the slot in the NIC ring. */
curr->upper.data = 0;
- curr->lower.data =
- htole32( adapter->txd_cmd | len |
+ curr->lower.data = htole32(adapter->txd_cmd | len |
(E1000_TXD_CMD_EOP | flags) );
-
- ND("len %d kring %d nic %d", len, j, l);
bus_dmamap_sync(adapter->txtag, txbuf->map,
- BUS_DMASYNC_PREWRITE);
- j = (j == lim) ? 0 : j + 1;
- l = (l == lim) ? 0 : l + 1;
+ BUS_DMASYNC_PREWRITE);
+
+ nm_i = nm_next(nm_i, lim);
+ nic_i = nm_next(nic_i, lim);
}
- ND("sent %d packets from %d, TDT now %d", n, kring->nr_hwcur, l);
- kring->nr_hwcur = k; /* the saved ring->cur */
- kring->nr_hwavail -= n;
+ kring->nr_hwcur = cur; /* the saved ring->cur */
+ /* decrease avail by # of packets sent minus previous ones */
+ kring->nr_hwavail -= new_slots;
+ /* synchronize the NIC ring */
bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
- BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
+ BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
- E1000_WRITE_REG(&adapter->hw, E1000_TDT(0), l);
+ /* (re)start the tx unit up to slot nic_i (excluded) */
+ E1000_WRITE_REG(&adapter->hw, E1000_TDT(0), nic_i);
}
- if (n == 0 || kring->nr_hwavail < 1) {
+ /*
+ * Second part: reclaim buffers for completed transmissions.
+ */
+ if (flags & NAF_FORCE_RECLAIM || kring->nr_hwavail < 1) {
int delta;
/* record completed transmissions using TDH */
- l = E1000_READ_REG(&adapter->hw, E1000_TDH(0));
- ND("tdh is now %d", l);
- if (l >= kring->nkr_num_slots) { /* XXX can it happen ? */
- D("bad TDH %d", l);
- l -= kring->nkr_num_slots;
+ nic_i = E1000_READ_REG(&adapter->hw, E1000_TDH(0));
+ if (nic_i >= kring->nkr_num_slots) { /* XXX can it happen ? */
+ D("TDH wrap %d", nic_i);
+ nic_i -= kring->nkr_num_slots;
}
- delta = l - adapter->next_tx_to_clean;
+ delta = nic_i - adapter->next_tx_to_clean;
if (delta) {
- /* some tx completed, increment hwavail. */
+ /* some completed, increment hwavail. */
if (delta < 0)
delta += kring->nkr_num_slots;
- if (netmap_verbose > 255)
- RD(5, "%s tx recover %d bufs",
- ifp->if_xname, delta);
- adapter->next_tx_to_clean = l;
+ adapter->next_tx_to_clean = nic_i;
kring->nr_hwavail += delta;
}
}
- /* update avail to what the kernel knows */
- ring->avail = kring->nr_hwavail;
+
+ nm_txsync_finalize(kring, cur);
return 0;
}
@@ -208,39 +189,39 @@ lem_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int flags)
* Reconcile kernel and user view of the receive ring.
*/
static int
-lem_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int flags)
+lem_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
{
- struct adapter *adapter = ifp->if_softc;
- struct netmap_adapter *na = NA(ifp);
+ struct ifnet *ifp = na->ifp;
struct netmap_kring *kring = &na->rx_rings[ring_nr];
struct netmap_ring *ring = kring->ring;
- int j, l, n, lim = kring->nkr_num_slots - 1;
+ u_int nm_i; /* index into the netmap ring */
+ u_int nic_i; /* index into the NIC ring */
+ u_int n, resvd;
+ u_int const lim = kring->nkr_num_slots - 1;
+ u_int const cur = nm_rxsync_prologue(kring, &resvd); /* cur + res */
int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
- u_int k = ring->cur, resvd = ring->reserved;
- if (k > lim)
- return netmap_ring_reinit(kring);
+ /* device-specific */
+ struct adapter *adapter = ifp->if_softc;
+ if (cur > lim)
+ return netmap_ring_reinit(kring);
/* XXX check sync modes */
bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
/*
- * Import newly received packets into the netmap ring.
- * j is an index in the netmap ring, l in the NIC ring.
+ * First part: import newly received packets.
*/
- l = adapter->next_rx_desc_to_check;
- j = netmap_idx_n2k(kring, l);
- ND("%s: next NIC %d kring %d (ofs %d), hwcur %d hwavail %d cur %d avail %d",
- ifp->if_xname,
- l, j, kring->nkr_hwofs, kring->nr_hwcur, kring->nr_hwavail,
- ring->cur, ring->avail);
if (netmap_no_pendintr || force_update) {
uint16_t slot_flags = kring->nkr_slot_flags;
+ nic_i = adapter->next_rx_desc_to_check;
+ nm_i = netmap_idx_n2k(kring, nic_i);
+
for (n = 0; ; n++) {
- struct e1000_rx_desc *curr = &adapter->rx_desc_base[l];
+ struct e1000_rx_desc *curr = &adapter->rx_desc_base[nic_i];
uint32_t staterr = le32toh(curr->status);
int len;
@@ -248,76 +229,73 @@ lem_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int flags)
break;
len = le16toh(curr->length) - 4; // CRC
if (len < 0) {
- D("bogus pkt size at %d", j);
+ D("bogus pkt size %d nic idx %d", len, nic_i);
len = 0;
}
- ND("\n%s", nm_dump_buf(NMB(&ring->slot[j]),
- len, 128, NULL));
- ring->slot[j].len = len;
- ring->slot[j].flags = slot_flags;
+ ring->slot[nm_i].len = len;
+ ring->slot[nm_i].flags = slot_flags;
bus_dmamap_sync(adapter->rxtag,
- adapter->rx_buffer_area[l].map,
- BUS_DMASYNC_POSTREAD);
- j = (j == lim) ? 0 : j + 1;
- l = (l == lim) ? 0 : l + 1;
+ adapter->rx_buffer_area[nic_i].map,
+ BUS_DMASYNC_POSTREAD);
+ nm_i = nm_next(nm_i, lim);
+ nic_i = nm_next(nic_i, lim);
}
if (n) { /* update the state variables */
- adapter->next_rx_desc_to_check = l;
+ adapter->next_rx_desc_to_check = nic_i;
+ // ifp->if_ipackets += n;
kring->nr_hwavail += n;
}
kring->nr_kflags &= ~NKR_PENDINTR;
}
- /* skip past packets that userspace has released */
- j = kring->nr_hwcur; /* netmap ring index */
- if (resvd > 0) {
- if (resvd + ring->avail >= lim + 1) {
- D("XXX invalid reserve/avail %d %d", resvd, ring->avail);
- ring->reserved = resvd = 0; // XXX panic...
- }
- k = (k >= resvd) ? k - resvd : k + lim + 1 - resvd;
- }
- if (j != k) { /* userspace has released some packets. */
- l = netmap_idx_k2n(kring, j); /* NIC ring index */
- for (n = 0; j != k; n++) {
- struct netmap_slot *slot = &ring->slot[j];
- struct e1000_rx_desc *curr = &adapter->rx_desc_base[l];
- struct em_buffer *rxbuf = &adapter->rx_buffer_area[l];
+ /*
+ * Second part: skip past packets that userspace has released.
+ */
+ nm_i = kring->nr_hwcur;
+ if (nm_i != cur) {
+ nic_i = netmap_idx_k2n(kring, nm_i);
+ for (n = 0; nm_i != cur; n++) {
+ struct netmap_slot *slot = &ring->slot[nm_i];
uint64_t paddr;
void *addr = PNMB(slot, &paddr);
- if (addr == netmap_buffer_base) { /* bad buf */
- return netmap_ring_reinit(kring);
- }
+ struct e1000_rx_desc *curr = &adapter->rx_desc_base[nic_i];
+ struct em_buffer *rxbuf = &adapter->rx_buffer_area[nic_i];
+
+ if (addr == netmap_buffer_base) /* bad buf */
+ goto ring_reset;
if (slot->flags & NS_BUF_CHANGED) {
/* buffer has changed, reload map */
- netmap_reload_map(adapter->rxtag, rxbuf->map, addr);
curr->buffer_addr = htole64(paddr);
+ netmap_reload_map(adapter->rxtag, rxbuf->map, addr);
slot->flags &= ~NS_BUF_CHANGED;
}
curr->status = 0;
-
bus_dmamap_sync(adapter->rxtag, rxbuf->map,
BUS_DMASYNC_PREREAD);
-
- j = (j == lim) ? 0 : j + 1;
- l = (l == lim) ? 0 : l + 1;
+ nm_i = nm_next(nm_i, lim);
+ nic_i = nm_next(nic_i, lim);
}
kring->nr_hwavail -= n;
- kring->nr_hwcur = k;
+ kring->nr_hwcur = cur;
bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
/*
* IMPORTANT: we must leave one free slot in the ring,
- * so move l back by one unit
+ * so move nic_i back by one unit
*/
- l = (l == 0) ? lim : l - 1;
- E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), l);
+ nic_i = (nic_i == 0) ? lim : nic_i - 1;
+ E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), nic_i);
}
- /* tell userspace that there are new packets */
+
+ /* tell userspace that there might be new packets */
ring->avail = kring->nr_hwavail - resvd;
+
return 0;
+
+ring_reset:
+ return netmap_ring_reinit(kring);
}
@@ -335,7 +313,8 @@ lem_netmap_attach(struct adapter *adapter)
na.nm_txsync = lem_netmap_txsync;
na.nm_rxsync = lem_netmap_rxsync;
na.nm_register = lem_netmap_reg;
- netmap_attach(&na, 1);
+ na.num_tx_rings = na.num_rx_rings = 1;
+ netmap_attach(&na);
}
/* end of file */
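
Every rxsync above ends the refill phase the same way: the RDT write pointer handed back to the NIC is moved one slot behind nic_i so the ring never becomes completely full, and ring->avail is reported net of the slots userspace still holds reserved. A tiny illustration with arbitrary numbers:

/*
 * Illustration of the RDT rule repeated in every rxsync in this patch:
 * leave one free slot, and report avail minus the reserved slots.
 */
#include <stdio.h>

int
main(void)
{
	const unsigned int num_slots = 8, lim = num_slots - 1;
	unsigned int nic_i = 0;		/* first slot NOT given back to the NIC */
	unsigned int hwavail = 5, resvd = 2;
	unsigned int rdt = (nic_i == 0) ? lim : nic_i - 1;

	printf("write RDT = %u (one slot before nic_i=%u)\n", rdt, nic_i);
	/* userspace sees available packets minus the ones it keeps reserved */
	printf("ring->avail = %u\n", hwavail - resvd);
	return 0;
}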
diff --git a/sys/dev/netmap/if_re_netmap.h b/sys/dev/netmap/if_re_netmap.h
index ac781ccb572e..2c7ba060cffd 100644
--- a/sys/dev/netmap/if_re_netmap.h
+++ b/sys/dev/netmap/if_re_netmap.h
@@ -26,8 +26,9 @@
/*
* $FreeBSD$
*
- * netmap support for "re"
- * For details on netmap support please see ixgbe_netmap.h
+ * netmap support for: re
+ *
+ * For more details on netmap support please see ixgbe_netmap.h
*/
@@ -39,44 +40,24 @@
/*
- * support for netmap register/unregisted. We are already under core lock.
- * only called on the first register or the last unregister.
+ * Register/unregister. We are already under netmap lock.
*/
static int
-re_netmap_reg(struct ifnet *ifp, int onoff)
+re_netmap_reg(struct netmap_adapter *na, int onoff)
{
+ struct ifnet *ifp = na->ifp;
struct rl_softc *adapter = ifp->if_softc;
- struct netmap_adapter *na = NA(ifp);
- int error = 0;
-
- if (na == NULL)
- return EINVAL;
- /* Tell the stack that the interface is no longer active */
- ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
-
- re_stop(adapter);
+ RL_LOCK(adapter);
+ re_stop(adapter); /* also clears IFF_DRV_RUNNING */
if (onoff) {
- ifp->if_capenable |= IFCAP_NETMAP;
-
- /* save if_transmit to restore it later */
- na->if_transmit = ifp->if_transmit;
- ifp->if_transmit = netmap_transmit;
-
- re_init_locked(adapter);
-
- if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) == 0) {
- error = ENOMEM;
- goto fail;
- }
+ nm_set_native_flags(na);
} else {
-fail:
- /* restore if_transmit */
- ifp->if_transmit = na->if_transmit;
- ifp->if_capenable &= ~IFCAP_NETMAP;
- re_init_locked(adapter); /* also enables intr */
+ nm_clear_native_flags(na);
}
- return (error);
+ re_init_locked(adapter); /* also enables intr */
+ RL_UNLOCK(adapter);
+ return (ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1);
}
@@ -84,90 +65,107 @@ fail:
* Reconcile kernel and user view of the transmit ring.
*/
static int
-re_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int flags)
+re_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
{
- struct rl_softc *sc = ifp->if_softc;
- struct rl_txdesc *txd = sc->rl_ldata.rl_tx_desc;
- struct netmap_adapter *na = NA(sc->rl_ifp);
+ struct ifnet *ifp = na->ifp;
struct netmap_kring *kring = &na->tx_rings[ring_nr];
struct netmap_ring *ring = kring->ring;
- int j, k, l, n, lim = kring->nkr_num_slots - 1;
+ u_int nm_i; /* index into the netmap ring */
+ u_int nic_i; /* index into the NIC ring */
+ u_int n, new_slots;
+ u_int const lim = kring->nkr_num_slots - 1;
+ u_int const cur = nm_txsync_prologue(kring, &new_slots);
+
+ /* device-specific */
+ struct rl_softc *sc = ifp->if_softc;
+ struct rl_txdesc *txd = sc->rl_ldata.rl_tx_desc;
- k = ring->cur;
- if (k > lim)
+ if (cur > lim) /* error checking in nm_txsync_prologue() */
return netmap_ring_reinit(kring);
- /* Sync the TX descriptor list */
bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag,
- sc->rl_ldata.rl_tx_list_map,
- BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
-
- /* XXX move after the transmissions */
- /* record completed transmissions */
- for (n = 0, l = sc->rl_ldata.rl_tx_considx;
- l != sc->rl_ldata.rl_tx_prodidx;
- n++, l = RL_TX_DESC_NXT(sc, l)) {
- uint32_t cmdstat =
- le32toh(sc->rl_ldata.rl_tx_list[l].rl_cmdstat);
- if (cmdstat & RL_TDESC_STAT_OWN)
- break;
- }
- if (n > 0) {
- sc->rl_ldata.rl_tx_considx = l;
- sc->rl_ldata.rl_tx_free += n;
- kring->nr_hwavail += n;
- }
+ sc->rl_ldata.rl_tx_list_map,
+ BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); // XXX extra postwrite ?
- /* update avail to what the kernel knows */
- ring->avail = kring->nr_hwavail;
+ /*
+ * First part: process new packets to send.
+ */
+ nm_i = kring->nr_hwcur;
+ if (nm_i != cur) { /* we have new packets to send */
+ nic_i = sc->rl_ldata.rl_tx_prodidx;
+ // XXX or netmap_idx_k2n(kring, nm_i);
+
+ for (n = 0; nm_i != cur; n++) {
+ struct netmap_slot *slot = &ring->slot[nm_i];
+ u_int len = slot->len;
+ uint64_t paddr;
+ void *addr = PNMB(slot, &paddr);
- j = kring->nr_hwcur;
- if (j != k) { /* we have new packets to send */
- l = sc->rl_ldata.rl_tx_prodidx;
- for (n = 0; j != k; n++) {
- struct netmap_slot *slot = &ring->slot[j];
- struct rl_desc *desc = &sc->rl_ldata.rl_tx_list[l];
+ /* device-specific */
+ struct rl_desc *desc = &sc->rl_ldata.rl_tx_list[nic_i];
int cmd = slot->len | RL_TDESC_CMD_EOF |
RL_TDESC_CMD_OWN | RL_TDESC_CMD_SOF ;
- uint64_t paddr;
- void *addr = PNMB(slot, &paddr);
- int len = slot->len;
- if (addr == netmap_buffer_base || len > NETMAP_BUF_SIZE) {
- // XXX what about prodidx ?
- return netmap_ring_reinit(kring);
- }
+ NM_CHECK_ADDR_LEN(addr, len);
- if (l == lim) /* mark end of ring */
+ if (nic_i == lim) /* mark end of ring */
cmd |= RL_TDESC_CMD_EOR;
if (slot->flags & NS_BUF_CHANGED) {
+ /* buffer has changed, reload map */
desc->rl_bufaddr_lo = htole32(RL_ADDR_LO(paddr));
desc->rl_bufaddr_hi = htole32(RL_ADDR_HI(paddr));
- /* buffer has changed, unload and reload map */
netmap_reload_map(sc->rl_ldata.rl_tx_mtag,
- txd[l].tx_dmamap, addr);
- slot->flags &= ~NS_BUF_CHANGED;
+ txd[nic_i].tx_dmamap, addr);
}
- slot->flags &= ~NS_REPORT;
+ slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
+
+ /* Fill the slot in the NIC ring. */
desc->rl_cmdstat = htole32(cmd);
+
+ /* make sure changes to the buffer are synced */
bus_dmamap_sync(sc->rl_ldata.rl_tx_mtag,
- txd[l].tx_dmamap, BUS_DMASYNC_PREWRITE);
- j = (j == lim) ? 0 : j + 1;
- l = (l == lim) ? 0 : l + 1;
+ txd[nic_i].tx_dmamap,
+ BUS_DMASYNC_PREWRITE);
+
+ nm_i = nm_next(nm_i, lim);
+ nic_i = nm_next(nic_i, lim);
}
- sc->rl_ldata.rl_tx_prodidx = l;
- kring->nr_hwcur = k; /* the saved ring->cur */
- ring->avail -= n; // XXX see others
- kring->nr_hwavail = ring->avail;
+ sc->rl_ldata.rl_tx_prodidx = nic_i;
+ /* decrease avail by # of packets sent minus previous ones */
+ kring->nr_hwcur = cur; /* the saved ring->cur */
+ kring->nr_hwavail -= new_slots;
+ /* synchronize the NIC ring */
bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag,
- sc->rl_ldata.rl_tx_list_map,
- BUS_DMASYNC_PREWRITE|BUS_DMASYNC_PREREAD);
+ sc->rl_ldata.rl_tx_list_map,
+ BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE);
/* start ? */
CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START);
}
+
+ /*
+ * Second part: reclaim buffers for completed transmissions.
+ */
+ if (flags & NAF_FORCE_RECLAIM || kring->nr_hwavail < 1) {
+ nic_i = sc->rl_ldata.rl_tx_considx;
+ for (n = 0; nic_i != sc->rl_ldata.rl_tx_prodidx;
+ n++, nic_i = RL_TX_DESC_NXT(sc, nic_i)) {
+ uint32_t cmdstat =
+ le32toh(sc->rl_ldata.rl_tx_list[nic_i].rl_cmdstat);
+ if (cmdstat & RL_TDESC_STAT_OWN)
+ break;
+ }
+ if (n > 0) {
+ sc->rl_ldata.rl_tx_considx = nic_i;
+ sc->rl_ldata.rl_tx_free += n;
+ kring->nr_hwavail += n;
+ }
+ }
+
+ nm_txsync_finalize(kring, cur);
+
return 0;
}
@@ -176,42 +174,45 @@ re_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int flags)
* Reconcile kernel and user view of the receive ring.
*/
static int
-re_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int flags)
+re_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
{
- struct rl_softc *sc = ifp->if_softc;
- struct rl_rxdesc *rxd = sc->rl_ldata.rl_rx_desc;
- struct netmap_adapter *na = NA(sc->rl_ifp);
+ struct ifnet *ifp = na->ifp;
struct netmap_kring *kring = &na->rx_rings[ring_nr];
struct netmap_ring *ring = kring->ring;
- int j, l, n, lim = kring->nkr_num_slots - 1;
+ u_int nm_i; /* index into the netmap ring */
+ u_int nic_i; /* index into the NIC ring */
+ u_int n, resvd;
+ u_int const lim = kring->nkr_num_slots - 1;
+ u_int const cur = nm_rxsync_prologue(kring, &resvd); /* cur + res */
int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
- u_int k = ring->cur, resvd = ring->reserved;
- k = ring->cur;
- if (k > lim)
+ /* device-specific */
+ struct rl_softc *sc = ifp->if_softc;
+ struct rl_rxdesc *rxd = sc->rl_ldata.rl_rx_desc;
+
+ if (cur > lim)
return netmap_ring_reinit(kring);
- /* XXX check sync modes */
bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag,
- sc->rl_ldata.rl_rx_list_map,
- BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
+ sc->rl_ldata.rl_rx_list_map,
+ BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
/*
- * Import newly received packets into the netmap ring.
- * j is an index in the netmap ring, l in the NIC ring.
+ * First part: import newly received packets.
*
- * The device uses all the buffers in the ring, so we need
+ * This device uses all the buffers in the ring, so we need
* another termination condition in addition to RL_RDESC_STAT_OWN
* cleared (all buffers could have it cleared. The easiest one
* is to limit the amount of data reported up to 'lim'
*/
- l = sc->rl_ldata.rl_rx_prodidx; /* next pkt to check */
- j = netmap_idx_n2k(kring, l); /* the kring index */
if (netmap_no_pendintr || force_update) {
uint16_t slot_flags = kring->nkr_slot_flags;
+ nic_i = sc->rl_ldata.rl_rx_prodidx; /* next pkt to check */
+ nm_i = netmap_idx_n2k(kring, nic_i);
+
for (n = kring->nr_hwavail; n < lim ; n++) {
- struct rl_desc *cur_rx = &sc->rl_ldata.rl_rx_list[l];
+ struct rl_desc *cur_rx = &sc->rl_ldata.rl_rx_list[nic_i];
uint32_t rxstat = le32toh(cur_rx->rl_cmdstat);
uint32_t total_len;
@@ -220,74 +221,75 @@ re_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int flags)
total_len = rxstat & sc->rl_rxlenmask;
/* XXX subtract crc */
total_len = (total_len < 4) ? 0 : total_len - 4;
- kring->ring->slot[j].len = total_len;
- kring->ring->slot[j].flags = slot_flags;
+ ring->slot[nm_i].len = total_len;
+ ring->slot[nm_i].flags = slot_flags;
/* sync was in re_newbuf() */
bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag,
- rxd[l].rx_dmamap, BUS_DMASYNC_POSTREAD);
- j = (j == lim) ? 0 : j + 1;
- l = (l == lim) ? 0 : l + 1;
+ rxd[nic_i].rx_dmamap, BUS_DMASYNC_POSTREAD);
+ nm_i = nm_next(nm_i, lim);
+ nic_i = nm_next(nic_i, lim);
}
if (n != kring->nr_hwavail) {
- sc->rl_ldata.rl_rx_prodidx = l;
+ sc->rl_ldata.rl_rx_prodidx = nic_i;
sc->rl_ifp->if_ipackets += n - kring->nr_hwavail;
kring->nr_hwavail = n;
}
kring->nr_kflags &= ~NKR_PENDINTR;
}
- /* skip past packets that userspace has released */
- j = kring->nr_hwcur;
- if (resvd > 0) {
- if (resvd + ring->avail >= lim + 1) {
- D("XXX invalid reserve/avail %d %d", resvd, ring->avail);
- ring->reserved = resvd = 0; // XXX panic...
- }
- k = (k >= resvd) ? k - resvd : k + lim + 1 - resvd;
- }
- if (j != k) { /* userspace has released some packets. */
- l = netmap_idx_k2n(kring, j); /* the NIC index */
- for (n = 0; j != k; n++) {
- struct netmap_slot *slot = ring->slot + j;
- struct rl_desc *desc = &sc->rl_ldata.rl_rx_list[l];
- int cmd = NETMAP_BUF_SIZE | RL_RDESC_CMD_OWN;
+ /*
+ * Second part: skip past packets that userspace has released.
+ */
+ nm_i = kring->nr_hwcur;
+ if (nm_i != cur) {
+ nic_i = netmap_idx_k2n(kring, nm_i);
+ for (n = 0; nm_i != cur; n++) {
+ struct netmap_slot *slot = &ring->slot[nm_i];
uint64_t paddr;
void *addr = PNMB(slot, &paddr);
- if (addr == netmap_buffer_base) { /* bad buf */
- return netmap_ring_reinit(kring);
- }
+ struct rl_desc *desc = &sc->rl_ldata.rl_rx_list[nic_i];
+ int cmd = NETMAP_BUF_SIZE | RL_RDESC_CMD_OWN;
+
+ if (addr == netmap_buffer_base) /* bad buf */
+ goto ring_reset;
- if (l == lim) /* mark end of ring */
+ if (nic_i == lim) /* mark end of ring */
cmd |= RL_RDESC_CMD_EOR;
- slot->flags &= ~NS_REPORT;
if (slot->flags & NS_BUF_CHANGED) {
- netmap_reload_map(sc->rl_ldata.rl_rx_mtag,
- rxd[l].rx_dmamap, addr);
+ /* buffer has changed, reload map */
desc->rl_bufaddr_lo = htole32(RL_ADDR_LO(paddr));
desc->rl_bufaddr_hi = htole32(RL_ADDR_HI(paddr));
+ netmap_reload_map(sc->rl_ldata.rl_rx_mtag,
+ rxd[nic_i].rx_dmamap, addr);
slot->flags &= ~NS_BUF_CHANGED;
}
desc->rl_cmdstat = htole32(cmd);
bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag,
- rxd[l].rx_dmamap, BUS_DMASYNC_PREREAD);
- j = (j == lim) ? 0 : j + 1;
- l = (l == lim) ? 0 : l + 1;
+ rxd[nic_i].rx_dmamap,
+ BUS_DMASYNC_PREREAD);
+ nm_i = nm_next(nm_i, lim);
+ nic_i = nm_next(nic_i, lim);
}
kring->nr_hwavail -= n;
- kring->nr_hwcur = k;
- /* Flush the RX DMA ring */
+ kring->nr_hwcur = cur;
bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag,
sc->rl_ldata.rl_rx_list_map,
- BUS_DMASYNC_PREWRITE|BUS_DMASYNC_PREREAD);
+ BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
}
- /* tell userspace that there are new packets */
+
+ /* tell userspace that there might be new packets */
ring->avail = kring->nr_hwavail - resvd;
+
return 0;
+
+ring_reset:
+ return netmap_ring_reinit(kring);
}
+
/*
* Additional routines to init the tx and rx rings.
* In other drivers we do that inline in the main code.
@@ -299,11 +301,16 @@ re_netmap_tx_init(struct rl_softc *sc)
struct rl_desc *desc;
int i, n;
struct netmap_adapter *na = NA(sc->rl_ifp);
- struct netmap_slot *slot = netmap_reset(na, NR_TX, 0, 0);
+ struct netmap_slot *slot;
+ if (!na || !(na->na_flags & NAF_NATIVE_ON)) {
+ return;
+ }
+
+ slot = netmap_reset(na, NR_TX, 0, 0);
/* slot is NULL if we are not in netmap mode */
if (!slot)
- return;
+ return; // XXX cannot happen
/* in netmap mode, overwrite addresses and maps */
txd = sc->rl_ldata.rl_tx_desc;
desc = sc->rl_ldata.rl_tx_list;
@@ -377,6 +384,8 @@ re_netmap_attach(struct rl_softc *sc)
na.nm_txsync = re_netmap_txsync;
na.nm_rxsync = re_netmap_rxsync;
na.nm_register = re_netmap_reg;
- netmap_attach(&na, 1);
+ na.num_tx_rings = na.num_rx_rings = 1;
+ netmap_attach(&na);
}
+
/* end of file */
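
Finally, all five attach functions change identically: netmap_attach() no longer takes a ring count, so num_tx_rings/num_rx_rings are filled in on the netmap_adapter first. A simplified, self-contained sketch of the resulting attach shape (the struct and netmap_attach() below are stand-ins, not the real API):

/*
 * Sketch of the new attach shape shared by all drivers in this patch.
 * The structure and netmap_attach() below are simplified stand-ins.
 */
#include <stdio.h>
#include <string.h>

struct netmap_adapter {
	unsigned num_tx_rings, num_rx_rings;
	int	(*nm_txsync)(struct netmap_adapter *, unsigned, int);
	int	(*nm_rxsync)(struct netmap_adapter *, unsigned, int);
	int	(*nm_register)(struct netmap_adapter *, int);
};

static int
netmap_attach(struct netmap_adapter *na)	/* note: single argument now */
{
	printf("attached with %u tx / %u rx rings\n",
	    na->num_tx_rings, na->num_rx_rings);
	return 0;
}

static int dummy_txsync(struct netmap_adapter *na, unsigned r, int f) { (void)na; (void)r; (void)f; return 0; }
static int dummy_rxsync(struct netmap_adapter *na, unsigned r, int f) { (void)na; (void)r; (void)f; return 0; }
static int dummy_reg(struct netmap_adapter *na, int onoff) { (void)na; (void)onoff; return 0; }

int
main(void)
{
	struct netmap_adapter na;

	memset(&na, 0, sizeof(na));
	na.nm_txsync = dummy_txsync;
	na.nm_rxsync = dummy_rxsync;
	na.nm_register = dummy_reg;
	/* ring counts now travel inside the adapter, not as a second argument */
	na.num_tx_rings = na.num_rx_rings = 4;
	return netmap_attach(&na);
}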
diff --git a/sys/dev/netmap/ixgbe_netmap.h b/sys/dev/netmap/ixgbe_netmap.h
index fca1cf1e0a90..7fd67d2b57ff 100644
--- a/sys/dev/netmap/ixgbe_netmap.h
+++ b/sys/dev/netmap/ixgbe_netmap.h
@@ -26,16 +26,16 @@
/*
* $FreeBSD$
*
- * netmap modifications for ixgbe
+ * netmap support for: ixgbe
*
* This file is meant to be a reference on how to implement
* netmap support for a network driver.
- * This file contains code but only static or inline functions
- * that are used by a single driver. To avoid replication of
- * code we just #include it near the beginning of the
- * standard driver.
+ * This file contains code but only static or inline functions used
+ * by a single driver. To avoid replication of code we just #include
+ * it near the beginning of the standard driver.
*/
+
#include <net/netmap.h>
#include <sys/selinfo.h>
/*
@@ -48,7 +48,10 @@
*/
#include <dev/netmap/netmap_kern.h>
+
/*
+ * device-specific sysctl variables:
+ *
* ix_crcstrip: 0: keep CRC in rx frames (default), 1: strip it.
* During regular operations the CRC is stripped, but on some
* hardware reception of frames not multiple of 64 is slower,
@@ -56,17 +59,11 @@
*
* ix_rx_miss, ix_rx_miss_bufs:
* count packets that might be missed due to lost interrupts.
- *
- * ix_use_dd
- * use the dd bit for completed tx transmissions.
- * This is tricky, much better to use TDH for now.
*/
SYSCTL_DECL(_dev_netmap);
-static int ix_rx_miss, ix_rx_miss_bufs, ix_use_dd, ix_crcstrip;
+static int ix_rx_miss, ix_rx_miss_bufs, ix_crcstrip;
SYSCTL_INT(_dev_netmap, OID_AUTO, ix_crcstrip,
CTLFLAG_RW, &ix_crcstrip, 0, "strip CRC on rx frames");
-SYSCTL_INT(_dev_netmap, OID_AUTO, ix_use_dd,
- CTLFLAG_RW, &ix_use_dd, 0, "use dd instead of tdh to detect tx frames");
SYSCTL_INT(_dev_netmap, OID_AUTO, ix_rx_miss,
CTLFLAG_RW, &ix_rx_miss, 0, "potentially missed rx intr");
SYSCTL_INT(_dev_netmap, OID_AUTO, ix_rx_miss_bufs,
@@ -110,283 +107,235 @@ set_crcstrip(struct ixgbe_hw *hw, int onoff)
IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rxc);
}
+
/*
- * Register/unregister. We are already under core lock.
+ * Register/unregister. We are already under netmap lock.
* Only called on the first register or the last unregister.
*/
static int
-ixgbe_netmap_reg(struct ifnet *ifp, int onoff)
+ixgbe_netmap_reg(struct netmap_adapter *na, int onoff)
{
+ struct ifnet *ifp = na->ifp;
struct adapter *adapter = ifp->if_softc;
- struct netmap_adapter *na = NA(ifp);
- int error = 0;
-
- if (na == NULL)
- return EINVAL; /* no netmap support here */
IXGBE_CORE_LOCK(adapter);
- ixgbe_disable_intr(adapter);
+ ixgbe_disable_intr(adapter); // XXX maybe ixgbe_stop ?
/* Tell the stack that the interface is no longer active */
ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
set_crcstrip(&adapter->hw, onoff);
- if (onoff) { /* enable netmap mode */
- ifp->if_capenable |= IFCAP_NETMAP;
-
- /* save if_transmit and replace with our routine */
- na->if_transmit = ifp->if_transmit;
- ifp->if_transmit = netmap_transmit;
-
- /*
- * reinitialize the adapter, now with netmap flag set,
- * so the rings will be set accordingly.
- */
- ixgbe_init_locked(adapter);
- if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) == 0) {
- error = ENOMEM;
- goto fail;
- }
- } else { /* reset normal mode (explicit request or netmap failed) */
-fail:
- /* restore if_transmit */
- ifp->if_transmit = na->if_transmit;
- ifp->if_capenable &= ~IFCAP_NETMAP;
- /* initialize the card, this time in standard mode */
- ixgbe_init_locked(adapter); /* also enables intr */
+ /* enable or disable flags and callbacks in na and ifp */
+ if (onoff) {
+ nm_set_native_flags(na);
+ } else {
+ nm_clear_native_flags(na);
}
- set_crcstrip(&adapter->hw, onoff);
+ ixgbe_init_locked(adapter); /* also enables intr */
+ set_crcstrip(&adapter->hw, onoff); // XXX why twice ?
IXGBE_CORE_UNLOCK(adapter);
- return (error);
+ return (ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1);
}
/*
* Reconcile kernel and user view of the transmit ring.
- * This routine might be called frequently so it must be efficient.
- *
- * ring->cur holds the userspace view of the current ring index. Userspace
- * has filled the tx slots from the previous call's ring->cur up to but not
- * including ring->cur for this call. In this function the kernel updates
- * kring->nr_hwcur to ring->cur, thus slots [kring->nr_hwcur, ring->cur) are
- * now ready to transmit. At the last interrupt kring->nr_hwavail slots were
- * available.
*
- * This function runs under lock (acquired from the caller or internally).
- * It must first update ring->avail to what the kernel knows,
- * subtract the newly used slots (ring->cur - kring->nr_hwcur)
- * from both avail and nr_hwavail, and set ring->nr_hwcur = ring->cur
- * issuing a dmamap_sync on all slots.
+ * Userspace wants to send packets up to the one before ring->cur,
+ * kernel knows kring->nr_hwcur is the first unsent packet.
*
- * Since ring comes from userspace, its content must be read only once,
- * and validated before being used to update the kernel's structures.
- * (this is also true for every use of ring in the kernel).
+ * Here we push packets out (as many as possible), and possibly
+ * reclaim buffers from previously completed transmission.
*
- * ring->avail is never used, only checked for bogus values.
+ * ring->avail is not used on input, but it is updated on return.
*
- * I flags & FORCE_RECLAIM, reclaim transmitted
- * buffers irrespective of interrupt mitigation.
+ * The caller (netmap) guarantees that there is only one instance
+ * running at any time. Any interference with other driver
+ * methods should be handled by the individual drivers.
*/
static int
-ixgbe_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int flags)
+ixgbe_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
{
- struct adapter *adapter = ifp->if_softc;
- struct tx_ring *txr = &adapter->tx_rings[ring_nr];
- struct netmap_adapter *na = NA(adapter->ifp);
+ struct ifnet *ifp = na->ifp;
struct netmap_kring *kring = &na->tx_rings[ring_nr];
struct netmap_ring *ring = kring->ring;
- u_int j, l, n = 0;
- u_int const k = ring->cur, lim = kring->nkr_num_slots - 1;
-
+ u_int nm_i; /* index into the netmap ring */
+ u_int nic_i; /* index into the NIC ring */
+ u_int n, new_slots;
+ u_int const lim = kring->nkr_num_slots - 1;
+ u_int const cur = nm_txsync_prologue(kring, &new_slots);
/*
- * ixgbe can generate an interrupt on every tx packet, but it
- * seems very expensive, so we interrupt once every half ring,
- * or when requested with NS_REPORT
+ * interrupts on every tx packet are expensive so request
+ * them every half ring, or when NS_REPORT is set
*/
u_int report_frequency = kring->nkr_num_slots >> 1;
- if (k > lim)
+ /* device-specific */
+ struct adapter *adapter = ifp->if_softc;
+ struct tx_ring *txr = &adapter->tx_rings[ring_nr];
+ int reclaim_tx;
+
+ if (cur > lim) /* error checking in nm_txsync_prologue() */
return netmap_ring_reinit(kring);
bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
BUS_DMASYNC_POSTREAD);
/*
- * Process new packets to send. j is the current index in the
- * netmap ring, l is the corresponding index in the NIC ring.
+ * First part: process new packets to send.
+ * nm_i is the current index in the netmap ring,
+ * nic_i is the corresponding index in the NIC ring.
* The two numbers differ because upon a *_init() we reset
* the NIC ring but leave the netmap ring unchanged.
* For the transmit ring, we have
*
- * j = kring->nr_hwcur
- * l = IXGBE_TDT (not tracked in the driver)
+ * nm_i = kring->nr_hwcur
+ * nic_i = IXGBE_TDT (not tracked in the driver)
* and
- * j == (l + kring->nkr_hwofs) % ring_size
+ * nm_i == (nic_i + kring->nkr_hwofs) % ring_size
*
* In this driver kring->nkr_hwofs >= 0, but for other
* drivers it might be negative as well.
*/
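/*
 * Illustrative aside (editor's sketch, not part of this patch): the
 * nm_i/nic_i relation above is plain modular arithmetic. The function
 * below is a hypothetical helper (the driver uses netmap_idx_k2n()),
 * valid as long as |nkr_hwofs| < nkr_num_slots:
 */
static u_int
example_nm_to_nic(u_int nm_i, int hwofs, u_int num_slots)
{
	/* invert nm_i == (nic_i + hwofs) % num_slots */
	int nic_i = (int)nm_i - hwofs;

	if (nic_i < 0)
		nic_i += num_slots;
	else if (nic_i >= (int)num_slots)
		nic_i -= num_slots;
	return (u_int)nic_i;
}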
- j = kring->nr_hwcur;
- if (j != k) { /* we have new packets to send */
- prefetch(&ring->slot[j]);
- l = netmap_idx_k2n(kring, j); /* NIC index */
- prefetch(&txr->tx_buffers[l]);
- for (n = 0; j != k; n++) {
- /*
- * Collect per-slot info.
- * Note that txbuf and curr are indexed by l.
- *
- * In this driver we collect the buffer address
- * (using the PNMB() macro) because we always
- * need to rewrite it into the NIC ring.
- * Many other drivers preserve the address, so
- * we only need to access it if NS_BUF_CHANGED
- * is set.
- * XXX note, on this device the dmamap* calls are
- * not necessary because tag is 0, however just accessing
- * the per-packet tag kills 1Mpps at 900 MHz.
- */
- struct netmap_slot *slot = &ring->slot[j];
- union ixgbe_adv_tx_desc *curr = &txr->tx_base[l];
- struct ixgbe_tx_buf *txbuf = &txr->tx_buffers[l];
- uint64_t paddr;
- // XXX type for flags and len ?
- int flags = ((slot->flags & NS_REPORT) ||
- j == 0 || j == report_frequency) ?
- IXGBE_TXD_CMD_RS : 0;
+
+ /*
+ * If we have packets to send (kring->nr_hwcur != ring->cur)
+ * iterate over the netmap ring, fetch length and update
+ * the corresponding slot in the NIC ring. Some drivers also
+ * need to update the buffer's physical address in the NIC slot
+ * even if NS_BUF_CHANGED is not set (PNMB computes the addresses).
+ *
+ * The netmap_reload_map() call is especially expensive,
+ * even when (as in this case) the tag is 0, so do it only
+ * when the buffer has actually changed.
+ *
+ * If possible do not set the report/intr bit on all slots,
+ * but only a few times per ring or when NS_REPORT is set.
+ *
+ * Finally, on 10G and faster drivers, it might be useful
+ * to prefetch the next slot and txr entry.
+ */
+
+ nm_i = kring->nr_hwcur;
+ if (nm_i != cur) { /* we have new packets to send */
+ nic_i = netmap_idx_k2n(kring, nm_i);
+
+ prefetch(&ring->slot[nm_i]);
+ prefetch(&txr->tx_buffers[nic_i]);
+
+ for (n = 0; nm_i != cur; n++) {
+ struct netmap_slot *slot = &ring->slot[nm_i];
u_int len = slot->len;
+ uint64_t paddr;
void *addr = PNMB(slot, &paddr);
- j = (j == lim) ? 0 : j + 1;
- l = (l == lim) ? 0 : l + 1;
- prefetch(&ring->slot[j]);
- prefetch(&txr->tx_buffers[l]);
-
- /*
- * Quick check for valid addr and len.
- * NMB() returns netmap_buffer_base for invalid
- * buffer indexes (but the address is still a
- * valid one to be used in a ring). slot->len is
- * unsigned so no need to check for negative values.
- */
- if (addr == netmap_buffer_base || len > NETMAP_BUF_SIZE) {
-ring_reset:
- return netmap_ring_reinit(kring);
- }
+ /* device-specific */
+ union ixgbe_adv_tx_desc *curr = &txr->tx_base[nic_i];
+ struct ixgbe_tx_buf *txbuf = &txr->tx_buffers[nic_i];
+ int flags = (slot->flags & NS_REPORT ||
+ nic_i == 0 || nic_i == report_frequency) ?
+ IXGBE_TXD_CMD_RS : 0;
+
+ /* prefetch for next round */
+ prefetch(&ring->slot[nm_i + 1]);
+ prefetch(&txr->tx_buffers[nic_i + 1]);
+
+ NM_CHECK_ADDR_LEN(addr, len);
if (slot->flags & NS_BUF_CHANGED) {
- /* buffer has changed, unload and reload map */
+ /* buffer has changed, reload map */
netmap_reload_map(txr->txtag, txbuf->map, addr);
- slot->flags &= ~NS_BUF_CHANGED;
}
- slot->flags &= ~NS_REPORT;
- /*
- * Fill the slot in the NIC ring.
- * In this driver we need to rewrite the buffer
- * address in the NIC ring. Other drivers do not
- * need this.
- * Use legacy descriptor, it is faster.
- */
+ slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
+
+ /* Fill the slot in the NIC ring. */
+ /* Use a legacy descriptor, it seems to be faster. */
curr->read.buffer_addr = htole64(paddr);
curr->read.olinfo_status = 0;
curr->read.cmd_type_len = htole32(len | flags |
IXGBE_ADVTXD_DCMD_IFCS | IXGBE_TXD_CMD_EOP);
/* make sure changes to the buffer are synced */
- bus_dmamap_sync(txr->txtag, txbuf->map, BUS_DMASYNC_PREWRITE);
+ bus_dmamap_sync(txr->txtag, txbuf->map,
+ BUS_DMASYNC_PREWRITE);
+
+ nm_i = nm_next(nm_i, lim);
+ nic_i = nm_next(nic_i, lim);
}
- kring->nr_hwcur = k; /* the saved ring->cur */
- /* decrease avail by number of packets sent */
- kring->nr_hwavail -= n;
+ kring->nr_hwcur = cur; /* the saved ring->cur */
+ /* decrease avail by # of packets sent minus previous ones */
+ kring->nr_hwavail -= new_slots;
/* synchronize the NIC ring */
bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
- /* (re)start the transmitter up to slot l (excluded) */
- IXGBE_WRITE_REG(&adapter->hw, IXGBE_TDT(txr->me), l);
+
+ /* (re)start the tx unit up to slot nic_i (excluded) */
+ IXGBE_WRITE_REG(&adapter->hw, IXGBE_TDT(txr->me), nic_i);
}
/*
- * Reclaim buffers for completed transmissions.
+ * Second part: reclaim buffers for completed transmissions.
* Because this is expensive (we read a NIC register etc.)
* we only do it in specific cases (see below).
- * In all cases kring->nr_kflags indicates which slot will be
- * checked upon a tx interrupt (nkr_num_slots means none).
*/
if (flags & NAF_FORCE_RECLAIM) {
- j = 1; /* forced reclaim, ignore interrupts */
- kring->nr_kflags = kring->nkr_num_slots;
+ reclaim_tx = 1; /* forced reclaim */
} else if (kring->nr_hwavail > 0) {
- j = 0; /* buffers still available: no reclaim, ignore intr. */
- kring->nr_kflags = kring->nkr_num_slots;
+ reclaim_tx = 0; /* have buffers, no reclaim */
} else {
/*
- * no buffers available, locate a slot for which we request
- * ReportStatus (approximately half ring after next_to_clean)
- * and record it in kring->nr_kflags.
- * If the slot has DD set, do the reclaim looking at TDH,
- * otherwise we go to sleep (in netmap_poll()) and will be
- * woken up when slot nr_kflags will be ready.
+ * No buffers available. Locate previous slot with
+ * REPORT_STATUS set.
+ * If the slot has DD set, we can reclaim space,
+ * otherwise wait for the next interrupt.
+ * This enables interrupt moderation on the tx
+ * side though it might reduce throughput.
*/
struct ixgbe_legacy_tx_desc *txd =
(struct ixgbe_legacy_tx_desc *)txr->tx_base;
- j = txr->next_to_clean + kring->nkr_num_slots/2;
- if (j >= kring->nkr_num_slots)
- j -= kring->nkr_num_slots;
+ nic_i = txr->next_to_clean + report_frequency;
+ if (nic_i > lim)
+ nic_i -= lim + 1;
// round to the closest with dd set
- j= (j < kring->nkr_num_slots / 4 || j >= kring->nkr_num_slots*3/4) ?
+ nic_i = (nic_i < kring->nkr_num_slots / 4 ||
+ nic_i >= kring->nkr_num_slots*3/4) ?
0 : report_frequency;
- kring->nr_kflags = j; /* the slot to check */
- j = txd[j].upper.fields.status & IXGBE_TXD_STAT_DD; // XXX cpu_to_le32 ?
+ reclaim_tx = txd[nic_i].upper.fields.status & IXGBE_TXD_STAT_DD; // XXX cpu_to_le32 ?
}
- if (j) {
- int delta;
-
+ if (reclaim_tx) {
/*
* Record completed transmissions.
* We (re)use the driver's txr->next_to_clean to keep
* track of the most recently completed transmission.
*
- * The datasheet discourages the use of TDH to find out the
- * number of sent packets. We should rather check the DD
- * status bit in a packet descriptor. However, we only set
- * the "report status" bit for some descriptors (a kind of
- * interrupt mitigation), so we can only check on those.
- * For the time being we use TDH, as we do it infrequently
- * enough not to pose performance problems.
+ * The datasheet discourages the use of TDH to find
+ * out the number of sent packets, but we only set
+ * REPORT_STATUS in a few slots so TDH is the only
+ * good way.
*/
- if (ix_use_dd) {
- struct ixgbe_legacy_tx_desc *txd =
- (struct ixgbe_legacy_tx_desc *)txr->tx_base;
- u_int k1 = netmap_idx_k2n(kring, kring->nr_hwcur);
- l = txr->next_to_clean;
- delta = 0;
- while (l != k1 &&
- txd[l].upper.fields.status & IXGBE_TXD_STAT_DD) {
- delta++;
- l = (l == lim) ? 0 : l + 1;
- }
- } else {
- l = IXGBE_READ_REG(&adapter->hw, IXGBE_TDH(ring_nr));
- if (l >= kring->nkr_num_slots) { /* XXX can happen */
- D("TDH wrap %d", l);
- l -= kring->nkr_num_slots;
+ nic_i = IXGBE_READ_REG(&adapter->hw, IXGBE_TDH(ring_nr));
+ if (nic_i >= kring->nkr_num_slots) { /* XXX can it happen ? */
+ D("TDH wrap %d", nic_i);
+ nic_i -= kring->nkr_num_slots;
}
- delta = l - txr->next_to_clean;
- }
- if (delta) {
+ if (nic_i != txr->next_to_clean) {
+ n = (nic_i + lim + 1) - txr->next_to_clean;
+ if (n > lim)
+ n -= lim + 1;
/* some tx completed, increment avail */
- if (delta < 0)
- delta += kring->nkr_num_slots;
- txr->next_to_clean = l;
- kring->nr_hwavail += delta;
- if (kring->nr_hwavail > lim)
- goto ring_reset;
+ txr->next_to_clean = nic_i;
+ kring->nr_hwavail += n;
+ if (kring->nr_hwavail > lim) {
+ RD(5, "bad hwavail %d",
+ kring->nr_hwavail);
+ return netmap_ring_reinit(kring);
+ }
}
}
- /* update avail to what the kernel knows */
- ring->avail = kring->nr_hwavail;
+
+ nm_txsync_finalize(kring, cur);
return 0;
}
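/*
 * Illustrative aside (editor's sketch, not part of this patch): the
 * reclaim decision in the second part of txsync above, distilled into a
 * hypothetical helper. dd_set stands for the IXGBE_TXD_STAT_DD test on
 * the chosen report slot.
 */
static int
example_want_reclaim(struct netmap_kring *kring, int flags, int dd_set)
{
	if (flags & NAF_FORCE_RECLAIM)
		return 1;	/* forced reclaim */
	if (kring->nr_hwavail > 0)
		return 0;	/* buffers still available, skip */
	return dd_set;		/* out of buffers: reclaim iff DD is set */
}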
@@ -394,11 +343,12 @@ ring_reset:
/*
* Reconcile kernel and user view of the receive ring.
- * Same as for the txsync, this routine must be efficient and
- * avoid races in accessing the shared regions.
+ * Same as for the txsync, this routine must be efficient.
+ * The caller guarantees a single invocation, but races against
+ * the rest of the driver should be handled here.
*
- * When called, userspace has read data from slots kring->nr_hwcur
- * up to ring->cur (excluded).
+ * When called, userspace has released buffers up to
+ * ring->cur - ring->reserved (last one excluded).
*
* The last interrupt reported kring->nr_hwavail slots available
* after kring->nr_hwcur.
@@ -410,18 +360,23 @@ ring_reset:
* of whether or not we received an interrupt.
*/
static int
-ixgbe_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int flags)
+ixgbe_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
{
- struct adapter *adapter = ifp->if_softc;
- struct rx_ring *rxr = &adapter->rx_rings[ring_nr];
- struct netmap_adapter *na = NA(adapter->ifp);
+ struct ifnet *ifp = na->ifp;
struct netmap_kring *kring = &na->rx_rings[ring_nr];
struct netmap_ring *ring = kring->ring;
- u_int j, l, n, lim = kring->nkr_num_slots - 1;
+ u_int nm_i; /* index into the netmap ring */
+ u_int nic_i; /* index into the NIC ring */
+ u_int n, resvd;
+ u_int const lim = kring->nkr_num_slots - 1;
+ u_int const cur = nm_rxsync_prologue(kring, &resvd); /* cur + res */
int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
- u_int k = ring->cur, resvd = ring->reserved;
- if (k > lim)
+ /* device-specific */
+ struct adapter *adapter = ifp->if_softc;
+ struct rx_ring *rxr = &adapter->rx_rings[ring_nr];
+
+ if (cur > lim)
return netmap_ring_reinit(kring);
/* XXX check sync modes */
@@ -429,17 +384,17 @@ ixgbe_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int flags)
BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
/*
- * First part, import newly received packets into the netmap ring.
+ * First part: import newly received packets.
*
- * j is the index of the next free slot in the netmap ring,
- * and l is the index of the next received packet in the NIC ring,
+ * nm_i is the index of the next free slot in the netmap ring,
+ * nic_i is the index of the next received packet in the NIC ring,
* and they may differ in case if_init() has been called while
* in netmap mode. For the receive ring we have
*
- * j = (kring->nr_hwcur + kring->nr_hwavail) % ring_size
- * l = rxr->next_to_check;
+ * nm_i = (kring->nr_hwcur + kring->nr_hwavail) % ring_size
+ * nic_i = rxr->next_to_check;
* and
- * j == (l + kring->nkr_hwofs) % ring_size
+ * nm_i == (nic_i + kring->nkr_hwofs) % ring_size
*
* rxr->next_to_check is set to 0 on a ring reinit
*/
@@ -447,21 +402,21 @@ ixgbe_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int flags)
int crclen = ix_crcstrip ? 0 : 4;
uint16_t slot_flags = kring->nkr_slot_flags;
- l = rxr->next_to_check;
- j = netmap_idx_n2k(kring, l);
+ nic_i = rxr->next_to_check;
+ nm_i = netmap_idx_n2k(kring, nic_i);
for (n = 0; ; n++) {
- union ixgbe_adv_rx_desc *curr = &rxr->rx_base[l];
+ union ixgbe_adv_rx_desc *curr = &rxr->rx_base[nic_i];
uint32_t staterr = le32toh(curr->wb.upper.status_error);
if ((staterr & IXGBE_RXD_STAT_DD) == 0)
break;
- ring->slot[j].len = le16toh(curr->wb.upper.length) - crclen;
- ring->slot[j].flags = slot_flags;
+ ring->slot[nm_i].len = le16toh(curr->wb.upper.length) - crclen;
+ ring->slot[nm_i].flags = slot_flags;
bus_dmamap_sync(rxr->ptag,
- rxr->rx_buffers[l].pmap, BUS_DMASYNC_POSTREAD);
- j = (j == lim) ? 0 : j + 1;
- l = (l == lim) ? 0 : l + 1;
+ rxr->rx_buffers[nic_i].pmap, BUS_DMASYNC_POSTREAD);
+ nm_i = nm_next(nm_i, lim);
+ nic_i = nm_next(nic_i, lim);
}
if (n) { /* update the state variables */
if (netmap_no_pendintr && !force_update) {
@@ -469,48 +424,36 @@ ixgbe_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int flags)
ix_rx_miss ++;
ix_rx_miss_bufs += n;
}
- rxr->next_to_check = l;
+ rxr->next_to_check = nic_i;
kring->nr_hwavail += n;
}
kring->nr_kflags &= ~NKR_PENDINTR;
}
/*
- * Skip past packets that userspace has released
- * (from kring->nr_hwcur to ring->cur - ring->reserved excluded),
+ * Second part: skip past packets that userspace has released
+ * (kring->nr_hwcur to ring->cur - ring->reserved excluded),
* and make the buffers available for reception.
- * As usual j is the index in the netmap ring, l is the index
- * in the NIC ring, and j == (l + kring->nkr_hwofs) % ring_size
+ * As usual nm_i is the index in the netmap ring,
+ * nic_i is the index in the NIC ring, and
+ * nm_i == (nic_i + kring->nkr_hwofs) % ring_size
*/
- j = kring->nr_hwcur;
- if (resvd > 0) {
- if (resvd + ring->avail >= lim + 1) {
- D("XXX invalid reserve/avail %d %d", resvd, ring->avail);
- ring->reserved = resvd = 0; // XXX panic...
- }
- k = (k >= resvd) ? k - resvd : k + lim + 1 - resvd;
- }
- if (j != k) { /* userspace has released some packets. */
- l = netmap_idx_k2n(kring, j);
- for (n = 0; j != k; n++) {
- /* collect per-slot info, with similar validations
- * and flag handling as in the txsync code.
- *
- * NOTE curr and rxbuf are indexed by l.
- * Also, this driver needs to update the physical
- * address in the NIC ring, but other drivers
- * may not have this requirement.
- */
- struct netmap_slot *slot = &ring->slot[j];
- union ixgbe_adv_rx_desc *curr = &rxr->rx_base[l];
- struct ixgbe_rx_buf *rxbuf = &rxr->rx_buffers[l];
+ nm_i = kring->nr_hwcur;
+ if (nm_i != cur) {
+ nic_i = netmap_idx_k2n(kring, nm_i);
+ for (n = 0; nm_i != cur; n++) {
+ struct netmap_slot *slot = &ring->slot[nm_i];
uint64_t paddr;
void *addr = PNMB(slot, &paddr);
+ union ixgbe_adv_rx_desc *curr = &rxr->rx_base[nic_i];
+ struct ixgbe_rx_buf *rxbuf = &rxr->rx_buffers[nic_i];
+
if (addr == netmap_buffer_base) /* bad buf */
goto ring_reset;
if (slot->flags & NS_BUF_CHANGED) {
+ /* buffer has changed, reload map */
netmap_reload_map(rxr->ptag, rxbuf->pmap, addr);
slot->flags &= ~NS_BUF_CHANGED;
}
@@ -518,20 +461,23 @@ ixgbe_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int flags)
curr->read.pkt_addr = htole64(paddr);
bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
BUS_DMASYNC_PREREAD);
- j = (j == lim) ? 0 : j + 1;
- l = (l == lim) ? 0 : l + 1;
+ nm_i = nm_next(nm_i, lim);
+ nic_i = nm_next(nic_i, lim);
}
kring->nr_hwavail -= n;
- kring->nr_hwcur = k;
+ kring->nr_hwcur = cur;
+
bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
- /* IMPORTANT: we must leave one free slot in the ring,
- * so move l back by one unit
+ /*
+ * IMPORTANT: we must leave one free slot in the ring,
+ * so move nic_i back by one unit
*/
- l = (l == 0) ? lim : l - 1;
- IXGBE_WRITE_REG(&adapter->hw, IXGBE_RDT(rxr->me), l);
+ nic_i = (nic_i == 0) ? lim : nic_i - 1;
+ IXGBE_WRITE_REG(&adapter->hw, IXGBE_RDT(rxr->me), nic_i);
}
- /* tell userspace that there are new packets */
+
+ /* tell userspace that there might be new packets */
ring->avail = kring->nr_hwavail - resvd;
return 0;
@@ -562,7 +508,8 @@ ixgbe_netmap_attach(struct adapter *adapter)
na.nm_txsync = ixgbe_netmap_txsync;
na.nm_rxsync = ixgbe_netmap_rxsync;
na.nm_register = ixgbe_netmap_reg;
- netmap_attach(&na, adapter->num_queues);
-}
+ na.num_tx_rings = na.num_rx_rings = adapter->num_queues;
+ netmap_attach(&na);
+}
/* end of file */
diff --git a/sys/dev/netmap/netmap.c b/sys/dev/netmap/netmap.c
index 19be406f6dbc..033cd3059e17 100644
--- a/sys/dev/netmap/netmap.c
+++ b/sys/dev/netmap/netmap.c
@@ -8,7 +8,7 @@
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
+ * documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
@@ -25,6 +25,8 @@
/*
+ * $FreeBSD$
+ *
* This module supports memory mapped access to network devices,
* see netmap(4).
*
@@ -130,131 +132,36 @@ ports attached to the switch)
#if defined(__FreeBSD__)
#include <sys/cdefs.h> /* prerequisite */
-__FBSDID("$FreeBSD$");
-
#include <sys/types.h>
-#include <sys/module.h>
#include <sys/errno.h>
#include <sys/param.h> /* defines used in kernel.h */
-#include <sys/jail.h>
#include <sys/kernel.h> /* types used in module initialization */
-#include <sys/conf.h> /* cdevsw struct */
-#include <sys/uio.h> /* uio struct */
+#include <sys/conf.h> /* cdevsw struct, UID, GID */
#include <sys/sockio.h>
#include <sys/socketvar.h> /* struct socket */
#include <sys/malloc.h>
-#include <sys/mman.h> /* PROT_EXEC */
#include <sys/poll.h>
-#include <sys/proc.h>
#include <sys/rwlock.h>
-#include <vm/vm.h> /* vtophys */
-#include <vm/pmap.h> /* vtophys */
-#include <vm/vm_param.h>
-#include <vm/vm_object.h>
-#include <vm/vm_page.h>
-#include <vm/vm_pager.h>
-#include <vm/uma.h>
#include <sys/socket.h> /* sockaddrs */
#include <sys/selinfo.h>
#include <sys/sysctl.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/bpf.h> /* BIOCIMMEDIATE */
-#include <net/vnet.h>
#include <machine/bus.h> /* bus_dmamap_* */
#include <sys/endian.h>
#include <sys/refcount.h>
-#define prefetch(x) __builtin_prefetch(x)
-
-#define BDG_RWLOCK_T struct rwlock // struct rwlock
-
-#define BDG_RWINIT(b) \
- rw_init_flags(&(b)->bdg_lock, "bdg lock", RW_NOWITNESS)
-#define BDG_WLOCK(b) rw_wlock(&(b)->bdg_lock)
-#define BDG_WUNLOCK(b) rw_wunlock(&(b)->bdg_lock)
-#define BDG_RLOCK(b) rw_rlock(&(b)->bdg_lock)
-#define BDG_RTRYLOCK(b) rw_try_rlock(&(b)->bdg_lock)
-#define BDG_RUNLOCK(b) rw_runlock(&(b)->bdg_lock)
-#define BDG_RWDESTROY(b) rw_destroy(&(b)->bdg_lock)
-
-
-/* netmap global lock.
- * normally called within the user thread (upon a system call)
- * or when a file descriptor or process is terminated
- * (last close or last munmap)
- */
-
-#define NMG_LOCK_T struct mtx
-#define NMG_LOCK_INIT() mtx_init(&netmap_global_lock, "netmap global lock", NULL, MTX_DEF)
-#define NMG_LOCK_DESTROY() mtx_destroy(&netmap_global_lock)
-#define NMG_LOCK() mtx_lock(&netmap_global_lock)
-#define NMG_UNLOCK() mtx_unlock(&netmap_global_lock)
-#define NMG_LOCK_ASSERT() mtx_assert(&netmap_global_lock, MA_OWNED)
+/* reduce conditional code */
+#define init_waitqueue_head(x) // only needed in linux
-/* atomic operations */
-#include <machine/atomic.h>
-#define NM_ATOMIC_TEST_AND_SET(p) (!atomic_cmpset_acq_int((p), 0, 1))
-#define NM_ATOMIC_CLEAR(p) atomic_store_rel_int((p), 0)
#elif defined(linux)
#include "bsd_glue.h"
-static netdev_tx_t linux_netmap_start_xmit(struct sk_buff *, struct net_device *);
-
-static struct device_driver*
-linux_netmap_find_driver(struct device *dev)
-{
- struct device_driver *dd;
-
- while ( (dd = dev->driver) == NULL ) {
- if ( (dev = dev->parent) == NULL )
- return NULL;
- }
- return dd;
-}
-
-static struct net_device*
-ifunit_ref(const char *name)
-{
- struct net_device *ifp = dev_get_by_name(&init_net, name);
- struct device_driver *dd;
-
- if (ifp == NULL)
- return NULL;
-
- if ( (dd = linux_netmap_find_driver(&ifp->dev)) == NULL )
- goto error;
-
- if (!try_module_get(dd->owner))
- goto error;
-
- return ifp;
-error:
- dev_put(ifp);
- return NULL;
-}
-
-static void
-if_rele(struct net_device *ifp)
-{
- struct device_driver *dd;
- dd = linux_netmap_find_driver(&ifp->dev);
- dev_put(ifp);
- if (dd)
- module_put(dd->owner);
-}
-
-// XXX a mtx would suffice here too 20130404 gl
-#define NMG_LOCK_T struct semaphore
-#define NMG_LOCK_INIT() sema_init(&netmap_global_lock, 1)
-#define NMG_LOCK_DESTROY()
-#define NMG_LOCK() down(&netmap_global_lock)
-#define NMG_UNLOCK() up(&netmap_global_lock)
-#define NMG_LOCK_ASSERT() // XXX to be completed
#elif defined(__APPLE__)
@@ -306,57 +213,46 @@ int netmap_txsync_retry = 2;
SYSCTL_INT(_dev_netmap, OID_AUTO, txsync_retry, CTLFLAG_RW,
&netmap_txsync_retry, 0 , "Number of txsync loops in bridge's flush.");
-int netmap_drop = 0; /* debugging */
int netmap_flags = 0; /* debug flags */
int netmap_fwd = 0; /* force transparent mode */
int netmap_mmap_unreg = 0; /* allow mmap of unregistered fds */
-SYSCTL_INT(_dev_netmap, OID_AUTO, drop, CTLFLAG_RW, &netmap_drop, 0 , "");
+/*
+ * netmap_admode selects the netmap mode to use.
+ * Invalid values are reset to NETMAP_ADMODE_BEST
+ */
+enum { NETMAP_ADMODE_BEST = 0, /* use native, fallback to generic */
+ NETMAP_ADMODE_NATIVE, /* either native or none */
+ NETMAP_ADMODE_GENERIC, /* force generic */
+ NETMAP_ADMODE_LAST };
+#define NETMAP_ADMODE_NATIVE 1 /* Force native netmap adapter. */
+#define NETMAP_ADMODE_GENERIC 2 /* Force generic netmap adapter. */
+#define NETMAP_ADMODE_BEST 0 /* Priority to native netmap adapter. */
+static int netmap_admode = NETMAP_ADMODE_BEST;
+
+int netmap_generic_mit = 100*1000; /* Generic mitigation interval in nanoseconds. */
+int netmap_generic_ringsize = 1024; /* Generic ringsize. */
+
SYSCTL_INT(_dev_netmap, OID_AUTO, flags, CTLFLAG_RW, &netmap_flags, 0 , "");
SYSCTL_INT(_dev_netmap, OID_AUTO, fwd, CTLFLAG_RW, &netmap_fwd, 0 , "");
SYSCTL_INT(_dev_netmap, OID_AUTO, mmap_unreg, CTLFLAG_RW, &netmap_mmap_unreg, 0, "");
+SYSCTL_INT(_dev_netmap, OID_AUTO, admode, CTLFLAG_RW, &netmap_admode, 0 , "");
+SYSCTL_INT(_dev_netmap, OID_AUTO, generic_mit, CTLFLAG_RW, &netmap_generic_mit, 0 , "");
+SYSCTL_INT(_dev_netmap, OID_AUTO, generic_ringsize, CTLFLAG_RW, &netmap_generic_ringsize, 0 , "");
NMG_LOCK_T netmap_global_lock;
-/*
- * protect against multiple threads using the same ring.
- * also check that the ring has not been stopped.
- */
-#define NM_KR_BUSY 1
-#define NM_KR_STOPPED 2
-static void nm_kr_put(struct netmap_kring *kr);
-static __inline int nm_kr_tryget(struct netmap_kring *kr)
-{
- /* check a first time without taking the lock
- * to avoid starvation for nm_kr_get()
- */
- if (unlikely(kr->nkr_stopped)) {
- ND("ring %p stopped (%d)", kr, kr->nkr_stopped);
- return NM_KR_STOPPED;
- }
- if (unlikely(NM_ATOMIC_TEST_AND_SET(&kr->nr_busy)))
- return NM_KR_BUSY;
- /* check a second time with lock held */
- if (unlikely(kr->nkr_stopped)) {
- ND("ring %p stopped (%d)", kr, kr->nkr_stopped);
- nm_kr_put(kr);
- return NM_KR_STOPPED;
- }
- return 0;
-}
-
-static __inline void nm_kr_put(struct netmap_kring *kr)
-{
- NM_ATOMIC_CLEAR(&kr->nr_busy);
-}
-static void nm_kr_get(struct netmap_kring *kr)
+static void
+nm_kr_get(struct netmap_kring *kr)
{
while (NM_ATOMIC_TEST_AND_SET(&kr->nr_busy))
tsleep(kr, 0, "NM_KR_GET", 4);
}
-static void nm_disable_ring(struct netmap_kring *kr)
+
+void
+netmap_disable_ring(struct netmap_kring *kr)
{
kr->nkr_stopped = 1;
nm_kr_get(kr);
@@ -365,7 +261,9 @@ static void nm_disable_ring(struct netmap_kring *kr)
nm_kr_put(kr);
}
-void netmap_disable_all_rings(struct ifnet *ifp)
+
+static void
+netmap_set_all_rings(struct ifnet *ifp, int stopped)
{
struct netmap_adapter *na;
int i;
@@ -375,35 +273,37 @@ void netmap_disable_all_rings(struct ifnet *ifp)
na = NA(ifp);
- for (i = 0; i < na->num_tx_rings + 1; i++) {
- nm_disable_ring(na->tx_rings + i);
- selwakeuppri(&na->tx_rings[i].si, PI_NET);
+ for (i = 0; i <= na->num_tx_rings; i++) {
+ if (stopped)
+ netmap_disable_ring(na->tx_rings + i);
+ else
+ na->tx_rings[i].nkr_stopped = 0;
+ na->nm_notify(na, i, NR_TX, NAF_DISABLE_NOTIFY |
+ (i == na->num_tx_rings ? NAF_GLOBAL_NOTIFY: 0));
}
- for (i = 0; i < na->num_rx_rings + 1; i++) {
- nm_disable_ring(na->rx_rings + i);
- selwakeuppri(&na->rx_rings[i].si, PI_NET);
+
+ for (i = 0; i <= na->num_rx_rings; i++) {
+ if (stopped)
+ netmap_disable_ring(na->rx_rings + i);
+ else
+ na->rx_rings[i].nkr_stopped = 0;
+ na->nm_notify(na, i, NR_RX, NAF_DISABLE_NOTIFY |
+ (i == na->num_rx_rings ? NAF_GLOBAL_NOTIFY: 0));
}
- selwakeuppri(&na->tx_si, PI_NET);
- selwakeuppri(&na->rx_si, PI_NET);
}
-void netmap_enable_all_rings(struct ifnet *ifp)
+
+void
+netmap_disable_all_rings(struct ifnet *ifp)
{
- struct netmap_adapter *na;
- int i;
+ netmap_set_all_rings(ifp, 1 /* stopped */);
+}
- if (!(ifp->if_capenable & IFCAP_NETMAP))
- return;
- na = NA(ifp);
- for (i = 0; i < na->num_tx_rings + 1; i++) {
- D("enabling %p", na->tx_rings + i);
- na->tx_rings[i].nkr_stopped = 0;
- }
- for (i = 0; i < na->num_rx_rings + 1; i++) {
- D("enabling %p", na->rx_rings + i);
- na->rx_rings[i].nkr_stopped = 0;
- }
+void
+netmap_enable_all_rings(struct ifnet *ifp)
+{
+ netmap_set_all_rings(ifp, 0 /* enabled */);
}
@@ -432,6 +332,7 @@ nm_bound_var(u_int *v, u_int dflt, u_int lo, u_int hi, const char *msg)
return *v;
}
+
/*
* packet-dump function, user-supplied or static buffer.
* The destination buffer must be at least 30+4*len
@@ -440,7 +341,7 @@ const char *
nm_dump_buf(char *p, int len, int lim, char *dst)
{
static char _dst[8192];
- int i, j, i0;
+ int i, j, i0;
static char hex[] ="0123456789abcdef";
char *o; /* output position */
@@ -477,358 +378,13 @@ nm_dump_buf(char *p, int len, int lim, char *dst)
return dst;
}
-/*
- * system parameters (most of them in netmap_kern.h)
- * NM_NAME prefix for switch port names, default "vale"
- * NM_BDG_MAXPORTS number of ports
- * NM_BRIDGES max number of switches in the system.
- * XXX should become a sysctl or tunable
- *
- * Switch ports are named valeX:Y where X is the switch name and Y
- * is the port. If Y matches a physical interface name, the port is
- * connected to a physical device.
- *
- * Unlike physical interfaces, switch ports use their own memory region
- * for rings and buffers.
- * The virtual interfaces use per-queue lock instead of core lock.
- * In the tx loop, we aggregate traffic in batches to make all operations
- * faster. The batch size is bridge_batch.
- */
-#define NM_BDG_MAXRINGS 16 /* XXX unclear how many. */
-#define NM_BDG_MAXSLOTS 4096 /* XXX same as above */
-#define NM_BRIDGE_RINGSIZE 1024 /* in the device */
-#define NM_BDG_HASH 1024 /* forwarding table entries */
-#define NM_BDG_BATCH 1024 /* entries in the forwarding buffer */
-#define NM_MULTISEG 64 /* max size of a chain of bufs */
-/* actual size of the tables */
-#define NM_BDG_BATCH_MAX (NM_BDG_BATCH + NM_MULTISEG)
-/* NM_FT_NULL terminates a list of slots in the ft */
-#define NM_FT_NULL NM_BDG_BATCH_MAX
-#define NM_BRIDGES 8 /* number of bridges */
-
-
-/*
- * bridge_batch is set via sysctl to the max batch size to be
- * used in the bridge. The actual value may be larger as the
- * last packet in the block may overflow the size.
- */
-int bridge_batch = NM_BDG_BATCH; /* bridge batch size */
-SYSCTL_INT(_dev_netmap, OID_AUTO, bridge_batch, CTLFLAG_RW, &bridge_batch, 0 , "");
-
-
-/*
- * These are used to handle reference counters for bridge ports.
- */
-#define ADD_BDG_REF(ifp) refcount_acquire(&NA(ifp)->na_bdg_refcount)
-#define DROP_BDG_REF(ifp) refcount_release(&NA(ifp)->na_bdg_refcount)
-
-/* The bridge references the buffers using the device specific look up table */
-static inline void *
-BDG_NMB(struct netmap_mem_d *nmd, struct netmap_slot *slot)
-{
- struct lut_entry *lut = nmd->pools[NETMAP_BUF_POOL].lut;
- uint32_t i = slot->buf_idx;
- return (unlikely(i >= nmd->pools[NETMAP_BUF_POOL].objtotal)) ? lut[0].vaddr : lut[i].vaddr;
-}
-
-static int bdg_netmap_attach(struct netmap_adapter *);
-static int bdg_netmap_reg(struct ifnet *ifp, int onoff);
-int kern_netmap_regif(struct nmreq *nmr);
-
-/*
- * Each transmit queue accumulates a batch of packets into
- * a structure before forwarding. Packets to the same
- * destination are put in a list using ft_next as a link field.
- * ft_frags and ft_next are valid only on the first fragment.
- */
-struct nm_bdg_fwd { /* forwarding entry for a bridge */
- void *ft_buf; /* netmap or indirect buffer */
- uint8_t ft_frags; /* how many fragments (only on 1st frag) */
- uint8_t _ft_port; /* dst port (unused) */
- uint16_t ft_flags; /* flags, e.g. indirect */
- uint16_t ft_len; /* src fragment len */
- uint16_t ft_next; /* next packet to same destination */
-};
-
-/*
- * For each output interface, nm_bdg_q is used to construct a list.
- * bq_len is the number of output buffers (we can have coalescing
- * during the copy).
- */
-struct nm_bdg_q {
- uint16_t bq_head;
- uint16_t bq_tail;
- uint32_t bq_len; /* number of buffers */
-};
-
-/* XXX revise this */
-struct nm_hash_ent {
- uint64_t mac; /* the top 2 bytes are the epoch */
- uint64_t ports;
-};
-
-/*
- * nm_bridge is a descriptor for a VALE switch.
- * Interfaces for a bridge are all in bdg_ports[].
- * The array has fixed size, an empty entry does not terminate
- * the search, but lookups only occur on attach/detach so we
- * don't mind if they are slow.
- *
- * The bridge is non blocking on the transmit ports: excess
- * packets are dropped if there is no room on the output port.
- *
- * bdg_lock protects accesses to the bdg_ports array.
- * This is a rw lock (or equivalent).
- */
-struct nm_bridge {
- /* XXX what is the proper alignment/layout ? */
- BDG_RWLOCK_T bdg_lock; /* protects bdg_ports */
- int bdg_namelen;
- uint32_t bdg_active_ports; /* 0 means free */
- char bdg_basename[IFNAMSIZ];
-
- /* Indexes of active ports (up to active_ports)
- * and all other remaining ports.
- */
- uint8_t bdg_port_index[NM_BDG_MAXPORTS];
-
- struct netmap_adapter *bdg_ports[NM_BDG_MAXPORTS];
-
-
- /*
- * The function to decide the destination port.
- * It returns either of an index of the destination port,
- * NM_BDG_BROADCAST to broadcast this packet, or NM_BDG_NOPORT not to
- * forward this packet. ring_nr is the source ring index, and the
- * function may overwrite this value to forward this packet to a
- * different ring index.
- * This function must be set by netmap_bdgctl().
- */
- bdg_lookup_fn_t nm_bdg_lookup;
-
- /* the forwarding table, MAC+ports.
- * XXX should be changed to an argument to be passed to
- * the lookup function, and allocated on attach
- */
- struct nm_hash_ent ht[NM_BDG_HASH];
-};
-
-
-/*
- * XXX in principle nm_bridges could be created dynamically
- * Right now we have a static array and deletions are protected
- * by an exclusive lock.
- */
-struct nm_bridge nm_bridges[NM_BRIDGES];
-
-
-/*
- * A few function to tell which kind of port are we using.
- * XXX should we hold a lock ?
- *
- * nma_is_vp() virtual port
- * nma_is_host() port connected to the host stack
- * nma_is_hw() port connected to a NIC
- */
-int nma_is_vp(struct netmap_adapter *na);
-int
-nma_is_vp(struct netmap_adapter *na)
-{
- return na->nm_register == bdg_netmap_reg;
-}
-
-static __inline int
-nma_is_host(struct netmap_adapter *na)
-{
- return na->nm_register == NULL;
-}
-
-static __inline int
-nma_is_hw(struct netmap_adapter *na)
-{
- /* In case of sw adapter, nm_register is NULL */
- return !nma_is_vp(na) && !nma_is_host(na);
-}
-
-
-/*
- * If the NIC is owned by the kernel
- * (i.e., bridge), neither another bridge nor user can use it;
- * if the NIC is owned by a user, only users can share it.
- * Evaluation must be done under NMG_LOCK().
- */
-#define NETMAP_OWNED_BY_KERN(ifp) (!nma_is_vp(NA(ifp)) && NA(ifp)->na_bdg)
-#define NETMAP_OWNED_BY_ANY(ifp) \
- (NETMAP_OWNED_BY_KERN(ifp) || (NA(ifp)->refcount > 0))
-
-/*
- * NA(ifp)->bdg_port port index
- */
-
-
-/*
- * this is a slightly optimized copy routine which rounds
- * to multiple of 64 bytes and is often faster than dealing
- * with other odd sizes. We assume there is enough room
- * in the source and destination buffers.
- *
- * XXX only for multiples of 64 bytes, non overlapped.
- */
-static inline void
-pkt_copy(void *_src, void *_dst, int l)
-{
- uint64_t *src = _src;
- uint64_t *dst = _dst;
- if (unlikely(l >= 1024)) {
- memcpy(dst, src, l);
- return;
- }
- for (; likely(l > 0); l-=64) {
- *dst++ = *src++;
- *dst++ = *src++;
- *dst++ = *src++;
- *dst++ = *src++;
- *dst++ = *src++;
- *dst++ = *src++;
- *dst++ = *src++;
- *dst++ = *src++;
- }
-}
-
-
-/*
- * locate a bridge among the existing ones.
- * MUST BE CALLED WITH NMG_LOCK()
- *
- * a ':' in the name terminates the bridge name. Otherwise, just NM_NAME.
- * We assume that this is called with a name of at least NM_NAME chars.
- */
-static struct nm_bridge *
-nm_find_bridge(const char *name, int create)
-{
- int i, l, namelen;
- struct nm_bridge *b = NULL;
-
- NMG_LOCK_ASSERT();
-
- namelen = strlen(NM_NAME); /* base length */
- l = name ? strlen(name) : 0; /* actual length */
- if (l < namelen) {
- D("invalid bridge name %s", name ? name : NULL);
- return NULL;
- }
- for (i = namelen + 1; i < l; i++) {
- if (name[i] == ':') {
- namelen = i;
- break;
- }
- }
- if (namelen >= IFNAMSIZ)
- namelen = IFNAMSIZ;
- ND("--- prefix is '%.*s' ---", namelen, name);
-
- /* lookup the name, remember empty slot if there is one */
- for (i = 0; i < NM_BRIDGES; i++) {
- struct nm_bridge *x = nm_bridges + i;
-
- if (x->bdg_active_ports == 0) {
- if (create && b == NULL)
- b = x; /* record empty slot */
- } else if (x->bdg_namelen != namelen) {
- continue;
- } else if (strncmp(name, x->bdg_basename, namelen) == 0) {
- ND("found '%.*s' at %d", namelen, name, i);
- b = x;
- break;
- }
- }
- if (i == NM_BRIDGES && b) { /* name not found, can create entry */
- /* initialize the bridge */
- strncpy(b->bdg_basename, name, namelen);
- ND("create new bridge %s with ports %d", b->bdg_basename,
- b->bdg_active_ports);
- b->bdg_namelen = namelen;
- b->bdg_active_ports = 0;
- for (i = 0; i < NM_BDG_MAXPORTS; i++)
- b->bdg_port_index[i] = i;
- /* set the default function */
- b->nm_bdg_lookup = netmap_bdg_learning;
- /* reset the MAC address table */
- bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH);
- }
- return b;
-}
-
-
-/*
- * Free the forwarding tables for rings attached to switch ports.
- */
-static void
-nm_free_bdgfwd(struct netmap_adapter *na)
-{
- int nrings, i;
- struct netmap_kring *kring;
-
- NMG_LOCK_ASSERT();
- nrings = nma_is_vp(na) ? na->num_tx_rings : na->num_rx_rings;
- kring = nma_is_vp(na) ? na->tx_rings : na->rx_rings;
- for (i = 0; i < nrings; i++) {
- if (kring[i].nkr_ft) {
- free(kring[i].nkr_ft, M_DEVBUF);
- kring[i].nkr_ft = NULL; /* protect from freeing twice */
- }
- }
- if (nma_is_hw(na))
- nm_free_bdgfwd(SWNA(na->ifp));
-}
-
-
-/*
- * Allocate the forwarding tables for the rings attached to the bridge ports.
- */
-static int
-nm_alloc_bdgfwd(struct netmap_adapter *na)
-{
- int nrings, l, i, num_dstq;
- struct netmap_kring *kring;
-
- NMG_LOCK_ASSERT();
- /* all port:rings + broadcast */
- num_dstq = NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1;
- l = sizeof(struct nm_bdg_fwd) * NM_BDG_BATCH_MAX;
- l += sizeof(struct nm_bdg_q) * num_dstq;
- l += sizeof(uint16_t) * NM_BDG_BATCH_MAX;
-
- nrings = nma_is_vp(na) ? na->num_tx_rings : na->num_rx_rings;
- kring = nma_is_vp(na) ? na->tx_rings : na->rx_rings;
- for (i = 0; i < nrings; i++) {
- struct nm_bdg_fwd *ft;
- struct nm_bdg_q *dstq;
- int j;
-
- ft = malloc(l, M_DEVBUF, M_NOWAIT | M_ZERO);
- if (!ft) {
- nm_free_bdgfwd(na);
- return ENOMEM;
- }
- dstq = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
- for (j = 0; j < num_dstq; j++) {
- dstq[j].bq_head = dstq[j].bq_tail = NM_FT_NULL;
- dstq[j].bq_len = 0;
- }
- kring[i].nkr_ft = ft;
- }
- if (nma_is_hw(na))
- nm_alloc_bdgfwd(SWNA(na->ifp));
- return 0;
-}
/*
* Fetch configuration from the device, to cope with dynamic
* reconfigurations after loading the module.
*/
-static int
+int
netmap_update_config(struct netmap_adapter *na)
{
struct ifnet *ifp = na->ifp;
@@ -836,7 +392,7 @@ netmap_update_config(struct netmap_adapter *na)
txr = txd = rxr = rxd = 0;
if (na->nm_config) {
- na->nm_config(ifp, &txr, &txd, &rxr, &rxd);
+ na->nm_config(na, &txr, &txd, &rxr, &rxd);
} else {
/* take whatever we had at init time */
txr = na->num_tx_rings;
@@ -848,15 +404,15 @@ netmap_update_config(struct netmap_adapter *na)
if (na->num_tx_rings == txr && na->num_tx_desc == txd &&
na->num_rx_rings == rxr && na->num_rx_desc == rxd)
return 0; /* nothing changed */
- if (netmap_verbose || na->refcount > 0) {
+ if (netmap_verbose || na->active_fds > 0) {
D("stored config %s: txring %d x %d, rxring %d x %d",
- ifp->if_xname,
+ NM_IFPNAME(ifp),
na->num_tx_rings, na->num_tx_desc,
na->num_rx_rings, na->num_rx_desc);
D("new config %s: txring %d x %d, rxring %d x %d",
- ifp->if_xname, txr, txd, rxr, rxd);
+ NM_IFPNAME(ifp), txr, txd, rxr, rxd);
}
- if (na->refcount == 0) {
+ if (na->active_fds == 0) {
D("configuration changed (but fine)");
na->num_tx_rings = txr;
na->num_tx_desc = txd;
@@ -868,52 +424,111 @@ netmap_update_config(struct netmap_adapter *na)
return 1;
}
-static struct netmap_if *
+
+int
+netmap_krings_create(struct netmap_adapter *na, u_int ntx, u_int nrx, u_int tailroom)
+{
+ u_int i, len, ndesc;
+ struct netmap_kring *kring;
+
+ len = (ntx + nrx) * sizeof(struct netmap_kring) + tailroom;
+
+ na->tx_rings = malloc((size_t)len, M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (na->tx_rings == NULL) {
+ D("Cannot allocate krings");
+ return ENOMEM;
+ }
+ na->rx_rings = na->tx_rings + ntx;
+
+ ndesc = na->num_tx_desc;
+ for (i = 0; i < ntx; i++) { /* Transmit rings */
+ kring = &na->tx_rings[i];
+ bzero(kring, sizeof(*kring));
+ kring->na = na;
+ kring->nkr_num_slots = ndesc;
+ /*
+ * IMPORTANT:
+ * Always keep one slot empty, so we can detect new
+ * transmissions comparing cur and nr_hwcur (they are
+ * the same only if there are no new transmissions).
+ */
+ kring->nr_hwavail = ndesc - 1;
+ mtx_init(&kring->q_lock, "nm_txq_lock", NULL, MTX_DEF);
+ init_waitqueue_head(&kring->si);
+ }
+
+ ndesc = na->num_rx_desc;
+ for (i = 0; i < nrx; i++) { /* Receive rings */
+ kring = &na->rx_rings[i];
+ bzero(kring, sizeof(*kring));
+ kring->na = na;
+ kring->nkr_num_slots = ndesc;
+ mtx_init(&kring->q_lock, "nm_rxq_lock", NULL, MTX_DEF);
+ init_waitqueue_head(&kring->si);
+ }
+ init_waitqueue_head(&na->tx_si);
+ init_waitqueue_head(&na->rx_si);
+
+ na->tailroom = na->rx_rings + nrx;
+
+ return 0;
+
+}
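/*
 * Illustrative aside (editor's sketch, not part of this patch):
 * netmap_krings_create() above carves a single allocation into
 *
 *   na->tx_rings -> [ ntx * sizeof(struct netmap_kring) ]
 *   na->rx_rings -> [ nrx * sizeof(struct netmap_kring) ]
 *   na->tailroom -> [ tailroom extra bytes for the caller ]
 *
 * A hardware driver with the usual one extra ring per direction for the
 * host stack would presumably call it as in this hypothetical helper:
 */
static int
example_hw_krings_create(struct netmap_adapter *na)
{
	return netmap_krings_create(na,
	    na->num_tx_rings + 1, na->num_rx_rings + 1, 0);
}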
+
+
+void
+netmap_krings_delete(struct netmap_adapter *na)
+{
+ int i;
+
+ for (i = 0; i < na->num_tx_rings + 1; i++) {
+ mtx_destroy(&na->tx_rings[i].q_lock);
+ }
+ for (i = 0; i < na->num_rx_rings + 1; i++) {
+ mtx_destroy(&na->rx_rings[i].q_lock);
+ }
+ free(na->tx_rings, M_DEVBUF);
+ na->tx_rings = na->rx_rings = na->tailroom = NULL;
+}
+
+
+static struct netmap_if*
netmap_if_new(const char *ifname, struct netmap_adapter *na)
{
+ struct netmap_if *nifp;
+
if (netmap_update_config(na)) {
/* configuration mismatch, report and fail */
return NULL;
}
- return netmap_mem_if_new(ifname, na);
-}
+ if (na->active_fds)
+ goto final;
-/* Structure associated to each thread which registered an interface.
- *
- * The first 4 fields of this structure are written by NIOCREGIF and
- * read by poll() and NIOC?XSYNC.
- * There is low contention among writers (actually, a correct user program
- * should have no contention among writers) and among writers and readers,
- * so we use a single global lock to protect the structure initialization.
- * Since initialization involves the allocation of memory, we reuse the memory
- * allocator lock.
- * Read access to the structure is lock free. Readers must check that
- * np_nifp is not NULL before using the other fields.
- * If np_nifp is NULL initialization has not been performed, so they should
- * return an error to userlevel.
- *
- * The ref_done field is used to regulate access to the refcount in the
- * memory allocator. The refcount must be incremented at most once for
- * each open("/dev/netmap"). The increment is performed by the first
- * function that calls netmap_get_memory() (currently called by
- * mmap(), NIOCGINFO and NIOCREGIF).
- * If the refcount is incremented, it is then decremented when the
- * private structure is destroyed.
- */
-struct netmap_priv_d {
- struct netmap_if * volatile np_nifp; /* netmap if descriptor. */
+ if (na->nm_krings_create(na))
+ goto cleanup;
- struct ifnet *np_ifp; /* device for which we hold a ref. */
- int np_ringid; /* from the ioctl */
- u_int np_qfirst, np_qlast; /* range of rings to scan */
- uint16_t np_txpoll;
+ if (netmap_mem_rings_create(na))
+ goto cleanup;
+
+final:
+
+ nifp = netmap_mem_if_new(ifname, na);
+ if (nifp == NULL)
+ goto cleanup;
+
+ return (nifp);
+
+cleanup:
+
+ if (na->active_fds == 0) {
+ netmap_mem_rings_delete(na);
+ na->nm_krings_delete(na);
+ }
+
+ return NULL;
+}
- struct netmap_mem_d *np_mref; /* use with NMG_LOCK held */
-#ifdef __FreeBSD__
- int np_refcount; /* use with NMG_LOCK held */
-#endif /* __FreeBSD__ */
-};
/* grab a reference to the memory allocator, if we don't have one already. The
* reference is taken from the netmap_adapter registered with the priv.
@@ -925,7 +540,7 @@ netmap_get_memory_locked(struct netmap_priv_d* p)
struct netmap_mem_d *nmd;
int error = 0;
- if (p->np_ifp == NULL) {
+ if (p->np_na == NULL) {
if (!netmap_mmap_unreg)
return ENODEV;
/* for compatibility with older versions of the API
@@ -934,7 +549,7 @@ netmap_get_memory_locked(struct netmap_priv_d* p)
*/
nmd = &nm_mem;
} else {
- nmd = NA(p->np_ifp)->nm_mem;
+ nmd = p->np_na->nm_mem;
}
if (p->np_mref == NULL) {
error = netmap_mem_finalize(nmd);
@@ -950,7 +565,8 @@ netmap_get_memory_locked(struct netmap_priv_d* p)
return error;
}
-static int
+
+int
netmap_get_memory(struct netmap_priv_d* p)
{
int error;
@@ -960,12 +576,14 @@ netmap_get_memory(struct netmap_priv_d* p)
return error;
}
+
static int
netmap_have_memory_locked(struct netmap_priv_d* p)
{
return p->np_mref != NULL;
}
+
static void
netmap_drop_memory_locked(struct netmap_priv_d* p)
{
@@ -975,11 +593,12 @@ netmap_drop_memory_locked(struct netmap_priv_d* p)
}
}
+
/*
* File descriptor's private data destructor.
*
* Call nm_register(ifp,0) to stop netmap mode on the interface and
- * revert to normal operation. We expect that np_ifp has not gone.
+ * revert to normal operation. We expect that np_na->ifp has not gone.
* The second argument is the nifp to work on. In some cases it is
* not attached yet to the netmap_priv_d so we need to pass it as
* a separate argument.
@@ -988,16 +607,15 @@ netmap_drop_memory_locked(struct netmap_priv_d* p)
static void
netmap_do_unregif(struct netmap_priv_d *priv, struct netmap_if *nifp)
{
- struct ifnet *ifp = priv->np_ifp;
- struct netmap_adapter *na = NA(ifp);
+ struct netmap_adapter *na = priv->np_na;
+ struct ifnet *ifp = na->ifp;
NMG_LOCK_ASSERT();
- na->refcount--;
- if (na->refcount <= 0) { /* last instance */
- u_int i;
+ na->active_fds--;
+ if (na->active_fds <= 0) { /* last instance */
if (netmap_verbose)
- D("deleting last instance for %s", ifp->if_xname);
+ D("deleting last instance for %s", NM_IFPNAME(ifp));
/*
* (TO CHECK) This function is only called
* when the last reference to this file descriptor goes
@@ -1012,140 +630,33 @@ netmap_do_unregif(struct netmap_priv_d *priv, struct netmap_if *nifp)
* happens if the close() occurs while a concurrent
* syscall is running.
*/
- na->nm_register(ifp, 0); /* off, clear IFCAP_NETMAP */
+ if (ifp)
+ na->nm_register(na, 0); /* off, clear flags */
/* Wake up any sleeping threads. netmap_poll will
* then return POLLERR
* XXX The wake up now must happen during *_down(), when
* we order all activities to stop. -gl
*/
- nm_free_bdgfwd(na);
- for (i = 0; i < na->num_tx_rings + 1; i++) {
- mtx_destroy(&na->tx_rings[i].q_lock);
- }
- for (i = 0; i < na->num_rx_rings + 1; i++) {
- mtx_destroy(&na->rx_rings[i].q_lock);
- }
/* XXX kqueue(9) needed; these will mirror knlist_init. */
/* knlist_destroy(&na->tx_si.si_note); */
/* knlist_destroy(&na->rx_si.si_note); */
- if (nma_is_hw(na))
- SWNA(ifp)->tx_rings = SWNA(ifp)->rx_rings = NULL;
- }
- /*
- * netmap_mem_if_delete() deletes the nifp, and if this is
- * the last instance also buffers, rings and krings.
- */
- netmap_mem_if_delete(na, nifp);
-}
-
-
-/* we assume netmap adapter exists
- * Called with NMG_LOCK held
- */
-static void
-nm_if_rele(struct ifnet *ifp)
-{
- int i, is_hw, hw, sw, lim;
- struct nm_bridge *b;
- struct netmap_adapter *na;
- uint8_t tmp[NM_BDG_MAXPORTS];
-
- NMG_LOCK_ASSERT();
- /* I can be called not only for get_ifp()-ed references where netmap's
- * capability is guaranteed, but also for non-netmap-capable NICs.
- */
- if (!NETMAP_CAPABLE(ifp) || !NA(ifp)->na_bdg) {
- if_rele(ifp);
- return;
- }
- na = NA(ifp);
- b = na->na_bdg;
- is_hw = nma_is_hw(na);
-
- ND("%s has %d references", ifp->if_xname, NA(ifp)->na_bdg_refcount);
-
- if (!DROP_BDG_REF(ifp))
- return;
-
- /*
- New algorithm:
- make a copy of bdg_port_index;
- lookup NA(ifp)->bdg_port and SWNA(ifp)->bdg_port
- in the array of bdg_port_index, replacing them with
- entries from the bottom of the array;
- decrement bdg_active_ports;
- acquire BDG_WLOCK() and copy back the array.
- */
-
- hw = NA(ifp)->bdg_port;
- sw = (is_hw && SWNA(ifp)->na_bdg) ? SWNA(ifp)->bdg_port : -1;
- lim = b->bdg_active_ports;
-
- ND("detach %d and %d (lim %d)", hw, sw, lim);
- /* make a copy of the list of active ports, update it,
- * and then copy back within BDG_WLOCK().
- */
- memcpy(tmp, b->bdg_port_index, sizeof(tmp));
- for (i = 0; (hw >= 0 || sw >= 0) && i < lim; ) {
- if (hw >= 0 && tmp[i] == hw) {
- ND("detach hw %d at %d", hw, i);
- lim--; /* point to last active port */
- tmp[i] = tmp[lim]; /* swap with i */
- tmp[lim] = hw; /* now this is inactive */
- hw = -1;
- } else if (sw >= 0 && tmp[i] == sw) {
- ND("detach sw %d at %d", sw, i);
- lim--;
- tmp[i] = tmp[lim];
- tmp[lim] = sw;
- sw = -1;
- } else {
- i++;
- }
- }
- if (hw >= 0 || sw >= 0) {
- D("XXX delete failed hw %d sw %d, should panic...", hw, sw);
- }
- hw = NA(ifp)->bdg_port;
- sw = (is_hw && SWNA(ifp)->na_bdg) ? SWNA(ifp)->bdg_port : -1;
- BDG_WLOCK(b);
- b->bdg_ports[hw] = NULL;
- na->na_bdg = NULL;
- if (sw >= 0) {
- b->bdg_ports[sw] = NULL;
- SWNA(ifp)->na_bdg = NULL;
- }
- memcpy(b->bdg_port_index, tmp, sizeof(tmp));
- b->bdg_active_ports = lim;
- BDG_WUNLOCK(b);
-
- ND("now %d active ports", lim);
- if (lim == 0) {
- ND("marking bridge %s as free", b->bdg_basename);
- b->nm_bdg_lookup = NULL;
- }
-
- if (is_hw) {
- if_rele(ifp);
- } else {
- if (na->na_flags & NAF_MEM_OWNER)
- netmap_mem_private_delete(na->nm_mem);
- bzero(na, sizeof(*na));
- free(na, M_DEVBUF);
- bzero(ifp, sizeof(*ifp));
- free(ifp, M_DEVBUF);
+ /* delete rings and buffers */
+ netmap_mem_rings_delete(na);
+ na->nm_krings_delete(na);
}
+ /* delete the nifp */
+ netmap_mem_if_delete(na, nifp);
}
/*
* returns 1 if this is the last instance and we can free priv
*/
-static int
+int
netmap_dtor_locked(struct netmap_priv_d *priv)
{
- struct ifnet *ifp = priv->np_ifp;
+ struct netmap_adapter *na = priv->np_na;
#ifdef __FreeBSD__
/*
@@ -1156,17 +667,21 @@ netmap_dtor_locked(struct netmap_priv_d *priv)
return 0;
}
#endif /* __FreeBSD__ */
- if (ifp) {
- netmap_do_unregif(priv, priv->np_nifp);
+ if (!na) {
+ return 1; //XXX is it correct?
}
+ netmap_do_unregif(priv, priv->np_nifp);
+ priv->np_nifp = NULL;
netmap_drop_memory_locked(priv);
- if (ifp) {
- nm_if_rele(ifp); /* might also destroy *na */
+ if (priv->np_na) {
+ netmap_adapter_put(na);
+ priv->np_na = NULL;
}
return 1;
}
-static void
+
+void
netmap_dtor(void *data)
{
struct netmap_priv_d *priv = data;
@@ -1182,190 +697,6 @@ netmap_dtor(void *data)
}
-#ifdef __FreeBSD__
-
-/*
- * In order to track whether pages are still mapped, we hook into
- * the standard cdev_pager and intercept the constructor and
- * destructor.
- */
-
-struct netmap_vm_handle_t {
- struct cdev *dev;
- struct netmap_priv_d *priv;
-};
-
-static int
-netmap_dev_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
- vm_ooffset_t foff, struct ucred *cred, u_short *color)
-{
- struct netmap_vm_handle_t *vmh = handle;
- D("handle %p size %jd prot %d foff %jd",
- handle, (intmax_t)size, prot, (intmax_t)foff);
- dev_ref(vmh->dev);
- return 0;
-}
-
-
-static void
-netmap_dev_pager_dtor(void *handle)
-{
- struct netmap_vm_handle_t *vmh = handle;
- struct cdev *dev = vmh->dev;
- struct netmap_priv_d *priv = vmh->priv;
- D("handle %p", handle);
- netmap_dtor(priv);
- free(vmh, M_DEVBUF);
- dev_rel(dev);
-}
-
-static int
-netmap_dev_pager_fault(vm_object_t object, vm_ooffset_t offset,
- int prot, vm_page_t *mres)
-{
- struct netmap_vm_handle_t *vmh = object->handle;
- struct netmap_priv_d *priv = vmh->priv;
- vm_paddr_t paddr;
- vm_page_t page;
- vm_memattr_t memattr;
- vm_pindex_t pidx;
-
- ND("object %p offset %jd prot %d mres %p",
- object, (intmax_t)offset, prot, mres);
- memattr = object->memattr;
- pidx = OFF_TO_IDX(offset);
- paddr = netmap_mem_ofstophys(priv->np_mref, offset);
- if (paddr == 0)
- return VM_PAGER_FAIL;
-
- if (((*mres)->flags & PG_FICTITIOUS) != 0) {
- /*
- * If the passed in result page is a fake page, update it with
- * the new physical address.
- */
- page = *mres;
- vm_page_updatefake(page, paddr, memattr);
- } else {
- /*
- * Replace the passed in reqpage page with our own fake page and
- * free up the all of the original pages.
- */
-#ifndef VM_OBJECT_WUNLOCK /* FreeBSD < 10.x */
-#define VM_OBJECT_WUNLOCK VM_OBJECT_UNLOCK
-#define VM_OBJECT_WLOCK VM_OBJECT_LOCK
-#endif /* VM_OBJECT_WUNLOCK */
-
- VM_OBJECT_WUNLOCK(object);
- page = vm_page_getfake(paddr, memattr);
- VM_OBJECT_WLOCK(object);
- vm_page_lock(*mres);
- vm_page_free(*mres);
- vm_page_unlock(*mres);
- *mres = page;
- vm_page_insert(page, object, pidx);
- }
- page->valid = VM_PAGE_BITS_ALL;
- return (VM_PAGER_OK);
-}
-
-
-static struct cdev_pager_ops netmap_cdev_pager_ops = {
- .cdev_pg_ctor = netmap_dev_pager_ctor,
- .cdev_pg_dtor = netmap_dev_pager_dtor,
- .cdev_pg_fault = netmap_dev_pager_fault,
-};
-
-
-static int
-netmap_mmap_single(struct cdev *cdev, vm_ooffset_t *foff,
- vm_size_t objsize, vm_object_t *objp, int prot)
-{
- int error;
- struct netmap_vm_handle_t *vmh;
- struct netmap_priv_d *priv;
- vm_object_t obj;
-
- D("cdev %p foff %jd size %jd objp %p prot %d", cdev,
- (intmax_t )*foff, (intmax_t )objsize, objp, prot);
-
- vmh = malloc(sizeof(struct netmap_vm_handle_t), M_DEVBUF,
- M_NOWAIT | M_ZERO);
- if (vmh == NULL)
- return ENOMEM;
- vmh->dev = cdev;
-
- NMG_LOCK();
- error = devfs_get_cdevpriv((void**)&priv);
- if (error)
- goto err_unlock;
- vmh->priv = priv;
- priv->np_refcount++;
- NMG_UNLOCK();
-
- error = netmap_get_memory(priv);
- if (error)
- goto err_deref;
-
- obj = cdev_pager_allocate(vmh, OBJT_DEVICE,
- &netmap_cdev_pager_ops, objsize, prot,
- *foff, NULL);
- if (obj == NULL) {
- D("cdev_pager_allocate failed");
- error = EINVAL;
- goto err_deref;
- }
-
- *objp = obj;
- return 0;
-
-err_deref:
- NMG_LOCK();
- priv->np_refcount--;
-err_unlock:
- NMG_UNLOCK();
-// err:
- free(vmh, M_DEVBUF);
- return error;
-}
-
-
-// XXX can we remove this ?
-static int
-netmap_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
-{
- if (netmap_verbose)
- D("dev %p fflag 0x%x devtype %d td %p",
- dev, fflag, devtype, td);
- return 0;
-}
-
-
-static int
-netmap_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
-{
- struct netmap_priv_d *priv;
- int error;
-
- (void)dev;
- (void)oflags;
- (void)devtype;
- (void)td;
-
- // XXX wait or nowait ?
- priv = malloc(sizeof(struct netmap_priv_d), M_DEVBUF,
- M_NOWAIT | M_ZERO);
- if (priv == NULL)
- return ENOMEM;
-
- error = devfs_set_cdevpriv(priv, netmap_dtor);
- if (error)
- return error;
-
- priv->np_refcount = 1;
-
- return 0;
-}
-#endif /* __FreeBSD__ */
/*
@@ -1391,26 +722,19 @@ netmap_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
* pass a chain of buffers to the host stack as coming from 'dst'
*/
static void
-netmap_send_up(struct ifnet *dst, struct mbuf *head)
+netmap_send_up(struct ifnet *dst, struct mbq *q)
{
struct mbuf *m;
/* send packets up, outside the lock */
- while ((m = head) != NULL) {
- head = head->m_nextpkt;
- m->m_nextpkt = NULL;
+ while ((m = mbq_dequeue(q)) != NULL) {
if (netmap_verbose & NM_VERB_HOST)
D("sending up pkt %p size %d", m, MBUF_LEN(m));
NM_SEND_UP(dst, m);
}
+ mbq_destroy(q);
}
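/*
 * Illustrative aside (editor's sketch, not part of this patch): a
 * minimal round trip through the new mbq type, using only the calls
 * that appear in this patch (mbq_init, mbq_enqueue, mbq_dequeue,
 * mbq_destroy). example_mbq_roundtrip() is hypothetical.
 */
static void
example_mbq_roundtrip(struct ifnet *dst, struct mbuf *m)
{
	struct mbq q;

	mbq_init(&q);
	mbq_enqueue(&q, m);			/* producer side */
	while ((m = mbq_dequeue(&q)) != NULL)	/* consumer side */
		NM_SEND_UP(dst, m);
	mbq_destroy(&q);
}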
-struct mbq {
- struct mbuf *head;
- struct mbuf *tail;
- int count;
-};
-
/*
* put a copy of the buffers marked NS_FORWARD into an mbuf chain.
@@ -1425,9 +749,9 @@ netmap_grab_packets(struct netmap_kring *kring, struct mbq *q, int force)
* XXX handle reserved
*/
u_int lim = kring->nkr_num_slots - 1;
- struct mbuf *m, *tail = q->tail;
+ struct mbuf *m;
u_int k = kring->ring->cur, n = kring->ring->reserved;
- struct netmap_mem_d *nmd = kring->na->nm_mem;
+ struct netmap_adapter *na = kring->na;
/* compute the final position, ring->cur - ring->reserved */
if (n > 0) {
@@ -1441,25 +765,18 @@ netmap_grab_packets(struct netmap_kring *kring, struct mbq *q, int force)
n = nm_next(n, lim);
if ((slot->flags & NS_FORWARD) == 0 && !force)
continue;
- if (slot->len < 14 || slot->len > NETMAP_BDG_BUF_SIZE(nmd)) {
+ if (slot->len < 14 || slot->len > NETMAP_BDG_BUF_SIZE(na->nm_mem)) {
D("bad pkt at %d len %d", n, slot->len);
continue;
}
slot->flags &= ~NS_FORWARD; // XXX needed ?
/* XXX adapt to the case of a multisegment packet */
- m = m_devget(BDG_NMB(nmd, slot), slot->len, 0, kring->na->ifp, NULL);
+ m = m_devget(BDG_NMB(na, slot), slot->len, 0, na->ifp, NULL);
if (m == NULL)
break;
- if (tail)
- tail->m_nextpkt = m;
- else
- q->head = m;
- tail = m;
- q->count++;
- m->m_nextpkt = NULL;
+ mbq_enqueue(q, m);
}
- q->tail = tail;
}
@@ -1536,16 +853,19 @@ out:
* can be among multiple user threads erroneously calling
* this routine concurrently.
*/
-static void
+void
netmap_txsync_to_host(struct netmap_adapter *na)
{
struct netmap_kring *kring = &na->tx_rings[na->num_tx_rings];
struct netmap_ring *ring = kring->ring;
u_int k, lim = kring->nkr_num_slots - 1;
- struct mbq q = { NULL, NULL, 0 };
+ struct mbq q;
+ int error;
- if (nm_kr_tryget(kring)) {
- D("ring %p busy (user error)", kring);
+ error = nm_kr_tryget(kring);
+ if (error) {
+ if (error == NM_KR_BUSY)
+ D("ring %p busy (user error)", kring);
return;
}
k = ring->cur;
@@ -1560,29 +880,13 @@ netmap_txsync_to_host(struct netmap_adapter *na)
* In case of no buffers we give up. At the end of the loop,
* the queue is drained in all cases.
*/
+ mbq_init(&q);
netmap_grab_packets(kring, &q, 1);
kring->nr_hwcur = k;
kring->nr_hwavail = ring->avail = lim;
nm_kr_put(kring);
- netmap_send_up(na->ifp, q.head);
-}
-
-
-/*
- * This is the 'txsync' handler to send from a software ring to the
- * host stack.
- */
-/* SWNA(ifp)->txrings[0] is always NA(ifp)->txrings[NA(ifp)->num_txrings] */
-static int
-netmap_bdg_to_host(struct ifnet *ifp, u_int ring_nr, int flags)
-{
- (void)ring_nr;
- (void)flags;
- if (netmap_verbose > 255)
- RD(5, "sync to host %s ring %d", ifp->if_xname, ring_nr);
- netmap_txsync_to_host(NA(ifp));
- return 0;
+ netmap_send_up(na->ifp, &q);
}
@@ -1610,7 +914,6 @@ netmap_rxsync_from_host(struct netmap_adapter *na, struct thread *td, void *pwai
if (kring->nkr_stopped) /* check a first time without lock */
return;
- /* XXX as an optimization we could reuse na->core_lock */
mtx_lock(&kring->q_lock);
if (kring->nkr_stopped) /* check again with lock held */
@@ -1629,7 +932,7 @@ netmap_rxsync_from_host(struct netmap_adapter *na, struct thread *td, void *pwai
ring->reserved = resvd = 0; // XXX panic...
}
k = (k >= resvd) ? k - resvd : k + lim - resvd;
- }
+ }
if (j != k) {
n = k >= j ? k - j : k + lim - j;
kring->nr_hwavail -= n;
@@ -1646,6 +949,104 @@ unlock_out:
}
+/* Get a netmap adapter for the port.
+ *
+ * If it is possible to satisfy the request, return 0
+ * with *na containing the netmap adapter found.
+ * Otherwise return an error code, with *na containing NULL.
+ *
+ * When the port is attached to a bridge, we always return
+ * EBUSY.
+ * Otherwise, if the port is already bound to a file descriptor,
+ * then we unconditionally return the existing adapter into *na.
+ * In all the other cases, we return (into *na) either native,
+ * generic or NULL, according to the following table:
+ *
+ * native_support
+ * active_fds dev.netmap.admode YES NO
+ * -------------------------------------------------------
+ * >0 * NA(ifp) NA(ifp)
+ *
+ * 0 NETMAP_ADMODE_BEST NATIVE GENERIC
+ * 0 NETMAP_ADMODE_NATIVE NATIVE NULL
+ * 0 NETMAP_ADMODE_GENERIC GENERIC GENERIC
+ *
+ */
+
+int
+netmap_get_hw_na(struct ifnet *ifp, struct netmap_adapter **na)
+{
+ /* generic support */
+ int i = netmap_admode; /* Take a snapshot. */
+ int error = 0;
+ struct netmap_adapter *prev_na;
+ struct netmap_generic_adapter *gna;
+
+ *na = NULL; /* default */
+
+ /* reset in case of invalid value */
+ if (i < NETMAP_ADMODE_BEST || i >= NETMAP_ADMODE_LAST)
+ i = netmap_admode = NETMAP_ADMODE_BEST;
+
+ if (NETMAP_CAPABLE(ifp)) {
+ /* If an adapter already exists, but is
+ * attached to a vale port, we report that the
+ * port is busy.
+ */
+ if (NETMAP_OWNED_BY_KERN(NA(ifp)))
+ return EBUSY;
+
+ /* If an adapter already exists, return it if
+ * there are active file descriptors or if
+ * netmap is not forced to use generic
+ * adapters.
+ */
+ if (NA(ifp)->active_fds > 0 ||
+ i != NETMAP_ADMODE_GENERIC) {
+ *na = NA(ifp);
+ return 0;
+ }
+ }
+
+ /* If there isn't native support and netmap is not allowed
+ * to use generic adapters, we cannot satisfy the request.
+ */
+ if (!NETMAP_CAPABLE(ifp) && i == NETMAP_ADMODE_NATIVE)
+ return EINVAL;
+
+ /* Otherwise, create a generic adapter and return it,
+ * saving the previously used netmap adapter, if any.
+ *
+ * Note that here 'prev_na', if not NULL, MUST be a
+ * native adapter, and CANNOT be a generic one. This is
+ * true because generic adapters are created on demand, and
+ * destroyed when not used anymore. Therefore, if the adapter
+ * currently attached to an interface 'ifp' is generic, it
+ * must be that
+ * (NA(ifp)->active_fds > 0 || NETMAP_OWNED_BY_KERN(NA(ifp))).
+ * Consequently, if NA(ifp) is generic, we will enter one of
+ * the branches above. This ensures that we never override
+ * a generic adapter with another generic adapter.
+ */
+ prev_na = NA(ifp);
+ error = generic_netmap_attach(ifp);
+ if (error)
+ return error;
+
+ *na = NA(ifp);
+ gna = (struct netmap_generic_adapter*)NA(ifp);
+ gna->prev = prev_na; /* save old na */
+ if (prev_na != NULL) {
+ ifunit_ref(ifp->if_xname);
+ // XXX add a refcount ?
+ netmap_adapter_get(prev_na);
+ }
+ D("Created generic NA %p (prev %p)", gna, gna->prev);
+
+ return 0;
+}
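/*
 * Hedged, illustration-only restatement of the table above (the helper
 * name and the enum are hypothetical, not part of this patch). It
 * ignores the EBUSY case for ports owned by a VALE bridge and only
 * names the flavour of adapter that netmap_get_hw_na() hands back;
 * 'native_support' stands for the NETMAP_CAPABLE(ifp) column.
 */
enum nm_na_choice { NM_USE_EXISTING, NM_USE_NATIVE, NM_USE_GENERIC, NM_USE_NONE };

static enum nm_na_choice
nm_admode_choice(int native_support, int active_fds, int admode)
{
	if (active_fds > 0)
		return NM_USE_EXISTING;	/* NA(ifp), whatever flavour it is */
	if (admode == NETMAP_ADMODE_GENERIC)
		return NM_USE_GENERIC;
	if (native_support)
		return NM_USE_NATIVE;
	/* no native support: BEST falls back to generic, NATIVE fails */
	return (admode == NETMAP_ADMODE_NATIVE) ? NM_USE_NONE : NM_USE_GENERIC;
}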
+
+
/*
* MUST BE CALLED UNDER NMG_LOCK()
*
@@ -1666,179 +1067,191 @@ unlock_out:
* being detached from the bridge in error handling. But once refcount
* is acquired by this function, it must be released using nm_if_rele().
*/
-static int
-get_ifp(struct nmreq *nmr, struct ifnet **ifp, int create)
+int
+netmap_get_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
{
- const char *name = nmr->nr_name;
- int namelen = strlen(name);
- struct ifnet *iter = NULL;
- int no_prefix = 0;
+ struct ifnet *ifp;
+ int error = 0;
+ struct netmap_adapter *ret;
- /* first try to see if this is a bridge port. */
- struct nm_bridge *b;
- struct netmap_adapter *na;
- int i, j, cand = -1, cand2 = -1;
- int needed;
+ *na = NULL; /* default return value */
+ /* first try to see if this is a bridge port. */
NMG_LOCK_ASSERT();
- *ifp = NULL; /* default */
- if (strncmp(name, NM_NAME, sizeof(NM_NAME) - 1)) {
- no_prefix = 1; /* no VALE prefix */
- goto no_bridge_port;
- }
- b = nm_find_bridge(name, create);
- if (b == NULL) {
- D("no bridges available for '%s'", name);
- return (ENXIO);
+ error = netmap_get_bdg_na(nmr, na, create);
+ if (error || *na != NULL) /* valid match in netmap_get_bdg_na() */
+ return error;
+
+ ifp = ifunit_ref(nmr->nr_name);
+ if (ifp == NULL) {
+ return ENXIO;
}
- /* Now we are sure that name starts with the bridge's name,
- * lookup the port in the bridge. We need to scan the entire
- * list. It is not important to hold a WLOCK on the bridge
- * during the search because NMG_LOCK already guarantees
- * that there are no other possible writers.
- */
+ error = netmap_get_hw_na(ifp, &ret);
+ if (error)
+ goto out;
- /* lookup in the local list of ports */
- for (j = 0; j < b->bdg_active_ports; j++) {
- i = b->bdg_port_index[j];
- na = b->bdg_ports[i];
- // KASSERT(na != NULL);
- iter = na->ifp;
- /* XXX make sure the name only contains one : */
- if (!strcmp(iter->if_xname, name) /* virtual port */ ||
- (namelen > b->bdg_namelen && !strcmp(iter->if_xname,
- name + b->bdg_namelen + 1)) /* NIC */) {
- ADD_BDG_REF(iter);
- ND("found existing if %s refs %d", name,
- NA(iter)->na_bdg_refcount);
- *ifp = iter;
- /* we are done, this is surely netmap capable */
- return 0;
+ if (ret != NULL) {
+ /* Users cannot use the NIC attached to a bridge directly */
+ if (NETMAP_OWNED_BY_KERN(ret)) {
+ error = EINVAL;
+ goto out;
}
+ error = 0;
+ *na = ret;
+ netmap_adapter_get(ret);
}
- /* not found, should we create it? */
- if (!create)
- return ENXIO;
- /* yes we should, see if we have space to attach entries */
- needed = 2; /* in some cases we only need 1 */
- if (b->bdg_active_ports + needed >= NM_BDG_MAXPORTS) {
- D("bridge full %d, cannot create new port", b->bdg_active_ports);
- return EINVAL;
+out:
+ if_rele(ifp);
+
+ return error;
+}
+
+
+/*
+ * validate parameters on entry for *_txsync()
+ * Returns ring->cur if ok, or something >= kring->nkr_num_slots
+ * in case of error. The extra argument is a pointer to
+ * 'new_slots'. XXX this may be deprecated at some point.
+ *
+ * Below is a correct configuration on input. ring->cur
+ * must be in the region covered by kring->nr_hwavail,
+ * and ring->avail and kring->nr_hwavail should end at the same slot.
+ *
+ * +-hwcur
+ * |
+ * v<--hwres-->|<-----hwavail---->
+ * ------+------------------------------+-------- ring
+ * |
+ * |<---avail--->
+ * +--cur
+ *
+ */
+u_int
+nm_txsync_prologue(struct netmap_kring *kring, u_int *new_slots)
+{
+ struct netmap_ring *ring = kring->ring;
+ u_int cur = ring->cur; /* read only once */
+ u_int avail = ring->avail; /* read only once */
+ u_int n = kring->nkr_num_slots;
+ u_int kstart, kend, a;
+
+#if 1 /* kernel sanity checks */
+ if (kring->nr_hwcur >= n ||
+ kring->nr_hwreserved >= n || kring->nr_hwavail >= n ||
+ kring->nr_hwreserved + kring->nr_hwavail >= n)
+ goto error;
+#endif /* kernel sanity checks */
+ kstart = kring->nr_hwcur + kring->nr_hwreserved;
+ if (kstart >= n)
+ kstart -= n;
+ kend = kstart + kring->nr_hwavail;
+ /* user sanity checks. a is the expected avail */
+ if (cur < kstart) {
+ /* too low, but maybe wraparound */
+ if (cur + n > kend)
+ goto error;
+ *new_slots = cur + n - kstart;
+ a = kend - cur - n;
+ } else {
+ if (cur > kend)
+ goto error;
+ *new_slots = cur - kstart;
+ a = kend - cur;
}
- /* record the next two ports available, but do not allocate yet */
- cand = b->bdg_port_index[b->bdg_active_ports];
- cand2 = b->bdg_port_index[b->bdg_active_ports + 1];
- ND("+++ bridge %s port %s used %d avail %d %d",
- b->bdg_basename, name, b->bdg_active_ports, cand, cand2);
+ if (a != avail) {
+ RD(5, "wrong but fixable avail have %d need %d",
+ avail, a);
+ ring->avail = avail = a;
+ }
+ return cur;
- /*
- * try see if there is a matching NIC with this name
- * (after the bridge's name)
- */
- iter = ifunit_ref(name + b->bdg_namelen + 1);
- if (!iter) { /* this is a virtual port */
- /* Create a temporary NA with arguments, then
- * bdg_netmap_attach() will allocate the real one
- * and attach it to the ifp
- */
- struct netmap_adapter tmp_na;
- int error;
+error:
+ RD(5, "kring error: hwcur %d hwres %d hwavail %d cur %d av %d",
+ kring->nr_hwcur,
+ kring->nr_hwreserved, kring->nr_hwavail,
+ cur, avail);
+ return n;
+}
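/*
 * Worked example of the checks above, with illustrative numbers only:
 * nkr_num_slots = 256, nr_hwcur = 200, nr_hwreserved = 0 and
 * nr_hwavail = 100 give kstart = 200 and kend = 300.
 * - ring->cur = 250 is valid: new_slots = 50, expected avail a = 50;
 * - ring->cur = 30 (the user wrapped past slot 255) is also valid:
 *   cur + n = 286 <= kend, so new_slots = 86 and a = 14;
 * - ring->cur = 60 is rejected: cur + n = 316 > kend, nkr_num_slots is
 *   returned and the caller can reinitialize the ring.
 * In the valid cases a mismatching ring->avail is simply rewritten.
 */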
- if (nmr->nr_cmd) {
- /* nr_cmd must be 0 for a virtual port */
- return EINVAL;
- }
- bzero(&tmp_na, sizeof(tmp_na));
- /* bound checking */
- tmp_na.num_tx_rings = nmr->nr_tx_rings;
- nm_bound_var(&tmp_na.num_tx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
- nmr->nr_tx_rings = tmp_na.num_tx_rings; // write back
- tmp_na.num_rx_rings = nmr->nr_rx_rings;
- nm_bound_var(&tmp_na.num_rx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
- nmr->nr_rx_rings = tmp_na.num_rx_rings; // write back
- nm_bound_var(&nmr->nr_tx_slots, NM_BRIDGE_RINGSIZE,
- 1, NM_BDG_MAXSLOTS, NULL);
- tmp_na.num_tx_desc = nmr->nr_tx_slots;
- nm_bound_var(&nmr->nr_rx_slots, NM_BRIDGE_RINGSIZE,
- 1, NM_BDG_MAXSLOTS, NULL);
- tmp_na.num_rx_desc = nmr->nr_rx_slots;
-
- /* create a struct ifnet for the new port.
- * need M_NOWAIT as we are under nma_lock
- */
- iter = malloc(sizeof(*iter), M_DEVBUF, M_NOWAIT | M_ZERO);
- if (!iter)
- return ENOMEM;
-
- strcpy(iter->if_xname, name);
- tmp_na.ifp = iter;
- /* bdg_netmap_attach creates a struct netmap_adapter */
- error = bdg_netmap_attach(&tmp_na);
- if (error) {
- D("error %d", error);
- free(iter, M_DEVBUF);
- return error;
- }
- cand2 = -1; /* only need one port */
- } else if (NETMAP_CAPABLE(iter)) { /* this is a NIC */
- /* make sure the NIC is not already in use */
- if (NETMAP_OWNED_BY_ANY(iter)) {
- D("NIC %s busy, cannot attach to bridge",
- iter->if_xname);
- if_rele(iter); /* don't detach from bridge */
- return EINVAL;
+
+/*
+ * validate parameters on entry for *_rxsync()
+ * Returns ring->cur - ring->reserved if ok,
+ * or something >= kring->nkr_num_slots
+ * in case of error. The extra argument is a pointer to
+ * 'resvd'. XXX this may be deprecated at some point.
+ *
+ * Below is a correct configuration on input. ring->cur and
+ * ring->reserved must be in the region covered by kring->nr_hwavail,
+ * and ring->avail and kring->nr_hwavail should end at the same slot.
+ *
+ * +-hwcur
+ * |
+ * v<-------hwavail---------->
+ * ---------+--------------------------+-------- ring
+ * |<--res-->|
+ * |<---avail--->
+ * +--cur
+ *
+ */
+u_int
+nm_rxsync_prologue(struct netmap_kring *kring, u_int *resvd)
+{
+ struct netmap_ring *ring = kring->ring;
+ u_int cur = ring->cur; /* read only once */
+ u_int avail = ring->avail; /* read only once */
+ u_int res = ring->reserved; /* read only once */
+ u_int n = kring->nkr_num_slots;
+ u_int kend = kring->nr_hwcur + kring->nr_hwavail;
+ u_int a;
+
+#if 1 /* kernel sanity checks */
+ if (kring->nr_hwcur >= n || kring->nr_hwavail >= n)
+ goto error;
+#endif /* kernel sanity checks */
+ /* user sanity checks */
+ if (res >= n)
+ goto error;
+ /* check that cur is valid, a is the expected value of avail */
+ if (cur < kring->nr_hwcur) {
+ /* too low, but maybe wraparound */
+ if (cur + n > kend)
+ goto error;
+ a = kend - (cur + n);
+ } else {
+ if (cur > kend)
+ goto error;
+ a = kend - cur;
+ }
+ if (a != avail) {
+ RD(5, "wrong but fixable avail have %d need %d",
+ avail, a);
+ ring->avail = avail = a;
+ }
+ if (res != 0) {
+ /* then repeat the check for cur + res */
+ cur = (cur >= res) ? cur - res : n + cur - res;
+ if (cur < kring->nr_hwcur) {
+ /* too low, but maybe wraparound */
+ if (cur + n > kend)
+ goto error;
+ } else if (cur > kend) {
+ goto error;
}
- if (nmr->nr_arg1 != NETMAP_BDG_HOST)
- cand2 = -1; /* only need one port */
- } else { /* not a netmap-capable NIC */
- if_rele(iter); /* don't detach from bridge */
- return EINVAL;
}
- na = NA(iter);
-
- BDG_WLOCK(b);
- na->bdg_port = cand;
- ND("NIC %p to bridge port %d", NA(iter), cand);
- /* bind the port to the bridge (virtual ports are not active) */
- b->bdg_ports[cand] = na;
- na->na_bdg = b;
- b->bdg_active_ports++;
- if (cand2 >= 0) {
- /* also bind the host stack to the bridge */
- b->bdg_ports[cand2] = SWNA(iter);
- SWNA(iter)->bdg_port = cand2;
- SWNA(iter)->na_bdg = b;
- b->bdg_active_ports++;
- ND("host %p to bridge port %d", SWNA(iter), cand2);
- }
- ADD_BDG_REF(iter); // XXX one or two ?
- ND("if %s refs %d", name, NA(iter)->na_bdg_refcount);
- BDG_WUNLOCK(b);
- *ifp = iter;
- return 0;
-
-no_bridge_port:
- *ifp = iter;
- if (! *ifp)
- *ifp = ifunit_ref(name);
- if (*ifp == NULL)
- return (ENXIO);
+ *resvd = res;
+ return cur;
- if (NETMAP_CAPABLE(*ifp)) {
- /* Users cannot use the NIC attached to a bridge directly */
- if (no_prefix && NETMAP_OWNED_BY_KERN(*ifp)) {
- if_rele(*ifp); /* don't detach from bridge */
- return EINVAL;
- } else
- return 0; /* valid pointer, we hold the refcount */
- }
- nm_if_rele(*ifp);
- return EINVAL; // not NETMAP capable
+error:
+ RD(5, "kring error: hwcur %d hwres %d hwavail %d cur %d av %d res %d",
+ kring->nr_hwcur,
+ kring->nr_hwreserved, kring->nr_hwavail,
+ ring->cur, avail, res);
+ return n;
}
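/*
 * Worked example of the checks above, with illustrative numbers only:
 * nkr_num_slots = 256, nr_hwcur = 100 and nr_hwavail = 80 give
 * kend = 180.
 * - ring->cur = 150, ring->reserved = 10: cur lies in [hwcur, kend],
 *   a = 30, and the function returns cur - res = 140 with *resvd = 10;
 * - with nr_hwcur = 200 (kend = 280), ring->cur = 20 is a legal
 *   wraparound (20 + 256 = 276 <= 280, a = 4), while ring->cur = 90
 *   is rejected (90 + 256 = 346 > 280) and nkr_num_slots is returned.
 * As in the TX case, a mismatching ring->avail is rewritten rather
 * than treated as fatal.
 */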
-
/*
* Error routine called when txsync/rxsync detects an error.
* Can't do much more than resetting cur = hwcur, avail = hwavail.
@@ -1859,7 +1272,7 @@ netmap_ring_reinit(struct netmap_kring *kring)
int errors = 0;
// XXX KASSERT nm_kr_tryget
- RD(10, "called for %s", kring->na->ifp->if_xname);
+ RD(10, "called for %s", NM_IFPNAME(kring->na->ifp));
if (ring->cur > lim)
errors++;
for (i = 0; i <= lim; i++) {
@@ -1884,7 +1297,7 @@ netmap_ring_reinit(struct netmap_kring *kring)
RD(10, "total %d errors", errors);
errors++;
RD(10, "%s %s[%d] reinit, cur %d -> %d avail %d -> %d",
- kring->na->ifp->if_xname,
+ NM_IFPNAME(kring->na->ifp),
pos < n ? "TX" : "RX", pos < n ? pos : pos - n,
ring->cur, kring->nr_hwcur,
ring->avail, kring->nr_hwavail);
@@ -1902,8 +1315,8 @@ netmap_ring_reinit(struct netmap_kring *kring)
static int
netmap_set_ringid(struct netmap_priv_d *priv, u_int ringid)
{
- struct ifnet *ifp = priv->np_ifp;
- struct netmap_adapter *na = NA(ifp);
+ struct netmap_adapter *na = priv->np_na;
+ struct ifnet *ifp = na->ifp;
u_int i = ringid & NETMAP_RING_MASK;
/* initially (np_qfirst == np_qlast) we don't want to lock */
u_int lim = na->num_rx_rings;
@@ -1928,12 +1341,12 @@ netmap_set_ringid(struct netmap_priv_d *priv, u_int ringid)
priv->np_txpoll = (ringid & NETMAP_NO_TX_POLL) ? 0 : 1;
if (netmap_verbose) {
if (ringid & NETMAP_SW_RING)
- D("ringid %s set to SW RING", ifp->if_xname);
+ D("ringid %s set to SW RING", NM_IFPNAME(ifp));
else if (ringid & NETMAP_HW_RING)
- D("ringid %s set to HW RING %d", ifp->if_xname,
+ D("ringid %s set to HW RING %d", NM_IFPNAME(ifp),
priv->np_qfirst);
else
- D("ringid %s set to all %d HW RINGS", ifp->if_xname, lim);
+ D("ringid %s set to all %d HW RINGS", NM_IFPNAME(ifp), lim);
}
return 0;
}
@@ -1944,18 +1357,18 @@ netmap_set_ringid(struct netmap_priv_d *priv, u_int ringid)
* If success it returns a pointer to netmap_if, otherwise NULL.
* This must be called with NMG_LOCK held.
*/
-static struct netmap_if *
-netmap_do_regif(struct netmap_priv_d *priv, struct ifnet *ifp,
+struct netmap_if *
+netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na,
uint16_t ringid, int *err)
{
- struct netmap_adapter *na = NA(ifp);
+ struct ifnet *ifp = na->ifp;
struct netmap_if *nifp = NULL;
- int error, need_mem;
+ int error, need_mem = 0;
NMG_LOCK_ASSERT();
/* ring configuration may have changed, fetch from the card */
netmap_update_config(na);
- priv->np_ifp = ifp; /* store the reference */
+ priv->np_na = na; /* store the reference */
error = netmap_set_ringid(priv, ringid);
if (error)
goto out;
@@ -1967,57 +1380,40 @@ netmap_do_regif(struct netmap_priv_d *priv, struct ifnet *ifp,
if (error)
goto out;
}
- nifp = netmap_if_new(ifp->if_xname, na);
+ nifp = netmap_if_new(NM_IFPNAME(ifp), na);
if (nifp == NULL) { /* allocation failed */
/* we should drop the allocator, but only
* if we were the ones who grabbed it
*/
- if (need_mem)
- netmap_drop_memory_locked(priv);
error = ENOMEM;
goto out;
}
- na->refcount++;
+ na->active_fds++;
if (ifp->if_capenable & IFCAP_NETMAP) {
/* was already set */
} else {
- u_int i;
/* Otherwise set the card in netmap mode
* and make it use the shared buffers.
*
- * If the interface is attached to a bridge, lock it.
- */
- if (NETMAP_OWNED_BY_KERN(ifp))
- BDG_WLOCK(NA(ifp)->na_bdg);
- for (i = 0 ; i < na->num_tx_rings + 1; i++)
- mtx_init(&na->tx_rings[i].q_lock, "nm_txq_lock",
- NULL, MTX_DEF);
- for (i = 0 ; i < na->num_rx_rings + 1; i++) {
- mtx_init(&na->rx_rings[i].q_lock, "nm_rxq_lock",
- NULL, MTX_DEF);
- }
- if (nma_is_hw(na)) {
- SWNA(ifp)->tx_rings = &na->tx_rings[na->num_tx_rings];
- SWNA(ifp)->rx_rings = &na->rx_rings[na->num_rx_rings];
- }
- /*
* do not core lock because the race is harmless here,
* there cannot be any traffic to netmap_transmit()
*/
- error = na->nm_register(ifp, 1); /* mode on */
- // XXX do we need to nm_alloc_bdgfwd() in all cases ?
- if (!error)
- error = nm_alloc_bdgfwd(na);
+ na->na_lut = na->nm_mem->pools[NETMAP_BUF_POOL].lut;
+ ND("%p->na_lut == %p", na, na->na_lut);
+ na->na_lut_objtotal = na->nm_mem->pools[NETMAP_BUF_POOL].objtotal;
+ error = na->nm_register(na, 1); /* mode on */
if (error) {
netmap_do_unregif(priv, nifp);
nifp = NULL;
}
- if (NETMAP_OWNED_BY_KERN(ifp))
- BDG_WUNLOCK(NA(ifp)->na_bdg);
-
}
out:
*err = error;
+ if (error) {
+ priv->np_na = NULL;
+ if (need_mem)
+ netmap_drop_memory_locked(priv);
+ }
if (nifp != NULL) {
/*
 		 * advertise that the interface is ready by setting np_nifp.
@@ -2030,251 +1426,6 @@ out:
return nifp;
}
-/* Process NETMAP_BDG_ATTACH and NETMAP_BDG_DETACH */
-static int
-nm_bdg_attach(struct nmreq *nmr)
-{
- struct ifnet *ifp;
- struct netmap_if *nifp;
- struct netmap_priv_d *npriv;
- int error;
-
- npriv = malloc(sizeof(*npriv), M_DEVBUF, M_NOWAIT|M_ZERO);
- if (npriv == NULL)
- return ENOMEM;
- NMG_LOCK();
- error = get_ifp(nmr, &ifp, 1 /* create if not exists */);
- if (error) /* no device, or another bridge or user owns the device */
- goto unlock_exit;
- /* get_ifp() sets na_bdg if this is a physical interface
- * that we can attach to a switch.
- */
- if (!NETMAP_OWNED_BY_KERN(ifp)) {
- /* got reference to a virtual port or direct access to a NIC.
- * perhaps specified no bridge prefix or wrong NIC name
- */
- error = EINVAL;
- goto unref_exit;
- }
-
- if (NA(ifp)->refcount > 0) { /* already registered */
- error = EBUSY;
- DROP_BDG_REF(ifp);
- goto unlock_exit;
- }
-
- nifp = netmap_do_regif(npriv, ifp, nmr->nr_ringid, &error);
- if (!nifp) {
- goto unref_exit;
- }
-
- NA(ifp)->na_kpriv = npriv;
- NMG_UNLOCK();
- ND("registered %s to netmap-mode", ifp->if_xname);
- return 0;
-
-unref_exit:
- nm_if_rele(ifp);
-unlock_exit:
- NMG_UNLOCK();
- bzero(npriv, sizeof(*npriv));
- free(npriv, M_DEVBUF);
- return error;
-}
-
-static int
-nm_bdg_detach(struct nmreq *nmr)
-{
- struct ifnet *ifp;
- int error;
- int last_instance;
-
- NMG_LOCK();
- error = get_ifp(nmr, &ifp, 0 /* don't create */);
- if (error) { /* no device, or another bridge or user owns the device */
- goto unlock_exit;
- }
- /* XXX do we need to check this ? */
- if (!NETMAP_OWNED_BY_KERN(ifp)) {
- /* got reference to a virtual port or direct access to a NIC.
- * perhaps specified no bridge's prefix or wrong NIC's name
- */
- error = EINVAL;
- goto unref_exit;
- }
-
- if (NA(ifp)->refcount == 0) { /* not registered */
- error = EINVAL;
- goto unref_exit;
- }
-
- DROP_BDG_REF(ifp); /* the one from get_ifp */
- last_instance = netmap_dtor_locked(NA(ifp)->na_kpriv); /* unregister */
- NMG_UNLOCK();
- if (!last_instance) {
- D("--- error, trying to detach an entry with active mmaps");
- error = EINVAL;
- } else {
- struct netmap_priv_d *npriv = NA(ifp)->na_kpriv;
- NA(ifp)->na_kpriv = NULL;
-
- bzero(npriv, sizeof(*npriv));
- free(npriv, M_DEVBUF);
- }
- return error;
-
-unref_exit:
- nm_if_rele(ifp);
-unlock_exit:
- NMG_UNLOCK();
- return error;
-}
-
-
-/* Initialize necessary fields of sw adapter located in right after hw's
- * one. sw adapter attaches a pair of sw rings of the netmap-mode NIC.
- * It is always activated and deactivated at the same tie with the hw's one.
- * Thus we don't need refcounting on the sw adapter.
- * Regardless of NIC's feature we use separate lock so that anybody can lock
- * me independently from the hw adapter.
- * Make sure nm_register is NULL to be handled as FALSE in nma_is_hw
- */
-static void
-netmap_attach_sw(struct ifnet *ifp)
-{
- struct netmap_adapter *hw_na = NA(ifp);
- struct netmap_adapter *na = SWNA(ifp);
-
- na->ifp = ifp;
- na->num_rx_rings = na->num_tx_rings = 1;
- na->num_tx_desc = hw_na->num_tx_desc;
- na->num_rx_desc = hw_na->num_rx_desc;
- na->nm_txsync = netmap_bdg_to_host;
- /* we use the same memory allocator as the
- * the hw adapter */
- na->nm_mem = hw_na->nm_mem;
-}
-
-
-/* exported to kernel callers, e.g. OVS ?
- * Entry point.
- * Called without NMG_LOCK.
- */
-int
-netmap_bdg_ctl(struct nmreq *nmr, bdg_lookup_fn_t func)
-{
- struct nm_bridge *b;
- struct netmap_adapter *na;
- struct ifnet *iter;
- char *name = nmr->nr_name;
- int cmd = nmr->nr_cmd, namelen = strlen(name);
- int error = 0, i, j;
-
- switch (cmd) {
- case NETMAP_BDG_ATTACH:
- error = nm_bdg_attach(nmr);
- break;
-
- case NETMAP_BDG_DETACH:
- error = nm_bdg_detach(nmr);
- break;
-
- case NETMAP_BDG_LIST:
- /* this is used to enumerate bridges and ports */
- if (namelen) { /* look up indexes of bridge and port */
- if (strncmp(name, NM_NAME, strlen(NM_NAME))) {
- error = EINVAL;
- break;
- }
- NMG_LOCK();
- b = nm_find_bridge(name, 0 /* don't create */);
- if (!b) {
- error = ENOENT;
- NMG_UNLOCK();
- break;
- }
-
- error = ENOENT;
- for (j = 0; j < b->bdg_active_ports; j++) {
- i = b->bdg_port_index[j];
- na = b->bdg_ports[i];
- if (na == NULL) {
- D("---AAAAAAAAARGH-------");
- continue;
- }
- iter = na->ifp;
- /* the former and the latter identify a
- * virtual port and a NIC, respectively
- */
- if (!strcmp(iter->if_xname, name) ||
- (namelen > b->bdg_namelen &&
- !strcmp(iter->if_xname,
- name + b->bdg_namelen + 1))) {
- /* bridge index */
- nmr->nr_arg1 = b - nm_bridges;
- nmr->nr_arg2 = i; /* port index */
- error = 0;
- break;
- }
- }
- NMG_UNLOCK();
- } else {
- /* return the first non-empty entry starting from
- * bridge nr_arg1 and port nr_arg2.
- *
- * Users can detect the end of the same bridge by
- * seeing the new and old value of nr_arg1, and can
- * detect the end of all the bridge by error != 0
- */
- i = nmr->nr_arg1;
- j = nmr->nr_arg2;
-
- NMG_LOCK();
- for (error = ENOENT; i < NM_BRIDGES; i++) {
- b = nm_bridges + i;
- if (j >= b->bdg_active_ports) {
- j = 0; /* following bridges scan from 0 */
- continue;
- }
- nmr->nr_arg1 = i;
- nmr->nr_arg2 = j;
- j = b->bdg_port_index[j];
- na = b->bdg_ports[j];
- iter = na->ifp;
- strncpy(name, iter->if_xname, (size_t)IFNAMSIZ);
- error = 0;
- break;
- }
- NMG_UNLOCK();
- }
- break;
-
- case NETMAP_BDG_LOOKUP_REG:
- /* register a lookup function to the given bridge.
- * nmr->nr_name may be just bridge's name (including ':'
- * if it is not just NM_NAME).
- */
- if (!func) {
- error = EINVAL;
- break;
- }
- NMG_LOCK();
- b = nm_find_bridge(name, 0 /* don't create */);
- if (!b) {
- error = EINVAL;
- } else {
- b->nm_bdg_lookup = func;
- }
- NMG_UNLOCK();
- break;
-
- default:
- D("invalid cmd (nmr->nr_cmd) (0x%x)", cmd);
- error = EINVAL;
- break;
- }
- return error;
-}
/*
@@ -2290,7 +1441,7 @@ netmap_bdg_ctl(struct nmreq *nmr, bdg_lookup_fn_t func)
*
* Return 0 on success, errno otherwise.
*/
-static int
+int
netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data,
int fflag, struct thread *td)
{
@@ -2353,13 +1504,12 @@ netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data,
if (nmr->nr_name[0] != '\0') {
/* get a refcount */
- error = get_ifp(nmr, &ifp, 1 /* create */);
+ error = netmap_get_na(nmr, &na, 1 /* create */);
if (error)
break;
- na = NA(ifp); /* retrieve the netmap adapter */
- nmd = na->nm_mem; /* and its memory allocator */
+ nmd = na->nm_mem; /* get memory allocator */
}
-
+
error = netmap_mem_get_info(nmd, &nmr->nr_memsize, &memflags);
if (error)
break;
@@ -2374,9 +1524,8 @@ netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data,
nmr->nr_tx_slots = na->num_tx_desc;
if (memflags & NETMAP_MEM_PRIVATE)
nmr->nr_ringid |= NETMAP_PRIV_MEM;
+ netmap_adapter_put(na);
} while (0);
- if (ifp)
- nm_if_rele(ifp); /* return the refcount */
NMG_UNLOCK();
break;
@@ -2388,7 +1537,8 @@ netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data,
}
/* possibly attach/detach NIC and VALE switch */
i = nmr->nr_cmd;
- if (i == NETMAP_BDG_ATTACH || i == NETMAP_BDG_DETACH) {
+ if (i == NETMAP_BDG_ATTACH || i == NETMAP_BDG_DETACH
+ || i == NETMAP_BDG_OFFSET) {
error = netmap_bdg_ctl(nmr, NULL);
break;
} else if (i != 0) {
@@ -2402,36 +1552,35 @@ netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data,
do {
u_int memflags;
- if (priv->np_ifp != NULL) { /* thread already registered */
+ if (priv->np_na != NULL) { /* thread already registered */
error = netmap_set_ringid(priv, nmr->nr_ringid);
break;
}
/* find the interface and a reference */
- error = get_ifp(nmr, &ifp, 1 /* create */); /* keep reference */
+ error = netmap_get_na(nmr, &na, 1 /* create */); /* keep reference */
if (error)
break;
- if (NETMAP_OWNED_BY_KERN(ifp)) {
- nm_if_rele(ifp);
+ ifp = na->ifp;
+ if (NETMAP_OWNED_BY_KERN(na)) {
+ netmap_adapter_put(na);
error = EBUSY;
break;
}
- nifp = netmap_do_regif(priv, ifp, nmr->nr_ringid, &error);
+ nifp = netmap_do_regif(priv, na, nmr->nr_ringid, &error);
if (!nifp) { /* reg. failed, release priv and ref */
- nm_if_rele(ifp); /* return the refcount */
- priv->np_ifp = NULL;
+ netmap_adapter_put(na);
priv->np_nifp = NULL;
break;
}
/* return the offset of the netmap_if object */
- na = NA(ifp); /* retrieve netmap adapter */
nmr->nr_rx_rings = na->num_rx_rings;
nmr->nr_tx_rings = na->num_tx_rings;
nmr->nr_rx_slots = na->num_rx_desc;
nmr->nr_tx_slots = na->num_tx_desc;
error = netmap_mem_get_info(na->nm_mem, &nmr->nr_memsize, &memflags);
if (error) {
- nm_if_rele(ifp);
+ netmap_adapter_put(na);
break;
}
if (memflags & NETMAP_MEM_PRIVATE) {
@@ -2459,15 +1608,21 @@ netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data,
}
rmb(); /* make sure following reads are not from cache */
- ifp = priv->np_ifp; /* we have a reference */
+ na = priv->np_na; /* we have a reference */
+ if (na == NULL) {
+ D("Internal error: nifp != NULL && na == NULL");
+ error = ENXIO;
+ break;
+ }
+
+ ifp = na->ifp;
if (ifp == NULL) {
- D("Internal error: nifp != NULL && ifp == NULL");
+ RD(1, "the ifp is gone");
error = ENXIO;
break;
}
- na = NA(ifp); /* retrieve netmap adapter */
if (priv->np_qfirst == NETMAP_SW_RING) { /* host rings */
if (cmd == NIOCTXSYNC)
netmap_txsync_to_host(na);
@@ -2493,13 +1648,13 @@ netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data,
D("pre txsync ring %d cur %d hwcur %d",
i, kring->ring->cur,
kring->nr_hwcur);
- na->nm_txsync(ifp, i, NAF_FORCE_RECLAIM);
+ na->nm_txsync(na, i, NAF_FORCE_RECLAIM);
if (netmap_verbose & NM_VERB_TXSYNC)
D("post txsync ring %d cur %d hwcur %d",
i, kring->ring->cur,
kring->nr_hwcur);
} else {
- na->nm_rxsync(ifp, i, NAF_FORCE_READ);
+ na->nm_rxsync(na, i, NAF_FORCE_READ);
microtime(&na->rx_rings[i].ring->ts);
}
nm_kr_put(kring);
@@ -2521,15 +1676,17 @@ netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data,
bzero(&so, sizeof(so));
NMG_LOCK();
- error = get_ifp(nmr, &ifp, 0 /* don't create */); /* keep reference */
+ error = netmap_get_na(nmr, &na, 0 /* don't create */); /* keep reference */
if (error) {
+ netmap_adapter_put(na);
NMG_UNLOCK();
break;
}
+ ifp = na->ifp;
so.so_vnet = ifp->if_vnet;
// so->so_proto not null.
error = ifioctl(&so, cmd, data, td);
- nm_if_rele(ifp);
+ netmap_adapter_put(na);
NMG_UNLOCK();
break;
}
@@ -2560,7 +1717,7 @@ out:
 * The first one is remapped to pwait as selrecord() uses the name as a
* hidden argument.
*/
-static int
+int
netmap_poll(struct cdev *dev, int events, struct thread *td)
{
struct netmap_priv_d *priv = NULL;
@@ -2569,12 +1726,18 @@ netmap_poll(struct cdev *dev, int events, struct thread *td)
struct netmap_kring *kring;
u_int i, check_all_tx, check_all_rx, want_tx, want_rx, revents = 0;
u_int lim_tx, lim_rx, host_forwarded = 0;
- struct mbq q = { NULL, NULL, 0 };
+ struct mbq q;
void *pwait = dev; /* linux compatibility */
- int retry_tx = 1;
+ /*
+ * In order to avoid nested locks, we need to "double check"
+ * txsync and rxsync if we decide to do a selrecord().
+ * retry_tx (and retry_rx, later) prevent looping forever.
+ */
+ int retry_tx = 1;
(void)pwait;
+ mbq_init(&q);
if (devfs_get_cdevpriv((void **)&priv) != 0 || priv == NULL)
return POLLERR;
@@ -2585,18 +1748,22 @@ netmap_poll(struct cdev *dev, int events, struct thread *td)
}
rmb(); /* make sure following reads are not from cache */
- ifp = priv->np_ifp;
- // XXX check for deleting() ?
+ na = priv->np_na;
+ ifp = na->ifp;
+ // check for deleted
+ if (ifp == NULL) {
+ RD(1, "the ifp is gone");
+ return POLLERR;
+ }
+
if ( (ifp->if_capenable & IFCAP_NETMAP) == 0)
return POLLERR;
if (netmap_verbose & 0x8000)
- D("device %s events 0x%x", ifp->if_xname, events);
+ D("device %s events 0x%x", NM_IFPNAME(ifp), events);
want_tx = events & (POLLOUT | POLLWRNORM);
want_rx = events & (POLLIN | POLLRDNORM);
- na = NA(ifp); /* retrieve netmap adapter */
-
lim_tx = na->num_tx_rings;
lim_rx = na->num_rx_rings;
@@ -2618,7 +1785,11 @@ netmap_poll(struct cdev *dev, int events, struct thread *td)
return (revents);
}
- /* if we are in transparent mode, check also the host rx ring */
+ /*
+ * If we are in transparent mode, also check the host rx ring.
+ * XXX Transparent mode at the moment requires binding all
+ * rings to a single file descriptor.
+ */
kring = &na->rx_rings[lim_rx];
if ( (priv->np_qlast == NETMAP_HW_RING) // XXX check_all
&& want_rx
@@ -2630,8 +1801,8 @@ netmap_poll(struct cdev *dev, int events, struct thread *td)
}
/*
- * check_all is set if the card has more than one queue AND
- * the client is polling all of them. If true, we sleep on
+ * check_all_{tx|rx} are set if the card has more than one queue AND
+ * the file descriptor is bound to all of them. If so, we sleep on
* the "global" selinfo, otherwise we sleep on individual selinfo
* (FreeBSD only allows two selinfo's per file descriptor).
 * The interrupt routine in the driver wakes one or the other
@@ -2650,9 +1821,11 @@ netmap_poll(struct cdev *dev, int events, struct thread *td)
}
/*
- * We start with a lock free round which is good if we have
- * data available. If this fails, then lock and call the sync
+ * We start with a lock free round which is cheap if we have
+ * slots available. If this fails, then lock and call the sync
* routines.
+ * XXX rather than checking ring->avail > 0 we should check
+ * that ring->cur has not reached hwcur+hwavail
*/
for (i = priv->np_qfirst; want_rx && i < lim_rx; i++) {
kring = &na->rx_rings[i];
@@ -2673,6 +1846,8 @@ netmap_poll(struct cdev *dev, int events, struct thread *td)
 * If we want to push packets out (priv->np_txpoll) or want_tx is
* still set, we do need to run the txsync calls (on all rings,
* to avoid that the tx rings stall).
+ * XXX should also check cur != hwcur on the tx rings.
+ * Fortunately, normal tx mode has np_txpoll set.
*/
if (priv->np_txpoll || want_tx) {
/* If we really want to be woken up (want_tx),
@@ -2693,18 +1868,27 @@ flush_tx:
continue;
/* make sure only one user thread is doing this */
if (nm_kr_tryget(kring)) {
- ND("ring %p busy is %d", kring, (int)kring->nr_busy);
+ ND("ring %p busy is %d",
+ kring, (int)kring->nr_busy);
revents |= POLLERR;
goto out;
}
if (netmap_verbose & NM_VERB_TXSYNC)
D("send %d on %s %d",
- kring->ring->cur, ifp->if_xname, i);
- if (na->nm_txsync(ifp, i, 0))
+ kring->ring->cur, NM_IFPNAME(ifp), i);
+ if (na->nm_txsync(na, i, 0))
revents |= POLLERR;
- /* Check avail/call selrecord only if called with POLLOUT */
+ /* Check avail and call selrecord only if
+ * called with POLLOUT and we have run out of bufs.
+ * XXX Note, we cannot fully trust ring->avail
+ * as it is exposed to userspace (even though
+ * just updated by txsync). We should really
+ * check kring->nr_hwavail or better have
+ * txsync set a flag telling if we need
+ * to do a selrecord().
+ */
if (want_tx) {
if (kring->ring->avail > 0) {
/* stop at the first ring. We don't risk
@@ -2748,7 +1932,7 @@ do_retry_rx:
netmap_grab_packets(kring, &q, netmap_fwd);
}
- if (na->nm_rxsync(ifp, i, 0))
+ if (na->nm_rxsync(na, i, 0))
revents |= POLLERR;
if (netmap_no_timestamp == 0 ||
kring->ring->flags & NR_TIMESTAMP) {
@@ -2784,7 +1968,7 @@ do_retry_rx:
}
if (q.head)
- netmap_send_up(na->ifp, q.head);
+ netmap_send_up(na->ifp, &q);
out:
@@ -2793,6 +1977,71 @@ out:
/*------- driver support routines ------*/
+static int netmap_hw_krings_create(struct netmap_adapter *);
+
+static int
+netmap_notify(struct netmap_adapter *na, u_int n_ring, enum txrx tx, int flags)
+{
+ struct netmap_kring *kring;
+
+ if (tx == NR_TX) {
+ kring = na->tx_rings + n_ring;
+ selwakeuppri(&kring->si, PI_NET);
+ if (flags & NAF_GLOBAL_NOTIFY)
+ selwakeuppri(&na->tx_si, PI_NET);
+ } else {
+ kring = na->rx_rings + n_ring;
+ selwakeuppri(&kring->si, PI_NET);
+ if (flags & NAF_GLOBAL_NOTIFY)
+ selwakeuppri(&na->rx_si, PI_NET);
+ }
+ return 0;
+}
+
+
+// XXX check handling of failures
+int
+netmap_attach_common(struct netmap_adapter *na)
+{
+ struct ifnet *ifp = na->ifp;
+
+ if (na->num_tx_rings == 0 || na->num_rx_rings == 0) {
+ D("%s: invalid rings tx %d rx %d",
+ ifp->if_xname, na->num_tx_rings, na->num_rx_rings);
+ return EINVAL;
+ }
+ WNA(ifp) = na;
+ NETMAP_SET_CAPABLE(ifp);
+ if (na->nm_krings_create == NULL) {
+ na->nm_krings_create = netmap_hw_krings_create;
+ na->nm_krings_delete = netmap_krings_delete;
+ }
+ if (na->nm_notify == NULL)
+ na->nm_notify = netmap_notify;
+ na->active_fds = 0;
+
+ if (na->nm_mem == NULL)
+ na->nm_mem = &nm_mem;
+ return 0;
+}
+
+
+void
+netmap_detach_common(struct netmap_adapter *na)
+{
+ if (na->ifp)
+ WNA(na->ifp) = NULL; /* XXX do we need this? */
+
+ if (na->tx_rings) { /* XXX should not happen */
+ D("freeing leftover tx_rings");
+ na->nm_krings_delete(na);
+ }
+ if (na->na_flags & NAF_MEM_OWNER)
+ netmap_mem_private_delete(na->nm_mem);
+ bzero(na, sizeof(*na));
+ free(na, M_DEVBUF);
+}
+
/*
* Initialize a ``netmap_adapter`` object created by driver on attach.
@@ -2809,53 +2058,85 @@ out:
* setups.
*/
int
-netmap_attach(struct netmap_adapter *arg, u_int num_queues)
+netmap_attach(struct netmap_adapter *arg)
{
- struct netmap_adapter *na = NULL;
+ struct netmap_hw_adapter *hwna = NULL;
+ // XXX when is arg == NULL ?
struct ifnet *ifp = arg ? arg->ifp : NULL;
- size_t len;
if (arg == NULL || ifp == NULL)
goto fail;
- /* a VALE port uses two endpoints */
- len = nma_is_vp(arg) ? sizeof(*na) : sizeof(*na) * 2;
- na = malloc(len, M_DEVBUF, M_NOWAIT | M_ZERO);
- if (na == NULL)
+ hwna = malloc(sizeof(*hwna), M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (hwna == NULL)
goto fail;
- WNA(ifp) = na;
- *na = *arg; /* copy everything, trust the driver to not pass junk */
- NETMAP_SET_CAPABLE(ifp);
- if (na->num_tx_rings == 0)
- na->num_tx_rings = num_queues;
- na->num_rx_rings = num_queues;
- na->refcount = na->na_single = na->na_multi = 0;
- /* Core lock initialized here, others after netmap_if_new. */
- mtx_init(&na->core_lock, "netmap core lock", MTX_NETWORK_LOCK, MTX_DEF);
+ hwna->up = *arg;
+ if (netmap_attach_common(&hwna->up)) {
+ free(hwna, M_DEVBUF);
+ goto fail;
+ }
+ netmap_adapter_get(&hwna->up);
+
#ifdef linux
if (ifp->netdev_ops) {
- ND("netdev_ops %p", ifp->netdev_ops);
/* prepare a clone of the netdev ops */
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 28)
- na->nm_ndo.ndo_start_xmit = ifp->netdev_ops;
+ hwna->nm_ndo.ndo_start_xmit = ifp->netdev_ops;
#else
- na->nm_ndo = *ifp->netdev_ops;
+ hwna->nm_ndo = *ifp->netdev_ops;
#endif
}
- na->nm_ndo.ndo_start_xmit = linux_netmap_start_xmit;
+ hwna->nm_ndo.ndo_start_xmit = linux_netmap_start_xmit;
#endif /* linux */
- na->nm_mem = arg->nm_mem ? arg->nm_mem : &nm_mem;
- if (!nma_is_vp(arg))
- netmap_attach_sw(ifp);
- D("success for %s", ifp->if_xname);
+
+ D("success for %s", NM_IFPNAME(ifp));
return 0;
fail:
- D("fail, arg %p ifp %p na %p", arg, ifp, na);
+ D("fail, arg %p ifp %p na %p", arg, ifp, hwna);
netmap_detach(ifp);
- return (na ? EINVAL : ENOMEM);
+ return (hwna ? EINVAL : ENOMEM);
+}
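/*
 * Hedged sketch of the new single-argument attach as seen from a
 * hypothetical "foo" driver (struct foo_softc and the foo_netmap_*
 * callbacks are placeholders, not part of this patch). The driver
 * fills a netmap_adapter on the stack; netmap_attach() copies it into
 * the netmap_hw_adapter it allocates, so ring and slot counts now
 * travel in the structure instead of a separate num_queues argument.
 */
static void
foo_netmap_attach(struct foo_softc *sc)
{
	struct netmap_adapter na;

	bzero(&na, sizeof(na));
	na.ifp = sc->ifp;
	na.num_tx_desc = sc->num_tx_desc;
	na.num_rx_desc = sc->num_rx_desc;
	na.num_tx_rings = na.num_rx_rings = sc->num_queues;
	na.nm_txsync = foo_netmap_txsync;	/* (na, ring_nr, flags) */
	na.nm_rxsync = foo_netmap_rxsync;
	na.nm_register = foo_netmap_reg;	/* (na, onoff) */
	netmap_attach(&na);
}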
+
+
+void
+NM_DBG(netmap_adapter_get)(struct netmap_adapter *na)
+{
+ if (!na) {
+ return;
+ }
+
+ refcount_acquire(&na->na_refcount);
}
+/* returns 1 iff the netmap_adapter is destroyed */
+int
+NM_DBG(netmap_adapter_put)(struct netmap_adapter *na)
+{
+ if (!na)
+ return 1;
+
+ if (!refcount_release(&na->na_refcount))
+ return 0;
+
+ if (na->nm_dtor)
+ na->nm_dtor(na);
+
+ netmap_detach_common(na);
+
+ return 1;
+}
+
+
+int
+netmap_hw_krings_create(struct netmap_adapter *na)
+{
+ return netmap_krings_create(na,
+ na->num_tx_rings + 1, na->num_rx_rings + 1, 0);
+}
+
+
+
/*
* Free the allocated memory linked to the given ``netmap_adapter``
* object.
@@ -2868,33 +2149,22 @@ netmap_detach(struct ifnet *ifp)
if (!na)
return;
- mtx_destroy(&na->core_lock);
-
- if (na->tx_rings) { /* XXX should not happen */
- D("freeing leftover tx_rings");
- free(na->tx_rings, M_DEVBUF);
- }
- if (na->na_flags & NAF_MEM_OWNER)
- netmap_mem_private_delete(na->nm_mem);
- bzero(na, sizeof(*na));
- WNA(ifp) = NULL;
- free(na, M_DEVBUF);
+ NMG_LOCK();
+ netmap_disable_all_rings(ifp);
+ netmap_adapter_put(na);
+ na->ifp = NULL;
+ netmap_enable_all_rings(ifp);
+ NMG_UNLOCK();
}
-int
-nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n,
- struct netmap_adapter *na, u_int ring_nr);
-
-
/*
* Intercept packets from the network stack and pass them
* to netmap as incoming packets on the 'software' ring.
* We rely on the OS to make sure that the ifp and na do not go
* away (typically the caller checks for IFF_DRV_RUNNING or the like).
* In nm_register() or whenever there is a reinitialization,
- * we make sure to access the core lock and per-ring locks
- * so that IFCAP_NETMAP is visible here.
+ * we make sure that the mode change is visible here.
*/
int
netmap_transmit(struct ifnet *ifp, struct mbuf *m)
@@ -2917,44 +2187,16 @@ netmap_transmit(struct ifnet *ifp, struct mbuf *m)
kring = &na->rx_rings[na->num_rx_rings];
lim = kring->nkr_num_slots - 1;
if (netmap_verbose & NM_VERB_HOST)
- D("%s packet %d len %d from the stack", ifp->if_xname,
+ D("%s packet %d len %d from the stack", NM_IFPNAME(ifp),
kring->nr_hwcur + kring->nr_hwavail, len);
// XXX reconsider long packets if we handle fragments
if (len > NETMAP_BDG_BUF_SIZE(na->nm_mem)) { /* too long for us */
- D("%s from_host, drop packet size %d > %d", ifp->if_xname,
+ D("%s from_host, drop packet size %d > %d", NM_IFPNAME(ifp),
len, NETMAP_BDG_BUF_SIZE(na->nm_mem));
goto done;
}
- if (SWNA(ifp)->na_bdg) {
- struct nm_bdg_fwd *ft;
- char *dst;
-
- na = SWNA(ifp); /* we operate on the host port */
- ft = na->rx_rings[0].nkr_ft;
- dst = BDG_NMB(na->nm_mem, &na->rx_rings[0].ring->slot[0]);
-
- /* use slot 0 in the ft, there is nothing queued here */
- /* XXX we can save the copy calling m_copydata in nm_bdg_flush,
- * need a special flag for this.
- */
- m_copydata(m, 0, (int)len, dst);
- ft->ft_flags = 0;
- ft->ft_len = len;
- ft->ft_buf = dst;
- ft->ft_next = NM_FT_NULL;
- ft->ft_frags = 1;
- if (netmap_verbose & NM_VERB_HOST)
- RD(5, "pkt %p size %d to bridge port %d",
- dst, len, na->bdg_port);
- nm_bdg_flush(ft, 1, na, 0);
- na = NA(ifp); /* back to the regular object/lock */
- error = 0;
- goto done;
- }
-
/* protect against other instances of netmap_transmit,
* and userspace invocations of rxsync().
- * XXX could reuse core_lock
*/
// XXX [Linux] there can be no other instances of netmap_transmit
// on this same ring, but we still need this lock to protect
@@ -2962,18 +2204,18 @@ netmap_transmit(struct ifnet *ifp, struct mbuf *m)
mtx_lock(&kring->q_lock);
if (kring->nr_hwavail >= lim) {
if (netmap_verbose)
- D("stack ring %s full\n", ifp->if_xname);
+ D("stack ring %s full\n", NM_IFPNAME(ifp));
} else {
/* compute the insert position */
i = nm_kr_rxpos(kring);
slot = &kring->ring->slot[i];
- m_copydata(m, 0, (int)len, BDG_NMB(na->nm_mem, slot));
+ m_copydata(m, 0, (int)len, BDG_NMB(na, slot));
slot->len = len;
slot->flags = kring->nkr_slot_flags;
kring->nr_hwavail++;
if (netmap_verbose & NM_VERB_HOST)
- D("wake up host ring %s %d", na->ifp->if_xname, na->num_rx_rings);
- selwakeuppri(&kring->si, PI_NET);
+ D("wake up host ring %s %d", NM_IFPNAME(na->ifp), na->num_rx_rings);
+ na->nm_notify(na, na->num_rx_rings, NR_RX, 0);
error = 0;
}
mtx_unlock(&kring->q_lock);
@@ -2994,7 +2236,7 @@ done:
/*
* netmap_reset() is called by the driver routines when reinitializing
* a ring. The driver is in charge of locking to protect the kring.
- * If netmap mode is not set just return NULL.
+ * If native netmap mode is not set just return NULL.
*/
struct netmap_slot *
netmap_reset(struct netmap_adapter *na, enum txrx tx, u_int n,
@@ -3044,6 +2286,7 @@ netmap_reset(struct netmap_adapter *na, enum txrx tx, u_int n,
kring->nkr_hwofs = new_hwofs;
if (tx == NR_TX)
kring->nr_hwavail = lim;
+ kring->nr_hwreserved = 0;
#if 0 // def linux
/* XXX check that the mappings are correct */
@@ -3060,137 +2303,60 @@ netmap_reset(struct netmap_adapter *na, enum txrx tx, u_int n,
* We do the wakeup here, but the ring is not yet reconfigured.
* However, we are under lock so there are no races.
*/
- selwakeuppri(&kring->si, PI_NET);
- selwakeuppri(tx == NR_TX ? &na->tx_si : &na->rx_si, PI_NET);
+ na->nm_notify(na, n, tx, NAF_GLOBAL_NOTIFY);
return kring->ring->slot;
}
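/*
 * Hedged sketch (hypothetical "foo" driver, not part of this patch) of
 * the usual way a driver consumes netmap_reset() while reinitializing
 * a TX ring: a non-NULL return means native netmap mode is active and
 * the NIC descriptors must point at the netmap buffers.
 */
static void
foo_init_tx_ring(struct foo_softc *sc, u_int ring_nr)
{
	struct netmap_adapter *na = NA(sc->ifp);
	struct netmap_slot *slot;

	slot = netmap_reset(na, NR_TX, ring_nr, 0);
	if (slot == NULL)
		return;	/* not in native netmap mode, keep the mbuf path */
	/*
	 * Walk the NIC descriptors and point each one at the buffer of
	 * the matching netmap slot (the in-tree drivers do the
	 * translation with netmap_idx_n2k() and PNMB()).
	 */
}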
/*
- * Grab packets from a kring, move them into the ft structure
- * associated to the tx (input) port. Max one instance per port,
- * filtered on input (ioctl, poll or XXX).
- * Returns the next position in the ring.
- */
-static int
-nm_bdg_preflush(struct netmap_adapter *na, u_int ring_nr,
- struct netmap_kring *kring, u_int end)
-{
- struct netmap_ring *ring = kring->ring;
- struct nm_bdg_fwd *ft;
- u_int j = kring->nr_hwcur, lim = kring->nkr_num_slots - 1;
- u_int ft_i = 0; /* start from 0 */
- u_int frags = 1; /* how many frags ? */
- struct nm_bridge *b = na->na_bdg;
-
- /* To protect against modifications to the bridge we acquire a
- * shared lock, waiting if we can sleep (if the source port is
- * attached to a user process) or with a trylock otherwise (NICs).
- */
- ND("wait rlock for %d packets", ((j > end ? lim+1 : 0) + end) - j);
- if (na->na_flags & NAF_BDG_MAYSLEEP)
- BDG_RLOCK(b);
- else if (!BDG_RTRYLOCK(b))
- return 0;
- ND(5, "rlock acquired for %d packets", ((j > end ? lim+1 : 0) + end) - j);
- ft = kring->nkr_ft;
-
- for (; likely(j != end); j = nm_next(j, lim)) {
- struct netmap_slot *slot = &ring->slot[j];
- char *buf;
-
- ft[ft_i].ft_len = slot->len;
- ft[ft_i].ft_flags = slot->flags;
-
- ND("flags is 0x%x", slot->flags);
- /* this slot goes into a list so initialize the link field */
- ft[ft_i].ft_next = NM_FT_NULL;
- buf = ft[ft_i].ft_buf = (slot->flags & NS_INDIRECT) ?
- (void *)(uintptr_t)slot->ptr : BDG_NMB(na->nm_mem, slot);
- prefetch(buf);
- ++ft_i;
- if (slot->flags & NS_MOREFRAG) {
- frags++;
- continue;
- }
- if (unlikely(netmap_verbose && frags > 1))
- RD(5, "%d frags at %d", frags, ft_i - frags);
- ft[ft_i - frags].ft_frags = frags;
- frags = 1;
- if (unlikely((int)ft_i >= bridge_batch))
- ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
- }
- if (frags > 1) {
- D("truncate incomplete fragment at %d (%d frags)", ft_i, frags);
- // ft_i > 0, ft[ft_i-1].flags has NS_MOREFRAG
- ft[ft_i - 1].ft_frags &= ~NS_MOREFRAG;
- ft[ft_i - frags].ft_frags = frags - 1;
- }
- if (ft_i)
- ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
- BDG_RUNLOCK(b);
- return j;
-}
-
-
-/*
- * Pass packets from nic to the bridge.
- * XXX TODO check locking: this is called from the interrupt
- * handler so we should make sure that the interface is not
- * disconnected while passing down an interrupt.
+ * Dispatch rx/tx interrupts to the netmap rings.
+ *
+ * "work_done" is non-null on the RX path, NULL for the TX path.
+ * We rely on the OS to make sure that there is only one active
+ * instance per queue, and that there is appropriate locking.
*
- * Note, no user process can access this NIC so we can ignore
- * the info in the 'ring'.
+ * The 'notify' routine depends on what the ring is attached to.
+ * - for a netmap file descriptor, do a selwakeup on the individual
+ * waitqueue, plus one on the global one if needed
+ * - for a switch, call the proper forwarding routine
+ * - XXX more ?
*/
-static void
-netmap_nic_to_bdg(struct ifnet *ifp, u_int ring_nr)
+void
+netmap_common_irq(struct ifnet *ifp, u_int q, u_int *work_done)
{
struct netmap_adapter *na = NA(ifp);
- struct netmap_kring *kring = &na->rx_rings[ring_nr];
- struct netmap_ring *ring = kring->ring;
- u_int j, k;
-
- /* make sure that only one thread is ever in here,
- * after which we can unlock. Probably unnecessary XXX.
- */
- if (nm_kr_tryget(kring))
- return;
- /* fetch packets that have arrived.
- * XXX maybe do this in a loop ?
- */
- if (na->nm_rxsync(ifp, ring_nr, 0))
- goto put_out;
- if (kring->nr_hwavail == 0 && netmap_verbose) {
- D("how strange, interrupt with no packets on %s",
- ifp->if_xname);
- goto put_out;
- }
- k = nm_kr_rxpos(kring);
+ struct netmap_kring *kring;
- j = nm_bdg_preflush(na, ring_nr, kring, k);
+ q &= NETMAP_RING_MASK;
- /* we consume everything, but we cannot update kring directly
- * because the nic may have destroyed the info in the NIC ring.
- * So we need to call rxsync again to restore it.
- */
- ring->cur = j;
- ring->avail = 0;
- na->nm_rxsync(ifp, ring_nr, 0);
+ if (netmap_verbose) {
+ RD(5, "received %s queue %d", work_done ? "RX" : "TX" , q);
+ }
-put_out:
- nm_kr_put(kring);
- return;
+ if (work_done) { /* RX path */
+ if (q >= na->num_rx_rings)
+ return; // not a physical queue
+ kring = na->rx_rings + q;
+ kring->nr_kflags |= NKR_PENDINTR; // XXX atomic ?
+ na->nm_notify(na, q, NR_RX,
+ (na->num_rx_rings > 1 ? NAF_GLOBAL_NOTIFY : 0));
+ *work_done = 1; /* do not fire napi again */
+ } else { /* TX path */
+ if (q >= na->num_tx_rings)
+ return; // not a physical queue
+ kring = na->tx_rings + q;
+ na->nm_notify(na, q, NR_TX,
+ (na->num_tx_rings > 1 ? NAF_GLOBAL_NOTIFY : 0));
+ }
}
-
/*
* Default functions to handle rx/tx interrupts from a physical device.
* "work_done" is non-null on the RX path, NULL for the TX path.
- * We rely on the OS to make sure that there is only one active
- * instance per queue, and that there is appropriate locking.
*
* If the card is not in netmap mode, simply return 0,
* so that the caller proceeds with regular processing.
+ * Otherwise call netmap_common_irq() and return 1.
*
* If the card is connected to a netmap file descriptor,
* do a selwakeup on the individual queue, plus one on the global one
@@ -3203,871 +2369,64 @@ put_out:
int
netmap_rx_irq(struct ifnet *ifp, u_int q, u_int *work_done)
{
- struct netmap_adapter *na;
- struct netmap_kring *kring;
-
+ // XXX could we check NAF_NATIVE_ON ?
if (!(ifp->if_capenable & IFCAP_NETMAP))
return 0;
- q &= NETMAP_RING_MASK;
-
- if (netmap_verbose)
- RD(5, "received %s queue %d", work_done ? "RX" : "TX" , q);
- na = NA(ifp);
- if (na->na_flags & NAF_SKIP_INTR) {
+ if (NA(ifp)->na_flags & NAF_SKIP_INTR) {
ND("use regular interrupt");
return 0;
}
- if (work_done) { /* RX path */
- if (q >= na->num_rx_rings)
- return 0; // not a physical queue
- kring = na->rx_rings + q;
- kring->nr_kflags |= NKR_PENDINTR; // XXX atomic ?
- if (na->na_bdg != NULL) {
- netmap_nic_to_bdg(ifp, q);
- } else {
- selwakeuppri(&kring->si, PI_NET);
- if (na->num_rx_rings > 1 /* or multiple listeners */ )
- selwakeuppri(&na->rx_si, PI_NET);
- }
- *work_done = 1; /* do not fire napi again */
- } else { /* TX path */
- if (q >= na->num_tx_rings)
- return 0; // not a physical queue
- kring = na->tx_rings + q;
- selwakeuppri(&kring->si, PI_NET);
- if (na->num_tx_rings > 1 /* or multiple listeners */ )
- selwakeuppri(&na->tx_si, PI_NET);
- }
+ netmap_common_irq(ifp, q, work_done);
return 1;
}
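/*
 * Hedged sketch (hypothetical "foo" driver, not part of this patch) of
 * how an RX interrupt handler cooperates with netmap_rx_irq(): a
 * non-zero return means netmap has consumed the event (the wakeup has
 * already been delivered) and the regular mbuf path must be skipped.
 */
static void
foo_rxeof(struct foo_softc *sc, u_int ring_nr)
{
	u_int work_done = 0;

	if (netmap_rx_irq(sc->ifp, ring_nr, &work_done))
		return;	/* handled in netmap mode */

	/* ... regular mbuf receive processing ... */
}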
-#ifdef linux /* linux-specific routines */
-
-
-/*
- * Remap linux arguments into the FreeBSD call.
- * - pwait is the poll table, passed as 'dev';
- * If pwait == NULL someone else already woke up before. We can report
- * events but they are filtered upstream.
- * If pwait != NULL, then pwait->key contains the list of events.
- * - events is computed from pwait as above.
- * - file is passed as 'td';
- */
-static u_int
-linux_netmap_poll(struct file * file, struct poll_table_struct *pwait)
-{
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28)
- int events = POLLIN | POLLOUT; /* XXX maybe... */
-#elif LINUX_VERSION_CODE < KERNEL_VERSION(3,4,0)
- int events = pwait ? pwait->key : POLLIN | POLLOUT;
-#else /* in 3.4.0 field 'key' was renamed to '_key' */
- int events = pwait ? pwait->_key : POLLIN | POLLOUT;
-#endif
- return netmap_poll((void *)pwait, events, (void *)file);
-}
-
-
-static int
-linux_netmap_mmap(struct file *f, struct vm_area_struct *vma)
-{
- int error = 0;
- unsigned long off, va;
- vm_ooffset_t pa;
- struct netmap_priv_d *priv = f->private_data;
- /*
- * vma->vm_start: start of mapping user address space
- * vma->vm_end: end of the mapping user address space
- * vma->vm_pfoff: offset of first page in the device
- */
-
- // XXX security checks
-
- error = netmap_get_memory(priv);
- ND("get_memory returned %d", error);
- if (error)
- return -error;
-
- if ((vma->vm_start & ~PAGE_MASK) || (vma->vm_end & ~PAGE_MASK)) {
- ND("vm_start = %lx vm_end = %lx", vma->vm_start, vma->vm_end);
- return -EINVAL;
- }
-
- for (va = vma->vm_start, off = vma->vm_pgoff;
- va < vma->vm_end;
- va += PAGE_SIZE, off++)
- {
- pa = netmap_mem_ofstophys(priv->np_mref, off << PAGE_SHIFT);
- if (pa == 0)
- return -EINVAL;
-
- ND("va %lx pa %p", va, pa);
- error = remap_pfn_range(vma, va, pa >> PAGE_SHIFT, PAGE_SIZE, vma->vm_page_prot);
- if (error)
- return error;
- }
- return 0;
-}
-
-
/*
- * This one is probably already protected by the netif lock XXX
- */
-static netdev_tx_t
-linux_netmap_start_xmit(struct sk_buff *skb, struct net_device *dev)
-{
- netmap_transmit(dev, skb);
- return (NETDEV_TX_OK);
-}
-
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36) // XXX was 37
-#define LIN_IOCTL_NAME .ioctl
-int
-linux_netmap_ioctl(struct inode *inode, struct file *file, u_int cmd, u_long data /* arg */)
-#else
-#define LIN_IOCTL_NAME .unlocked_ioctl
-long
-linux_netmap_ioctl(struct file *file, u_int cmd, u_long data /* arg */)
-#endif
-{
- int ret;
- struct nmreq nmr;
- bzero(&nmr, sizeof(nmr));
-
- if (cmd == NIOCTXSYNC || cmd == NIOCRXSYNC) {
- data = 0; /* no argument required here */
- }
- if (data && copy_from_user(&nmr, (void *)data, sizeof(nmr) ) != 0)
- return -EFAULT;
- ret = netmap_ioctl(NULL, cmd, (caddr_t)&nmr, 0, (void *)file);
- if (data && copy_to_user((void*)data, &nmr, sizeof(nmr) ) != 0)
- return -EFAULT;
- return -ret;
-}
-
-
-static int
-netmap_release(struct inode *inode, struct file *file)
-{
- (void)inode; /* UNUSED */
- if (file->private_data)
- netmap_dtor(file->private_data);
- return (0);
-}
-
-
-static int
-linux_netmap_open(struct inode *inode, struct file *file)
-{
- struct netmap_priv_d *priv;
- (void)inode; /* UNUSED */
-
- priv = malloc(sizeof(struct netmap_priv_d), M_DEVBUF,
- M_NOWAIT | M_ZERO);
- if (priv == NULL)
- return -ENOMEM;
-
- file->private_data = priv;
-
- return (0);
-}
-
-
-static struct file_operations netmap_fops = {
- .owner = THIS_MODULE,
- .open = linux_netmap_open,
- .mmap = linux_netmap_mmap,
- LIN_IOCTL_NAME = linux_netmap_ioctl,
- .poll = linux_netmap_poll,
- .release = netmap_release,
-};
-
-
-static struct miscdevice netmap_cdevsw = { /* same name as FreeBSD */
- MISC_DYNAMIC_MINOR,
- "netmap",
- &netmap_fops,
-};
-
-static int netmap_init(void);
-static void netmap_fini(void);
-
-
-/* Errors have negative values on linux */
-static int linux_netmap_init(void)
-{
- return -netmap_init();
-}
-
-module_init(linux_netmap_init);
-module_exit(netmap_fini);
-/* export certain symbols to other modules */
-EXPORT_SYMBOL(netmap_attach); // driver attach routines
-EXPORT_SYMBOL(netmap_detach); // driver detach routines
-EXPORT_SYMBOL(netmap_ring_reinit); // ring init on error
-EXPORT_SYMBOL(netmap_buffer_lut);
-EXPORT_SYMBOL(netmap_total_buffers); // index check
-EXPORT_SYMBOL(netmap_buffer_base);
-EXPORT_SYMBOL(netmap_reset); // ring init routines
-EXPORT_SYMBOL(netmap_buf_size);
-EXPORT_SYMBOL(netmap_rx_irq); // default irq handler
-EXPORT_SYMBOL(netmap_no_pendintr); // XXX mitigation - should go away
-EXPORT_SYMBOL(netmap_bdg_ctl); // bridge configuration routine
-EXPORT_SYMBOL(netmap_bdg_learning); // the default lookup function
-EXPORT_SYMBOL(netmap_disable_all_rings);
-EXPORT_SYMBOL(netmap_enable_all_rings);
-
-
-MODULE_AUTHOR("http://info.iet.unipi.it/~luigi/netmap/");
-MODULE_DESCRIPTION("The netmap packet I/O framework");
-MODULE_LICENSE("Dual BSD/GPL"); /* the code here is all BSD. */
-
-#else /* __FreeBSD__ */
-
-
-static struct cdevsw netmap_cdevsw = {
- .d_version = D_VERSION,
- .d_name = "netmap",
- .d_open = netmap_open,
- .d_mmap_single = netmap_mmap_single,
- .d_ioctl = netmap_ioctl,
- .d_poll = netmap_poll,
- .d_close = netmap_close,
-};
-#endif /* __FreeBSD__ */
-
-/*
- *---- support for virtual bridge -----
- */
-
-/* ----- FreeBSD if_bridge hash function ------- */
-
-/*
- * The following hash function is adapted from "Hash Functions" by Bob Jenkins
- * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
+ * Module loader and unloader
+ *
+ * netmap_init() creates the /dev/netmap device and initializes
+ * all global variables. Returns 0 on success, errno on failure
+ * (but there is no chance)
*
- * http://www.burtleburtle.net/bob/hash/spooky.html
+ * netmap_fini() destroys everything.
*/
-#define mix(a, b, c) \
-do { \
- a -= b; a -= c; a ^= (c >> 13); \
- b -= c; b -= a; b ^= (a << 8); \
- c -= a; c -= b; c ^= (b >> 13); \
- a -= b; a -= c; a ^= (c >> 12); \
- b -= c; b -= a; b ^= (a << 16); \
- c -= a; c -= b; c ^= (b >> 5); \
- a -= b; a -= c; a ^= (c >> 3); \
- b -= c; b -= a; b ^= (a << 10); \
- c -= a; c -= b; c ^= (b >> 15); \
-} while (/*CONSTCOND*/0)
-
-static __inline uint32_t
-nm_bridge_rthash(const uint8_t *addr)
-{
- uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = 0; // hask key
-
- b += addr[5] << 8;
- b += addr[4];
- a += addr[3] << 24;
- a += addr[2] << 16;
- a += addr[1] << 8;
- a += addr[0];
-
- mix(a, b, c);
-#define BRIDGE_RTHASH_MASK (NM_BDG_HASH-1)
- return (c & BRIDGE_RTHASH_MASK);
-}
-
-#undef mix
-
-
-static int
-bdg_netmap_reg(struct ifnet *ifp, int onoff)
-{
- /* the interface is already attached to the bridge,
- * so we only need to toggle IFCAP_NETMAP.
- */
- if (onoff) {
- ifp->if_capenable |= IFCAP_NETMAP;
- } else {
- ifp->if_capenable &= ~IFCAP_NETMAP;
- }
- return 0;
-}
+static struct cdev *netmap_dev; /* /dev/netmap character device. */
+extern struct cdevsw netmap_cdevsw;
-/*
- * Lookup function for a learning bridge.
- * Update the hash table with the source address,
- * and then returns the destination port index, and the
- * ring in *dst_ring (at the moment, always use ring 0)
- */
-u_int
-netmap_bdg_learning(char *buf, u_int buf_len, uint8_t *dst_ring,
- struct netmap_adapter *na)
+void
+netmap_fini(void)
{
- struct nm_hash_ent *ht = na->na_bdg->ht;
- uint32_t sh, dh;
- u_int dst, mysrc = na->bdg_port;
- uint64_t smac, dmac;
-
- if (buf_len < 14) {
- D("invalid buf length %d", buf_len);
- return NM_BDG_NOPORT;
- }
- dmac = le64toh(*(uint64_t *)(buf)) & 0xffffffffffff;
- smac = le64toh(*(uint64_t *)(buf + 4));
- smac >>= 16;
-
- /*
- * The hash is somewhat expensive, there might be some
- * worthwhile optimizations here.
- */
- if ((buf[6] & 1) == 0) { /* valid src */
- uint8_t *s = buf+6;
- sh = nm_bridge_rthash(s); // XXX hash of source
- /* update source port forwarding entry */
- ht[sh].mac = smac; /* XXX expire ? */
- ht[sh].ports = mysrc;
- if (netmap_verbose)
- D("src %02x:%02x:%02x:%02x:%02x:%02x on port %d",
- s[0], s[1], s[2], s[3], s[4], s[5], mysrc);
- }
- dst = NM_BDG_BROADCAST;
- if ((buf[0] & 1) == 0) { /* unicast */
- dh = nm_bridge_rthash(buf); // XXX hash of dst
- if (ht[dh].mac == dmac) { /* found dst */
- dst = ht[dh].ports;
- }
- /* XXX otherwise return NM_BDG_UNKNOWN ? */
- }
- *dst_ring = 0;
- return dst;
+ // XXX destroy_bridges() ?
+ if (netmap_dev)
+ destroy_dev(netmap_dev);
+ netmap_mem_fini();
+ NMG_LOCK_DESTROY();
+ printf("netmap: unloaded module.\n");
}
-
-/*
- * This flush routine supports only unicast and broadcast but a large
- * number of ports, and lets us replace the learn and dispatch functions.
- */
int
-nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_adapter *na,
- u_int ring_nr)
-{
- struct nm_bdg_q *dst_ents, *brddst;
- uint16_t num_dsts = 0, *dsts;
- struct nm_bridge *b = na->na_bdg;
- u_int i, j, me = na->bdg_port;
-
- /*
- * The work area (pointed by ft) is followed by an array of
-	 * pointers to queues, dst_ents; there are NM_BDG_MAXRINGS
- * queues per port plus one for the broadcast traffic.
- * Then we have an array of destination indexes.
- */
- dst_ents = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
- dsts = (uint16_t *)(dst_ents + NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1);
-
- /* first pass: find a destination for each packet in the batch */
- for (i = 0; likely(i < n); i += ft[i].ft_frags) {
- uint8_t dst_ring = ring_nr; /* default, same ring as origin */
- uint16_t dst_port, d_i;
- struct nm_bdg_q *d;
-
- ND("slot %d frags %d", i, ft[i].ft_frags);
- dst_port = b->nm_bdg_lookup(ft[i].ft_buf, ft[i].ft_len,
- &dst_ring, na);
- if (netmap_verbose > 255)
- RD(5, "slot %d port %d -> %d", i, me, dst_port);
- if (dst_port == NM_BDG_NOPORT)
-			continue; /* this packet is to be dropped */
- else if (unlikely(dst_port > NM_BDG_MAXPORTS))
- continue;
- else if (dst_port == NM_BDG_BROADCAST)
- dst_ring = 0; /* broadcasts always go to ring 0 */
- else if (unlikely(dst_port == me ||
- !b->bdg_ports[dst_port]))
- continue;
-
- /* get a position in the scratch pad */
- d_i = dst_port * NM_BDG_MAXRINGS + dst_ring;
- d = dst_ents + d_i;
-
- /* append the first fragment to the list */
- if (d->bq_head == NM_FT_NULL) { /* new destination */
- d->bq_head = d->bq_tail = i;
- /* remember this position to be scanned later */
- if (dst_port != NM_BDG_BROADCAST)
- dsts[num_dsts++] = d_i;
- } else {
- ft[d->bq_tail].ft_next = i;
- d->bq_tail = i;
- }
- d->bq_len += ft[i].ft_frags;
- }
-
- /*
- * Broadcast traffic goes to ring 0 on all destinations.
- * So we need to add these rings to the list of ports to scan.
- * XXX at the moment we scan all NM_BDG_MAXPORTS ports, which is
- * expensive. We should keep a compact list of active destinations
- * so we could shorten this loop.
- */
- brddst = dst_ents + NM_BDG_BROADCAST * NM_BDG_MAXRINGS;
- if (brddst->bq_head != NM_FT_NULL) {
- for (j = 0; likely(j < b->bdg_active_ports); j++) {
- uint16_t d_i;
- i = b->bdg_port_index[j];
- if (unlikely(i == me))
- continue;
- d_i = i * NM_BDG_MAXRINGS;
- if (dst_ents[d_i].bq_head == NM_FT_NULL)
- dsts[num_dsts++] = d_i;
- }
- }
-
- ND(5, "pass 1 done %d pkts %d dsts", n, num_dsts);
- /* second pass: scan destinations (XXX will be modular somehow) */
- for (i = 0; i < num_dsts; i++) {
- struct ifnet *dst_ifp;
- struct netmap_adapter *dst_na;
- struct netmap_kring *kring;
- struct netmap_ring *ring;
- u_int dst_nr, is_vp, lim, j, sent = 0, d_i, next, brd_next;
- u_int needed, howmany;
- int retry = netmap_txsync_retry;
- struct nm_bdg_q *d;
- uint32_t my_start = 0, lease_idx = 0;
- int nrings;
-
- d_i = dsts[i];
- ND("second pass %d port %d", i, d_i);
- d = dst_ents + d_i;
- // XXX fix the division
- dst_na = b->bdg_ports[d_i/NM_BDG_MAXRINGS];
- /* protect from the lookup function returning an inactive
- * destination port
- */
- if (unlikely(dst_na == NULL))
- goto cleanup;
- if (dst_na->na_flags & NAF_SW_ONLY)
- goto cleanup;
- dst_ifp = dst_na->ifp;
- /*
- * The interface may be in !netmap mode in two cases:
- * - when na is attached but not activated yet;
- * - when na is being deactivated but is still attached.
- */
- if (unlikely(!(dst_ifp->if_capenable & IFCAP_NETMAP))) {
- ND("not in netmap mode!");
- goto cleanup;
- }
-
- /* there is at least one either unicast or broadcast packet */
- brd_next = brddst->bq_head;
- next = d->bq_head;
- /* we need to reserve this many slots. If fewer are
- * available, some packets will be dropped.
-		 * Packets may have multiple fragments, so there is a
-		 * chance that we may not use all of the slots we have
-		 * claimed; we will need to handle the leftover ones
-		 * when we regain the lock.
- */
- needed = d->bq_len + brddst->bq_len;
-
- is_vp = nma_is_vp(dst_na);
- ND(5, "pass 2 dst %d is %x %s",
- i, d_i, is_vp ? "virtual" : "nic/host");
- dst_nr = d_i & (NM_BDG_MAXRINGS-1);
- if (is_vp) { /* virtual port */
- nrings = dst_na->num_rx_rings;
- } else {
- nrings = dst_na->num_tx_rings;
- }
- if (dst_nr >= nrings)
- dst_nr = dst_nr % nrings;
- kring = is_vp ? &dst_na->rx_rings[dst_nr] :
- &dst_na->tx_rings[dst_nr];
- ring = kring->ring;
- lim = kring->nkr_num_slots - 1;
-
-retry:
-
- /* reserve the buffers in the queue and an entry
- * to report completion, and drop lock.
- * XXX this might become a helper function.
- */
- mtx_lock(&kring->q_lock);
- if (kring->nkr_stopped) {
- mtx_unlock(&kring->q_lock);
- goto cleanup;
- }
- /* on physical interfaces, do a txsync to recover
- * slots for packets already transmitted.
- * XXX maybe we could be optimistic and rely on a retry
- * in case of failure.
- */
- if (nma_is_hw(dst_na)) {
- dst_na->nm_txsync(dst_ifp, dst_nr, 0);
- }
- my_start = j = kring->nkr_hwlease;
- howmany = nm_kr_space(kring, is_vp);
- if (needed < howmany)
- howmany = needed;
- lease_idx = nm_kr_lease(kring, howmany, is_vp);
- mtx_unlock(&kring->q_lock);
-
- /* only retry if we need more than available slots */
- if (retry && needed <= howmany)
- retry = 0;
-
- /* copy to the destination queue */
- while (howmany > 0) {
- struct netmap_slot *slot;
- struct nm_bdg_fwd *ft_p, *ft_end;
- u_int cnt;
-
- /* find the queue from which we pick next packet.
- * NM_FT_NULL is always higher than valid indexes
- * so we never dereference it if the other list
- * has packets (and if both are empty we never
- * get here).
- */
- if (next < brd_next) {
- ft_p = ft + next;
- next = ft_p->ft_next;
- } else { /* insert broadcast */
- ft_p = ft + brd_next;
- brd_next = ft_p->ft_next;
- }
- cnt = ft_p->ft_frags; // cnt > 0
- if (unlikely(cnt > howmany))
- break; /* no more space */
- howmany -= cnt;
- if (netmap_verbose && cnt > 1)
- RD(5, "rx %d frags to %d", cnt, j);
- ft_end = ft_p + cnt;
- do {
- void *dst, *src = ft_p->ft_buf;
- size_t len = (ft_p->ft_len + 63) & ~63;
-
- slot = &ring->slot[j];
- dst = BDG_NMB(dst_na->nm_mem, slot);
- /* round to a multiple of 64 */
-
- ND("send %d %d bytes at %s:%d",
- i, ft_p->ft_len, dst_ifp->if_xname, j);
- if (ft_p->ft_flags & NS_INDIRECT) {
- if (copyin(src, dst, len)) {
- // invalid user pointer, pretend len is 0
- ft_p->ft_len = 0;
- }
- } else {
- //memcpy(dst, src, len);
- pkt_copy(src, dst, (int)len);
- }
- slot->len = ft_p->ft_len;
- slot->flags = (cnt << 8)| NS_MOREFRAG;
- j = nm_next(j, lim);
- ft_p++;
- sent++;
- } while (ft_p != ft_end);
- slot->flags = (cnt << 8); /* clear flag on last entry */
- /* are we done ? */
- if (next == NM_FT_NULL && brd_next == NM_FT_NULL)
- break;
- }
- {
- /* current position */
- uint32_t *p = kring->nkr_leases; /* shorthand */
- uint32_t update_pos;
- int still_locked = 1;
-
- mtx_lock(&kring->q_lock);
- if (unlikely(howmany > 0)) {
-				/* did not use all bufs. If I am the last one
-				 * I can recover the slots, otherwise I must
-				 * fill them with 0 to mark empty packets.
- */
- ND("leftover %d bufs", howmany);
- if (nm_next(lease_idx, lim) == kring->nkr_lease_idx) {
-					/* yes, I am the last one */
- ND("roll back nkr_hwlease to %d", j);
- kring->nkr_hwlease = j;
- } else {
- while (howmany-- > 0) {
- ring->slot[j].len = 0;
- ring->slot[j].flags = 0;
- j = nm_next(j, lim);
- }
- }
- }
- p[lease_idx] = j; /* report I am done */
-
- update_pos = is_vp ? nm_kr_rxpos(kring) : ring->cur;
-
- if (my_start == update_pos) {
- /* all slots before my_start have been reported,
- * so scan subsequent leases to see if other ranges
-			 * have been completed, and do a selwakeup or txsync.
- */
- while (lease_idx != kring->nkr_lease_idx &&
- p[lease_idx] != NR_NOSLOT) {
- j = p[lease_idx];
- p[lease_idx] = NR_NOSLOT;
- lease_idx = nm_next(lease_idx, lim);
- }
- /* j is the new 'write' position. j != my_start
- * means there are new buffers to report
- */
- if (likely(j != my_start)) {
- if (is_vp) {
- uint32_t old_avail = kring->nr_hwavail;
-
- kring->nr_hwavail = (j >= kring->nr_hwcur) ?
- j - kring->nr_hwcur :
- j + lim + 1 - kring->nr_hwcur;
- if (kring->nr_hwavail < old_avail) {
- D("avail shrink %d -> %d",
- old_avail, kring->nr_hwavail);
- }
- still_locked = 0;
- mtx_unlock(&kring->q_lock);
- selwakeuppri(&kring->si, PI_NET);
- } else {
- ring->cur = j;
- /* XXX update avail ? */
- still_locked = 0;
- dst_na->nm_txsync(dst_ifp, dst_nr, 0);
- mtx_unlock(&kring->q_lock);
-
- /* retry to send more packets */
- if (nma_is_hw(dst_na) && retry--)
- goto retry;
- }
- }
- }
- if (still_locked)
- mtx_unlock(&kring->q_lock);
- }
-cleanup:
- d->bq_head = d->bq_tail = NM_FT_NULL; /* cleanup */
- d->bq_len = 0;
- }
- brddst->bq_head = brddst->bq_tail = NM_FT_NULL; /* cleanup */
- brddst->bq_len = 0;
- return 0;
-}
-
-
-/*
- * main dispatch routine for the bridge.
- * We already know that only one thread is running this.
- * we must run nm_bdg_preflush without lock.
- */
-static int
-bdg_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int flags)
-{
- struct netmap_adapter *na = NA(ifp);
- struct netmap_kring *kring = &na->tx_rings[ring_nr];
- struct netmap_ring *ring = kring->ring;
- u_int j, k, lim = kring->nkr_num_slots - 1;
-
- k = ring->cur;
- if (k > lim)
- return netmap_ring_reinit(kring);
-
- if (bridge_batch <= 0) { /* testing only */
- j = k; // used all
- goto done;
- }
- if (bridge_batch > NM_BDG_BATCH)
- bridge_batch = NM_BDG_BATCH;
-
- j = nm_bdg_preflush(na, ring_nr, kring, k);
- if (j != k)
- D("early break at %d/ %d, avail %d", j, k, kring->nr_hwavail);
- /* k-j modulo ring size is the number of slots processed */
- if (k < j)
- k += kring->nkr_num_slots;
- kring->nr_hwavail = lim - (k - j);
-
-done:
- kring->nr_hwcur = j;
- ring->avail = kring->nr_hwavail;
- if (netmap_verbose)
- D("%s ring %d flags %d", ifp->if_xname, ring_nr, flags);
- return 0;
-}
-
-
-/*
- * user process reading from a VALE switch.
- * Already protected against concurrent calls from userspace,
- * but we must acquire the queue's lock to protect against
- * writers on the same queue.
- */
-static int
-bdg_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int flags)
-{
- struct netmap_adapter *na = NA(ifp);
- struct netmap_kring *kring = &na->rx_rings[ring_nr];
- struct netmap_ring *ring = kring->ring;
- u_int j, lim = kring->nkr_num_slots - 1;
- u_int k = ring->cur, resvd = ring->reserved;
- int n;
-
- mtx_lock(&kring->q_lock);
- if (k > lim) {
- D("ouch dangerous reset!!!");
- n = netmap_ring_reinit(kring);
- goto done;
- }
-
- /* skip past packets that userspace has released */
- j = kring->nr_hwcur; /* netmap ring index */
- if (resvd > 0) {
- if (resvd + ring->avail >= lim + 1) {
- D("XXX invalid reserve/avail %d %d", resvd, ring->avail);
- ring->reserved = resvd = 0; // XXX panic...
- }
- k = (k >= resvd) ? k - resvd : k + lim + 1 - resvd;
- }
-
- if (j != k) { /* userspace has released some packets. */
- n = k - j;
- if (n < 0)
- n += kring->nkr_num_slots;
- ND("userspace releases %d packets", n);
- for (n = 0; likely(j != k); n++) {
- struct netmap_slot *slot = &ring->slot[j];
- void *addr = BDG_NMB(na->nm_mem, slot);
-
- if (addr == netmap_buffer_base) { /* bad buf */
- D("bad buffer index %d, ignore ?",
- slot->buf_idx);
- }
- slot->flags &= ~NS_BUF_CHANGED;
- j = nm_next(j, lim);
- }
- kring->nr_hwavail -= n;
- kring->nr_hwcur = k;
- }
- /* tell userspace that there are new packets */
- ring->avail = kring->nr_hwavail - resvd;
- n = 0;
-done:
- mtx_unlock(&kring->q_lock);
- return n;
-}
-
-
-static int
-bdg_netmap_attach(struct netmap_adapter *arg)
-{
- struct netmap_adapter na;
-
- ND("attaching virtual bridge");
- bzero(&na, sizeof(na));
-
- na.ifp = arg->ifp;
- na.na_flags = NAF_BDG_MAYSLEEP | NAF_MEM_OWNER;
- na.num_tx_rings = arg->num_tx_rings;
- na.num_rx_rings = arg->num_rx_rings;
- na.num_tx_desc = arg->num_tx_desc;
- na.num_rx_desc = arg->num_rx_desc;
- na.nm_txsync = bdg_netmap_txsync;
- na.nm_rxsync = bdg_netmap_rxsync;
- na.nm_register = bdg_netmap_reg;
- na.nm_mem = netmap_mem_private_new(arg->ifp->if_xname,
- na.num_tx_rings, na.num_tx_desc,
- na.num_rx_rings, na.num_rx_desc);
- return netmap_attach(&na, na.num_tx_rings);
-}
-
-
-static struct cdev *netmap_dev; /* /dev/netmap character device. */
-
-
-/*
- * Module loader.
- *
- * Create the /dev/netmap device and initialize all global
- * variables.
- *
- * Return 0 on success, errno on failure.
- */
-static int
netmap_init(void)
{
- int i, error;
+ int error;
NMG_LOCK_INIT();
error = netmap_mem_init();
- if (error != 0) {
- printf("netmap: unable to initialize the memory allocator.\n");
- return (error);
- }
- printf("netmap: loaded module\n");
+ if (error != 0)
+ goto fail;
+ /* XXX could use make_dev_credv() to get error number */
netmap_dev = make_dev(&netmap_cdevsw, 0, UID_ROOT, GID_WHEEL, 0660,
"netmap");
+ if (!netmap_dev)
+ goto fail;
- bzero(nm_bridges, sizeof(struct nm_bridge) * NM_BRIDGES); /* safety */
- for (i = 0; i < NM_BRIDGES; i++)
- BDG_RWINIT(&nm_bridges[i]);
- return (error);
-}
-
-
-/*
- * Module unloader.
- *
- * Free all the memory, and destroy the ``/dev/netmap`` device.
- */
-static void
-netmap_fini(void)
-{
- destroy_dev(netmap_dev);
- netmap_mem_fini();
- NMG_LOCK_DESTROY();
- printf("netmap: unloaded module.\n");
-}
-
-
-#ifdef __FreeBSD__
-/*
- * Kernel entry point.
- *
- * Initialize/finalize the module and return.
- *
- * Return 0 on success, errno on failure.
- */
-static int
-netmap_loader(__unused struct module *module, int event, __unused void *arg)
-{
- int error = 0;
-
- switch (event) {
- case MOD_LOAD:
- error = netmap_init();
- break;
-
- case MOD_UNLOAD:
- netmap_fini();
- break;
-
- default:
- error = EOPNOTSUPP;
- break;
- }
-
- return (error);
+ netmap_init_bridges();
+ printf("netmap: loaded module\n");
+ return (0);
+fail:
+ netmap_fini();
+ return (EINVAL); /* may be incorrect */
}
-
-
-DEV_MODULE(netmap, netmap_loader, NULL);
-#endif /* __FreeBSD__ */
diff --git a/sys/dev/netmap/netmap_freebsd.c b/sys/dev/netmap/netmap_freebsd.c
new file mode 100644
index 000000000000..c2814146d2ef
--- /dev/null
+++ b/sys/dev/netmap/netmap_freebsd.c
@@ -0,0 +1,410 @@
+/*
+ * Copyright (C) 2013 Universita` di Pisa. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/* $FreeBSD$ */
+
+#include <sys/types.h>
+#include <sys/module.h>
+#include <sys/errno.h>
+#include <sys/param.h> /* defines used in kernel.h */
+#include <sys/kernel.h> /* types used in module initialization */
+#include <sys/conf.h> /* DEV_MODULE */
+
+#include <sys/rwlock.h>
+
+#include <vm/vm.h> /* vtophys */
+#include <vm/pmap.h> /* vtophys */
+#include <vm/vm_param.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pager.h>
+#include <vm/uma.h>
+
+
+#include <sys/malloc.h>
+#include <sys/socket.h> /* sockaddrs */
+#include <sys/selinfo.h>
+#include <net/if.h>
+#include <net/if_var.h>
+#include <machine/bus.h> /* bus_dmamap_* */
+
+#include <net/netmap.h>
+#include <dev/netmap/netmap_kern.h>
+#include <dev/netmap/netmap_mem2.h>
+
+
+/* ======================== FREEBSD-SPECIFIC ROUTINES ================== */
+
+/*
+ * Intercept the rx routine in the standard device driver.
+ * Second argument is non-zero to intercept, 0 to restore
+ */
+int
+netmap_catch_rx(struct netmap_adapter *na, int intercept)
+{
+ struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na;
+ struct ifnet *ifp = na->ifp;
+
+ if (intercept) {
+ if (gna->save_if_input) {
+ D("cannot intercept again");
+ return EINVAL; /* already set */
+ }
+ gna->save_if_input = ifp->if_input;
+ ifp->if_input = generic_rx_handler;
+ } else {
+ if (!gna->save_if_input){
+ D("cannot restore");
+ return EINVAL; /* not saved */
+ }
+ ifp->if_input = gna->save_if_input;
+ gna->save_if_input = NULL;
+ }
+
+ return 0;
+}
+
+/*
+ * Intercept the packet steering routine in the tx path,
+ * so that we can decide which queue is used for an mbuf.
+ * Second argument is non-zero to intercept, 0 to restore.
+ *
+ * XXX see if FreeBSD has such a mechanism
+ */
+void
+netmap_catch_packet_steering(struct netmap_generic_adapter *na, int enable)
+{
+ if (enable) {
+ } else {
+ }
+}
+
+/* Transmit routine used by generic_netmap_txsync(). Returns 0 on success
+ * and non-zero on error (which may be packet drops or other errors).
+ * addr and len identify the netmap buffer, m is the (preallocated)
+ * mbuf to use for transmissions.
+ *
+ * We should add a reference to the mbuf so the m_freem() at the end
+ * of the transmission does not consume resources.
+ *
+ * On FreeBSD, and on multiqueue cards, we can force the queue using
+ * if ((m->m_flags & M_FLOWID) != 0)
+ * i = m->m_pkthdr.flowid % adapter->num_queues;
+ * else
+ * i = curcpu % adapter->num_queues;
+ *
+ */
+int
+generic_xmit_frame(struct ifnet *ifp, struct mbuf *m,
+ void *addr, u_int len, u_int ring_nr)
+{
+ int ret;
+
+ m->m_len = m->m_pkthdr.len = 0;
+
+ // copy data to the mbuf
+ m_copyback(m, 0, len, addr);
+
+	// increment the refcount; we are the only owner here, so we could even skip the atomic
+ atomic_fetchadd_int(m->m_ext.ref_cnt, 1);
+ m->m_flags |= M_FLOWID;
+ m->m_pkthdr.flowid = ring_nr;
+ m->m_pkthdr.rcvif = ifp; /* used for tx notification */
+ ret = ifp->if_transmit(ifp, m);
+ return ret;
+}
+
+/*
+ * The following two functions are empty until we have a generic
+ * way to extract the info from the ifp
+ */
+int
+generic_find_num_desc(struct ifnet *ifp, unsigned int *tx, unsigned int *rx)
+{
+ D("called");
+ return 0;
+}
+
+void
+generic_find_num_queues(struct ifnet *ifp, u_int *txq, u_int *rxq)
+{
+ D("called");
+ *txq = 1;
+ *rxq = 1;
+}
+
+void netmap_mitigation_init(struct netmap_generic_adapter *na)
+{
+ ND("called");
+ na->mit_pending = 0;
+}
+
+
+void netmap_mitigation_start(struct netmap_generic_adapter *na)
+{
+ ND("called");
+}
+
+void netmap_mitigation_restart(struct netmap_generic_adapter *na)
+{
+ ND("called");
+}
+
+int netmap_mitigation_active(struct netmap_generic_adapter *na)
+{
+ ND("called");
+ return 0;
+}
+
+void netmap_mitigation_cleanup(struct netmap_generic_adapter *na)
+{
+ ND("called");
+}
+
+/*
+ * In order to track whether pages are still mapped, we hook into
+ * the standard cdev_pager and intercept the constructor and
+ * destructor.
+ */
+
+struct netmap_vm_handle_t {
+ struct cdev *dev;
+ struct netmap_priv_d *priv;
+};
+
+static int
+netmap_dev_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
+ vm_ooffset_t foff, struct ucred *cred, u_short *color)
+{
+ struct netmap_vm_handle_t *vmh = handle;
+ D("handle %p size %jd prot %d foff %jd",
+ handle, (intmax_t)size, prot, (intmax_t)foff);
+ dev_ref(vmh->dev);
+ return 0;
+}
+
+
+static void
+netmap_dev_pager_dtor(void *handle)
+{
+ struct netmap_vm_handle_t *vmh = handle;
+ struct cdev *dev = vmh->dev;
+ struct netmap_priv_d *priv = vmh->priv;
+ D("handle %p", handle);
+ netmap_dtor(priv);
+ free(vmh, M_DEVBUF);
+ dev_rel(dev);
+}
+
+static int
+netmap_dev_pager_fault(vm_object_t object, vm_ooffset_t offset,
+ int prot, vm_page_t *mres)
+{
+ struct netmap_vm_handle_t *vmh = object->handle;
+ struct netmap_priv_d *priv = vmh->priv;
+ vm_paddr_t paddr;
+ vm_page_t page;
+ vm_memattr_t memattr;
+ vm_pindex_t pidx;
+
+ ND("object %p offset %jd prot %d mres %p",
+ object, (intmax_t)offset, prot, mres);
+ memattr = object->memattr;
+ pidx = OFF_TO_IDX(offset);
+ paddr = netmap_mem_ofstophys(priv->np_mref, offset);
+ if (paddr == 0)
+ return VM_PAGER_FAIL;
+
+ if (((*mres)->flags & PG_FICTITIOUS) != 0) {
+ /*
+ * If the passed in result page is a fake page, update it with
+ * the new physical address.
+ */
+ page = *mres;
+ vm_page_updatefake(page, paddr, memattr);
+ } else {
+ /*
+ * Replace the passed in reqpage page with our own fake page and
+ * free up the all of the original pages.
+ */
+#ifndef VM_OBJECT_WUNLOCK /* FreeBSD < 10.x */
+#define VM_OBJECT_WUNLOCK VM_OBJECT_UNLOCK
+#define VM_OBJECT_WLOCK VM_OBJECT_LOCK
+#endif /* VM_OBJECT_WUNLOCK */
+
+ VM_OBJECT_WUNLOCK(object);
+ page = vm_page_getfake(paddr, memattr);
+ VM_OBJECT_WLOCK(object);
+ vm_page_lock(*mres);
+ vm_page_free(*mres);
+ vm_page_unlock(*mres);
+ *mres = page;
+ vm_page_insert(page, object, pidx);
+ }
+ page->valid = VM_PAGE_BITS_ALL;
+ return (VM_PAGER_OK);
+}
+
+
+static struct cdev_pager_ops netmap_cdev_pager_ops = {
+ .cdev_pg_ctor = netmap_dev_pager_ctor,
+ .cdev_pg_dtor = netmap_dev_pager_dtor,
+ .cdev_pg_fault = netmap_dev_pager_fault,
+};
+
+
+static int
+netmap_mmap_single(struct cdev *cdev, vm_ooffset_t *foff,
+ vm_size_t objsize, vm_object_t *objp, int prot)
+{
+ int error;
+ struct netmap_vm_handle_t *vmh;
+ struct netmap_priv_d *priv;
+ vm_object_t obj;
+
+ D("cdev %p foff %jd size %jd objp %p prot %d", cdev,
+ (intmax_t )*foff, (intmax_t )objsize, objp, prot);
+
+ vmh = malloc(sizeof(struct netmap_vm_handle_t), M_DEVBUF,
+ M_NOWAIT | M_ZERO);
+ if (vmh == NULL)
+ return ENOMEM;
+ vmh->dev = cdev;
+
+ NMG_LOCK();
+ error = devfs_get_cdevpriv((void**)&priv);
+ if (error)
+ goto err_unlock;
+ vmh->priv = priv;
+ priv->np_refcount++;
+ NMG_UNLOCK();
+
+ error = netmap_get_memory(priv);
+ if (error)
+ goto err_deref;
+
+ obj = cdev_pager_allocate(vmh, OBJT_DEVICE,
+ &netmap_cdev_pager_ops, objsize, prot,
+ *foff, NULL);
+ if (obj == NULL) {
+ D("cdev_pager_allocate failed");
+ error = EINVAL;
+ goto err_deref;
+ }
+
+ *objp = obj;
+ return 0;
+
+err_deref:
+ NMG_LOCK();
+ priv->np_refcount--;
+err_unlock:
+ NMG_UNLOCK();
+// err:
+ free(vmh, M_DEVBUF);
+ return error;
+}
+
+
+// XXX can we remove this ?
+static int
+netmap_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
+{
+ if (netmap_verbose)
+ D("dev %p fflag 0x%x devtype %d td %p",
+ dev, fflag, devtype, td);
+ return 0;
+}
+
+
+static int
+netmap_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
+{
+ struct netmap_priv_d *priv;
+ int error;
+
+ (void)dev;
+ (void)oflags;
+ (void)devtype;
+ (void)td;
+
+ // XXX wait or nowait ?
+ priv = malloc(sizeof(struct netmap_priv_d), M_DEVBUF,
+ M_NOWAIT | M_ZERO);
+ if (priv == NULL)
+ return ENOMEM;
+
+ error = devfs_set_cdevpriv(priv, netmap_dtor);
+ if (error)
+ return error;
+
+ priv->np_refcount = 1;
+
+ return 0;
+}
+
+
+struct cdevsw netmap_cdevsw = {
+ .d_version = D_VERSION,
+ .d_name = "netmap",
+ .d_open = netmap_open,
+ .d_mmap_single = netmap_mmap_single,
+ .d_ioctl = netmap_ioctl,
+ .d_poll = netmap_poll,
+ .d_close = netmap_close,
+};
+
+
+/*
+ * Kernel entry point.
+ *
+ * Initialize/finalize the module and return.
+ *
+ * Return 0 on success, errno on failure.
+ */
+static int
+netmap_loader(__unused struct module *module, int event, __unused void *arg)
+{
+ int error = 0;
+
+ switch (event) {
+ case MOD_LOAD:
+ error = netmap_init();
+ break;
+
+ case MOD_UNLOAD:
+ netmap_fini();
+ break;
+
+ default:
+ error = EOPNOTSUPP;
+ break;
+ }
+
+ return (error);
+}
+
+
+DEV_MODULE(netmap, netmap_loader, NULL);
diff --git a/sys/dev/netmap/netmap_generic.c b/sys/dev/netmap/netmap_generic.c
new file mode 100644
index 000000000000..2c42db3f8862
--- /dev/null
+++ b/sys/dev/netmap/netmap_generic.c
@@ -0,0 +1,818 @@
+/*
+ * Copyright (C) 2013 Universita` di Pisa. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * This module implements netmap support on top of standard,
+ * unmodified device drivers.
+ *
+ * A NIOCREGIF request is handled here if the device does not
+ * have native support. TX and RX rings are emulated as follows:
+ *
+ * NIOCREGIF
+ * We preallocate a block of TX mbufs (roughly as many as
+ * tx descriptors; the number is not critical) to speed up
+ * operation during transmissions. The refcount on most of
+ * these buffers is artificially bumped up so we can recycle
+ * them more easily. Also, the destructor is intercepted
+ * so we use it as an interrupt notification to wake up
+ * processes blocked on a poll().
+ *
+ * For each receive ring we allocate one "struct mbq"
+ * (an mbuf tailq plus a spinlock). We intercept packets
+ * (through if_input) on the receive path and put them
+ * in the mbq, from which the netmap receive routines
+ * can grab them.
+ *
+ * TX:
+ * in the generic_netmap_txsync() routine, netmap buffers are copied
+ * (or linked, in the future) to the preallocated mbufs
+ * and pushed to the transmit queue. Some of these mbufs
+ * (those with NS_REPORT, or otherwise every half ring)
+ * have the refcount=1, others have refcount=2.
+ * When the destructor is invoked, we take that as
+ * a notification that all mbufs up to that one in
+ * the specific ring have been completed, and generate
+ * the equivalent of a transmit interrupt.
+ *
+ * RX:
+ *    each intercepted mbuf is queued on the ring's "struct mbq";
+ *    generic_netmap_rxsync() then dequeues it and copies the
+ *    payload into the corresponding netmap receive buffer.
+ */
+
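+/*
+ * Hypothetical userspace sketch (an illustration, not part of this
+ * driver): the emulated path is reached through the same NIOCREGIF
+ * sequence used for native adapters, e.g.
+ *
+ *	struct nmreq req;
+ *	int fd = open("/dev/netmap", O_RDWR);
+ *
+ *	bzero(&req, sizeof(req));
+ *	strncpy(req.nr_name, "em0", sizeof(req.nr_name));
+ *	req.nr_version = NETMAP_API;
+ *	ioctl(fd, NIOCREGIF, &req);	// bind to all rings of "em0"
+ *	void *mem = mmap(0, req.nr_memsize, PROT_READ | PROT_WRITE,
+ *			MAP_SHARED, fd, 0);
+ *	struct netmap_if *nifp = NETMAP_IF(mem, req.nr_offset);
+ *
+ * "em0" is only a placeholder interface name.
+ */
+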
+#ifdef __FreeBSD__
+
+#include <sys/cdefs.h> /* prerequisite */
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/malloc.h>
+#include <sys/lock.h> /* PROT_EXEC */
+#include <sys/rwlock.h>
+#include <sys/socket.h> /* sockaddrs */
+#include <sys/selinfo.h>
+#include <net/if.h>
+#include <net/if_var.h>
+#include <machine/bus.h> /* bus_dmamap_* in netmap_kern.h */
+
+// XXX temporary - D() defined here
+#include <net/netmap.h>
+#include <dev/netmap/netmap_kern.h>
+#include <dev/netmap/netmap_mem2.h>
+
+#define rtnl_lock() D("rtnl_lock called");
+#define rtnl_unlock()	D("rtnl_unlock called");
+#define MBUF_TXQ(m) ((m)->m_pkthdr.flowid)
+#define smp_mb()
+
+/*
+ * mbuf wrappers
+ */
+
+/*
+ * we allocate an EXT_PACKET
+ */
+#define netmap_get_mbuf(len) m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR|M_NOFREE)
+
+/* mbuf destructor, also need to change the type to EXT_EXTREF,
+ * add an M_NOFREE flag, and then clear the flag and
+ * chain into uma_zfree(zone_pack, mf)
+ * (or reinstall the buffer ?)
+ */
+#define SET_MBUF_DESTRUCTOR(m, fn) do { \
+ (m)->m_ext.ext_free = (void *)fn; \
+ (m)->m_ext.ext_type = EXT_EXTREF; \
+ } while (0)
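+
+/* A sketch of the intended use (the real sequence is in
+ * generic_set_tx_event() below): arm the destructor on one of the
+ * preallocated mbufs and drop our reference, so that the driver's
+ * final m_freem() runs the callback and wakes up the pollers:
+ *
+ *	kring->tx_pool[e] = NULL;
+ *	SET_MBUF_DESTRUCTOR(m, generic_mbuf_destructor);
+ *	m_freem(m);
+ */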
+
+
+#define GET_MBUF_REFCNT(m) ((m)->m_ext.ref_cnt ? *(m)->m_ext.ref_cnt : -1)
+
+
+
+#else /* linux */
+
+#include "bsd_glue.h"
+
+#include <linux/rtnetlink.h> /* rtnl_[un]lock() */
+#include <linux/ethtool.h> /* struct ethtool_ops, get_ringparam */
+#include <linux/hrtimer.h>
+
+//#define RATE /* Enables communication statistics. */
+
+//#define REG_RESET
+
+#endif /* linux */
+
+
+/* Common headers. */
+#include <net/netmap.h>
+#include <dev/netmap/netmap_kern.h>
+#include <dev/netmap/netmap_mem2.h>
+
+
+
+/* ======================== usage stats =========================== */
+
+#ifdef RATE
+#define IFRATE(x) x
+struct rate_stats {
+ unsigned long txpkt;
+ unsigned long txsync;
+ unsigned long txirq;
+ unsigned long rxpkt;
+ unsigned long rxirq;
+ unsigned long rxsync;
+};
+
+struct rate_context {
+ unsigned refcount;
+ struct timer_list timer;
+ struct rate_stats new;
+ struct rate_stats old;
+};
+
+#define RATE_PRINTK(_NAME_) \
+ printk( #_NAME_ " = %lu Hz\n", (cur._NAME_ - ctx->old._NAME_)/RATE_PERIOD);
+#define RATE_PERIOD 2
+static void rate_callback(unsigned long arg)
+{
+ struct rate_context * ctx = (struct rate_context *)arg;
+ struct rate_stats cur = ctx->new;
+ int r;
+
+ RATE_PRINTK(txpkt);
+ RATE_PRINTK(txsync);
+ RATE_PRINTK(txirq);
+ RATE_PRINTK(rxpkt);
+ RATE_PRINTK(rxsync);
+ RATE_PRINTK(rxirq);
+ printk("\n");
+
+ ctx->old = cur;
+ r = mod_timer(&ctx->timer, jiffies +
+ msecs_to_jiffies(RATE_PERIOD * 1000));
+ if (unlikely(r))
+ D("[v1000] Error: mod_timer()");
+}
+
+static struct rate_context rate_ctx;
+
+#else /* !RATE */
+#define IFRATE(x)
+#endif /* !RATE */
+
+
+/* =============== GENERIC NETMAP ADAPTER SUPPORT ================= */
+#define GENERIC_BUF_SIZE netmap_buf_size /* Size of the mbufs in the Tx pool. */
+
+/*
+ * Wrapper used by the generic adapter layer to notify
+ * the poller threads. Unlike netmap_rx_irq(), we check
+ * only IFCAP_NETMAP instead of NAF_NATIVE_ON to enable the irq.
+ */
+static void
+netmap_generic_irq(struct ifnet *ifp, u_int q, u_int *work_done)
+{
+ if (unlikely(!(ifp->if_capenable & IFCAP_NETMAP)))
+ return;
+
+ netmap_common_irq(ifp, q, work_done);
+}
+
+
+/* Enable/disable netmap mode for a generic network interface. */
+int generic_netmap_register(struct netmap_adapter *na, int enable)
+{
+ struct ifnet *ifp = na->ifp;
+ struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na;
+ struct mbuf *m;
+ int error;
+ int i, r;
+
+ if (!na)
+ return EINVAL;
+
+#ifdef REG_RESET
+ error = ifp->netdev_ops->ndo_stop(ifp);
+ if (error) {
+ return error;
+ }
+#endif /* REG_RESET */
+
+ if (enable) { /* Enable netmap mode. */
+ /* Initialize the rx queue, as generic_rx_handler() can
+ * be called as soon as netmap_catch_rx() returns.
+ */
+ for (r=0; r<na->num_rx_rings; r++) {
+ mbq_safe_init(&na->rx_rings[r].rx_queue);
+ na->rx_rings[r].nr_ntc = 0;
+ }
+
+ /* Init the mitigation timer. */
+ netmap_mitigation_init(gna);
+
+ /*
+ * Preallocate packet buffers for the tx rings.
+ */
+ for (r=0; r<na->num_tx_rings; r++) {
+ na->tx_rings[r].nr_ntc = 0;
+ na->tx_rings[r].tx_pool = malloc(na->num_tx_desc * sizeof(struct mbuf *),
+ M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (!na->tx_rings[r].tx_pool) {
+ D("tx_pool allocation failed");
+ error = ENOMEM;
+ goto free_tx_pool;
+ }
+ for (i=0; i<na->num_tx_desc; i++) {
+ m = netmap_get_mbuf(GENERIC_BUF_SIZE);
+ if (!m) {
+ D("tx_pool[%d] allocation failed", i);
+ error = ENOMEM;
+ goto free_mbufs;
+ }
+ na->tx_rings[r].tx_pool[i] = m;
+ }
+ }
+ rtnl_lock();
+ /* Prepare to intercept incoming traffic. */
+ error = netmap_catch_rx(na, 1);
+ if (error) {
+ D("netdev_rx_handler_register() failed");
+ goto register_handler;
+ }
+ ifp->if_capenable |= IFCAP_NETMAP;
+
+ /* Make netmap control the packet steering. */
+ netmap_catch_packet_steering(gna, 1);
+
+ rtnl_unlock();
+
+#ifdef RATE
+ if (rate_ctx.refcount == 0) {
+ D("setup_timer()");
+ memset(&rate_ctx, 0, sizeof(rate_ctx));
+ setup_timer(&rate_ctx.timer, &rate_callback, (unsigned long)&rate_ctx);
+ if (mod_timer(&rate_ctx.timer, jiffies + msecs_to_jiffies(1500))) {
+ D("Error: mod_timer()");
+ }
+ }
+ rate_ctx.refcount++;
+#endif /* RATE */
+
+ } else { /* Disable netmap mode. */
+ rtnl_lock();
+
+ ifp->if_capenable &= ~IFCAP_NETMAP;
+
+ /* Release packet steering control. */
+ netmap_catch_packet_steering(gna, 0);
+
+ /* Do not intercept packets on the rx path. */
+ netmap_catch_rx(na, 0);
+
+ rtnl_unlock();
+
+ /* Free the mbufs going to the netmap rings */
+ for (r=0; r<na->num_rx_rings; r++) {
+ mbq_safe_purge(&na->rx_rings[r].rx_queue);
+ mbq_safe_destroy(&na->rx_rings[r].rx_queue);
+ }
+
+ netmap_mitigation_cleanup(gna);
+
+ for (r=0; r<na->num_tx_rings; r++) {
+ for (i=0; i<na->num_tx_desc; i++) {
+ m_freem(na->tx_rings[r].tx_pool[i]);
+ }
+ free(na->tx_rings[r].tx_pool, M_DEVBUF);
+ }
+
+#ifdef RATE
+ if (--rate_ctx.refcount == 0) {
+ D("del_timer()");
+ del_timer(&rate_ctx.timer);
+ }
+#endif
+ }
+
+#ifdef REG_RESET
+ error = ifp->netdev_ops->ndo_open(ifp);
+ if (error) {
+ goto alloc_tx_pool;
+ }
+#endif
+
+ return 0;
+
+register_handler:
+ rtnl_unlock();
+free_tx_pool:
+ r--;
+ i = na->num_tx_desc; /* Useless, but just to stay safe. */
+free_mbufs:
+ i--;
+ for (; r>=0; r--) {
+ for (; i>=0; i--) {
+ m_freem(na->tx_rings[r].tx_pool[i]);
+ }
+ free(na->tx_rings[r].tx_pool, M_DEVBUF);
+ i = na->num_tx_desc - 1;
+ }
+
+ return error;
+}
+
+/*
+ * Callback invoked when the device driver frees an mbuf used
+ * by netmap to transmit a packet. This usually happens when
+ * the NIC notifies the driver that transmission is completed.
+ */
+static void
+generic_mbuf_destructor(struct mbuf *m)
+{
+ if (netmap_verbose)
+ D("Tx irq (%p) queue %d", m, MBUF_TXQ(m));
+ netmap_generic_irq(MBUF_IFP(m), MBUF_TXQ(m), NULL);
+#ifdef __FreeBSD__
+ m->m_ext.ext_type = EXT_PACKET;
+ m->m_ext.ext_free = NULL;
+ if (*(m->m_ext.ref_cnt) == 0)
+ *(m->m_ext.ref_cnt) = 1;
+ uma_zfree(zone_pack, m);
+#endif /* __FreeBSD__ */
+ IFRATE(rate_ctx.new.txirq++);
+}
+
+/* Record completed transmissions and update hwavail.
+ *
+ * nr_ntc is the oldest tx buffer not yet completed
+ * (same as nr_hwavail + nr_hwcur + 1),
+ * nr_hwcur is the first unsent buffer.
+ * When cleaning, we try to recover buffers between nr_ntc and nr_hwcur.
+ */
+static int
+generic_netmap_tx_clean(struct netmap_kring *kring)
+{
+ u_int num_slots = kring->nkr_num_slots;
+ u_int ntc = kring->nr_ntc;
+ u_int hwcur = kring->nr_hwcur;
+ u_int n = 0;
+ struct mbuf **tx_pool = kring->tx_pool;
+
+ while (ntc != hwcur) { /* buffers not completed */
+ struct mbuf *m = tx_pool[ntc];
+
+ if (unlikely(m == NULL)) {
+ /* try to replenish the entry */
+ tx_pool[ntc] = m = netmap_get_mbuf(GENERIC_BUF_SIZE);
+ if (unlikely(m == NULL)) {
+ D("mbuf allocation failed, XXX error");
+ // XXX how do we proceed ? break ?
+ return -ENOMEM;
+ }
+ } else if (GET_MBUF_REFCNT(m) != 1) {
+ break; /* This mbuf is still busy: its refcnt is 2. */
+ }
+ if (unlikely(++ntc == num_slots)) {
+ ntc = 0;
+ }
+ n++;
+ }
+ kring->nr_ntc = ntc;
+ kring->nr_hwavail += n;
+ ND("tx completed [%d] -> hwavail %d", n, kring->nr_hwavail);
+
+ return n;
+}
+
+
+/*
+ * We have pending packets in the driver between nr_ntc and hwcur.
+ * Compute a position in the middle, to be used to generate
+ * a notification.
+ */
+static inline u_int
+generic_tx_event_middle(struct netmap_kring *kring, u_int hwcur)
+{
+ u_int n = kring->nkr_num_slots;
+ u_int ntc = kring->nr_ntc;
+ u_int e;
+
+ if (hwcur >= ntc) {
+ e = (hwcur + ntc) / 2;
+ } else { /* wrap around */
+ e = (hwcur + n + ntc) / 2;
+ if (e >= n) {
+ e -= n;
+ }
+ }
+
+ if (unlikely(e >= n)) {
+ D("This cannot happen");
+ e = 0;
+ }
+
+ return e;
+}
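+
+/*
+ * Worked example (illustration only): with nkr_num_slots = 8,
+ * nr_ntc = 6 and hwcur = 2 the pending region is 6,7,0,1; the
+ * wrap-around branch gives e = (2 + 8 + 6) / 2 = 8, which folds
+ * back to slot 0, i.e. roughly the middle of the pending region.
+ */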
+
+/*
+ * We have pending packets in the driver between nr_ntc and hwcur.
+ * Schedule a notification approximately in the middle of the two.
+ * There is a race but this is only called within txsync which does
+ * a double check.
+ */
+static void
+generic_set_tx_event(struct netmap_kring *kring, u_int hwcur)
+{
+ struct mbuf *m;
+ u_int e;
+
+ if (kring->nr_ntc == hwcur) {
+ return;
+ }
+ e = generic_tx_event_middle(kring, hwcur);
+
+ m = kring->tx_pool[e];
+ if (m == NULL) {
+ /* This can happen if there is already an event on the netmap
+ slot 'e': There is nothing to do. */
+ return;
+ }
+ ND("Event at %d mbuf %p refcnt %d", e, m, GET_MBUF_REFCNT(m));
+ kring->tx_pool[e] = NULL;
+ SET_MBUF_DESTRUCTOR(m, generic_mbuf_destructor);
+
+ // XXX wmb() ?
+	/* Decrement the refcount and free it if we have the last one. */
+ m_freem(m);
+ smp_mb();
+}
+
+
+/*
+ * generic_netmap_txsync() transforms netmap buffers into mbufs
+ * and passes them to the standard device driver
+ * (ndo_start_xmit() or ifp->if_transmit() ).
+ * On linux this is not done directly, but using dev_queue_xmit(),
+ * since it implements the TX flow control (and takes some locks).
+ */
+static int
+generic_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
+{
+ struct ifnet *ifp = na->ifp;
+ struct netmap_kring *kring = &na->tx_rings[ring_nr];
+ struct netmap_ring *ring = kring->ring;
+ u_int j, k, num_slots = kring->nkr_num_slots;
+ int new_slots, ntx;
+
+ IFRATE(rate_ctx.new.txsync++);
+
+ // TODO: handle the case of mbuf allocation failure
+ /* first, reclaim completed buffers */
+ generic_netmap_tx_clean(kring);
+
+ /* Take a copy of ring->cur now, and never read it again. */
+ k = ring->cur;
+ if (unlikely(k >= num_slots)) {
+ return netmap_ring_reinit(kring);
+ }
+
+ rmb();
+ j = kring->nr_hwcur;
+ /*
+ * 'new_slots' counts how many new slots have been added:
+ * everything from hwcur to cur, excluding reserved ones, if any.
+	 * nr_hwreserved starts from hwcur and counts how many slots were
+ * not sent to the NIC from the previous round.
+ */
+ new_slots = k - j - kring->nr_hwreserved;
+ if (new_slots < 0) {
+ new_slots += num_slots;
+ }
+ ntx = 0;
+ if (j != k) {
+ /* Process new packets to send:
+ * j is the current index in the netmap ring.
+ */
+ while (j != k) {
+ struct netmap_slot *slot = &ring->slot[j]; /* Current slot in the netmap ring */
+ void *addr = NMB(slot);
+ u_int len = slot->len;
+ struct mbuf *m;
+ int tx_ret;
+
+ if (unlikely(addr == netmap_buffer_base || len > NETMAP_BUF_SIZE)) {
+ return netmap_ring_reinit(kring);
+ }
+			/* Take an mbuf from the tx pool and copy in the user packet. */
+ m = kring->tx_pool[j];
+ if (unlikely(!m)) {
+ RD(5, "This should never happen");
+ kring->tx_pool[j] = m = netmap_get_mbuf(GENERIC_BUF_SIZE);
+ if (unlikely(m == NULL)) {
+ D("mbuf allocation failed");
+ break;
+ }
+ }
+ /* XXX we should ask notifications when NS_REPORT is set,
+ * or roughly every half frame. We can optimize this
+ * by lazily requesting notifications only when a
+ * transmission fails. Probably the best way is to
+ * break on failures and set notifications when
+ * ring->avail == 0 || j != k
+ */
+ tx_ret = generic_xmit_frame(ifp, m, addr, len, ring_nr);
+ if (unlikely(tx_ret)) {
+ RD(5, "start_xmit failed: err %d [%u,%u,%u,%u]",
+ tx_ret, kring->nr_ntc, j, k, kring->nr_hwavail);
+ /*
+ * No room for this mbuf in the device driver.
+ * Request a notification FOR A PREVIOUS MBUF,
+ * then call generic_netmap_tx_clean(kring) to do the
+ * double check and see if we can free more buffers.
+ * If there is space continue, else break;
+ * NOTE: the double check is necessary if the problem
+ * occurs in the txsync call after selrecord().
+ * Also, we need some way to tell the caller that not
+ * all buffers were queued onto the device (this was
+ * not a problem with native netmap driver where space
+ * is preallocated). The bridge has a similar problem
+ * and we solve it there by dropping the excess packets.
+ */
+ generic_set_tx_event(kring, j);
+ if (generic_netmap_tx_clean(kring)) { /* space now available */
+ continue;
+ } else {
+ break;
+ }
+ }
+ slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
+ if (unlikely(++j == num_slots))
+ j = 0;
+ ntx++;
+ }
+
+ /* Update hwcur to the next slot to transmit. */
+ kring->nr_hwcur = j;
+
+ /*
+ * Report all new slots as unavailable, even those not sent.
+		 * We account for them with hwreserved, so that
+ * nr_hwreserved =:= cur - nr_hwcur
+ */
+ kring->nr_hwavail -= new_slots;
+ kring->nr_hwreserved = k - j;
+ if (kring->nr_hwreserved < 0) {
+ kring->nr_hwreserved += num_slots;
+ }
+
+ IFRATE(rate_ctx.new.txpkt += ntx);
+
+ if (!kring->nr_hwavail) {
+ /* No more available slots? Set a notification event
+ * on a netmap slot that will be cleaned in the future.
+ * No doublecheck is performed, since txsync() will be
+ * called twice by netmap_poll().
+ */
+ generic_set_tx_event(kring, j);
+ }
+		ND("tx #%d, hwavail = %d", ntx, kring->nr_hwavail);
+ }
+
+ /* Synchronize the user's view to the kernel view. */
+ ring->avail = kring->nr_hwavail;
+ ring->reserved = kring->nr_hwreserved;
+
+ return 0;
+}
+
+/*
+ * This handler is registered (through netmap_catch_rx())
+ * within the attached network interface
+ * in the RX subsystem, so that every mbuf passed up by
+ * the driver can be stolen before it reaches the network stack.
+ * Stolen packets are put in a queue where the
+ * generic_netmap_rxsync() callback can extract them.
+ */
+void generic_rx_handler(struct ifnet *ifp, struct mbuf *m)
+{
+ struct netmap_adapter *na = NA(ifp);
+ struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na;
+ u_int work_done;
+ u_int rr = 0; // receive ring number
+
+ ND("called");
+ /* limit the size of the queue */
+ if (unlikely(mbq_len(&na->rx_rings[rr].rx_queue) > 1024)) {
+ m_freem(m);
+ } else {
+ mbq_safe_enqueue(&na->rx_rings[rr].rx_queue, m);
+ }
+
+ if (netmap_generic_mit < 32768) {
+ /* no rx mitigation, pass notification up */
+ netmap_generic_irq(na->ifp, rr, &work_done);
+ IFRATE(rate_ctx.new.rxirq++);
+ } else {
+ /* same as send combining, filter notification if there is a
+ * pending timer, otherwise pass it up and start a timer.
+ */
+ if (likely(netmap_mitigation_active(gna))) {
+ /* Record that there is some pending work. */
+ gna->mit_pending = 1;
+ } else {
+ netmap_generic_irq(na->ifp, rr, &work_done);
+ IFRATE(rate_ctx.new.rxirq++);
+ netmap_mitigation_start(gna);
+ }
+ }
+}
+
+/*
+ * generic_netmap_rxsync() extracts mbufs from the queue filled by
+ * generic_rx_handler() and puts their content in the netmap
+ * receive ring.
+ * Access must be protected because the rx handler is asynchronous.
+ */
+static int
+generic_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
+{
+ struct netmap_kring *kring = &na->rx_rings[ring_nr];
+ struct netmap_ring *ring = kring->ring;
+ u_int j, n, lim = kring->nkr_num_slots - 1;
+ int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
+ u_int k, resvd = ring->reserved;
+
+ if (ring->cur > lim)
+ return netmap_ring_reinit(kring);
+
+ /* Import newly received packets into the netmap ring. */
+ if (netmap_no_pendintr || force_update) {
+ uint16_t slot_flags = kring->nkr_slot_flags;
+ struct mbuf *m;
+
+ n = 0;
+ j = kring->nr_ntc; /* first empty slot in the receive ring */
+ /* extract buffers from the rx queue, stop at most one
+ * slot before nr_hwcur (index k)
+ */
+ k = (kring->nr_hwcur) ? kring->nr_hwcur-1 : lim;
+ while (j != k) {
+ int len;
+ void *addr = NMB(&ring->slot[j]);
+
+ if (addr == netmap_buffer_base) { /* Bad buffer */
+ return netmap_ring_reinit(kring);
+ }
+ /*
+ * Call the locked version of the function.
+ * XXX Ideally we could grab a batch of mbufs at once,
+ * by changing rx_queue into a ring.
+ */
+ m = mbq_safe_dequeue(&kring->rx_queue);
+ if (!m)
+ break;
+ len = MBUF_LEN(m);
+ m_copydata(m, 0, len, addr);
+ ring->slot[j].len = len;
+ ring->slot[j].flags = slot_flags;
+ m_freem(m);
+ if (unlikely(j++ == lim))
+ j = 0;
+ n++;
+ }
+ if (n) {
+ kring->nr_ntc = j;
+ kring->nr_hwavail += n;
+ IFRATE(rate_ctx.new.rxpkt += n);
+ }
+ kring->nr_kflags &= ~NKR_PENDINTR;
+ }
+
+ // XXX should we invert the order ?
+ /* Skip past packets that userspace has released */
+ j = kring->nr_hwcur;
+ k = ring->cur;
+ if (resvd > 0) {
+ if (resvd + ring->avail >= lim + 1) {
+ D("XXX invalid reserve/avail %d %d", resvd, ring->avail);
+ ring->reserved = resvd = 0; // XXX panic...
+ }
+ k = (k >= resvd) ? k - resvd : k + lim + 1 - resvd;
+ }
+ if (j != k) {
+ /* Userspace has released some packets. */
+ for (n = 0; j != k; n++) {
+ struct netmap_slot *slot = &ring->slot[j];
+
+ slot->flags &= ~NS_BUF_CHANGED;
+ if (unlikely(j++ == lim))
+ j = 0;
+ }
+ kring->nr_hwavail -= n;
+ kring->nr_hwcur = k;
+ }
+ /* Tell userspace that there are new packets. */
+ ring->avail = kring->nr_hwavail - resvd;
+ IFRATE(rate_ctx.new.rxsync++);
+
+ return 0;
+}
+
+static void
+generic_netmap_dtor(struct netmap_adapter *na)
+{
+ struct ifnet *ifp = na->ifp;
+ struct netmap_generic_adapter *gna = (struct netmap_generic_adapter*)na;
+ struct netmap_adapter *prev_na = gna->prev;
+
+ if (prev_na != NULL) {
+ D("Released generic NA %p", gna);
+ if_rele(na->ifp);
+ netmap_adapter_put(prev_na);
+ }
+ if (ifp != NULL) {
+ WNA(ifp) = prev_na;
+ D("Restored native NA %p", prev_na);
+ na->ifp = NULL;
+ }
+}
+
+/*
+ * generic_netmap_attach() makes it possible to use netmap on
+ * a device without native netmap support.
+ * This is less performant than native support but potentially
+ * faster than raw sockets or similar schemes.
+ *
+ * In this "emulated" mode, netmap rings do not necessarily
+ * have the same size as those in the NIC. We use a default
+ * value and possibly override it if the OS has ways to fetch the
+ * actual configuration.
+ */
+int
+generic_netmap_attach(struct ifnet *ifp)
+{
+ struct netmap_adapter *na;
+ struct netmap_generic_adapter *gna;
+ int retval;
+ u_int num_tx_desc, num_rx_desc;
+
+ num_tx_desc = num_rx_desc = netmap_generic_ringsize; /* starting point */
+
+ generic_find_num_desc(ifp, &num_tx_desc, &num_rx_desc);
+ ND("Netmap ring size: TX = %d, RX = %d", num_tx_desc, num_rx_desc);
+
+ gna = malloc(sizeof(*gna), M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (gna == NULL) {
+ D("no memory on attach, give up");
+ return ENOMEM;
+ }
+ na = (struct netmap_adapter *)gna;
+ na->ifp = ifp;
+ na->num_tx_desc = num_tx_desc;
+ na->num_rx_desc = num_rx_desc;
+ na->nm_register = &generic_netmap_register;
+ na->nm_txsync = &generic_netmap_txsync;
+ na->nm_rxsync = &generic_netmap_rxsync;
+ na->nm_dtor = &generic_netmap_dtor;
+ /* when using generic, IFCAP_NETMAP is set so we force
+ * NAF_SKIP_INTR to use the regular interrupt handler
+ */
+ na->na_flags = NAF_SKIP_INTR;
+
+ ND("[GNA] num_tx_queues(%d), real_num_tx_queues(%d), len(%lu)",
+ ifp->num_tx_queues, ifp->real_num_tx_queues,
+ ifp->tx_queue_len);
+ ND("[GNA] num_rx_queues(%d), real_num_rx_queues(%d)",
+ ifp->num_rx_queues, ifp->real_num_rx_queues);
+
+ generic_find_num_queues(ifp, &na->num_tx_rings, &na->num_rx_rings);
+
+ retval = netmap_attach_common(na);
+ if (retval) {
+ free(gna, M_DEVBUF);
+ }
+
+ return retval;
+}
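+
+/*
+ * Expected use (an assumption; the actual call site lives in the core
+ * netmap code, not in this file): when an interface has no native
+ * netmap adapter, the core is expected to fall back to something like
+ *
+ *	if (!NETMAP_CAPABLE(ifp))
+ *		error = generic_netmap_attach(ifp);
+ *
+ * after which NA(ifp) points to the newly created
+ * netmap_generic_adapter.
+ */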
diff --git a/sys/dev/netmap/netmap_kern.h b/sys/dev/netmap/netmap_kern.h
index 12bd882521b3..c009f5e62684 100644
--- a/sys/dev/netmap/netmap_kern.h
+++ b/sys/dev/netmap/netmap_kern.h
@@ -1,5 +1,6 @@
/*
* Copyright (C) 2011-2013 Matteo Landi, Luigi Rizzo. All rights reserved.
+ * Copyright (C) 2013 Universita` di Pisa. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -33,27 +34,61 @@
#ifndef _NET_NETMAP_KERN_H_
#define _NET_NETMAP_KERN_H_
+#define WITH_VALE // comment out to disable VALE support
+
#if defined(__FreeBSD__)
#define likely(x) __builtin_expect((long)!!(x), 1L)
#define unlikely(x) __builtin_expect((long)!!(x), 0L)
#define NM_LOCK_T struct mtx
+#define NMG_LOCK_T struct mtx
+#define NMG_LOCK_INIT() mtx_init(&netmap_global_lock, \
+ "netmap global lock", NULL, MTX_DEF)
+#define NMG_LOCK_DESTROY() mtx_destroy(&netmap_global_lock)
+#define NMG_LOCK() mtx_lock(&netmap_global_lock)
+#define NMG_UNLOCK() mtx_unlock(&netmap_global_lock)
+#define NMG_LOCK_ASSERT() mtx_assert(&netmap_global_lock, MA_OWNED)
+
#define NM_SELINFO_T struct selinfo
#define MBUF_LEN(m) ((m)->m_pkthdr.len)
+#define MBUF_IFP(m) ((m)->m_pkthdr.rcvif)
#define NM_SEND_UP(ifp, m) ((ifp)->if_input)(ifp, m)
-#define NM_ATOMIC_T volatile int
+#define NM_ATOMIC_T volatile int // XXX ?
+/* atomic operations */
+#include <machine/atomic.h>
+#define NM_ATOMIC_TEST_AND_SET(p) (!atomic_cmpset_acq_int((p), 0, 1))
+#define NM_ATOMIC_CLEAR(p) atomic_store_rel_int((p), 0)
+
+#define prefetch(x) __builtin_prefetch(x)
+
+MALLOC_DECLARE(M_NETMAP);
+
+// XXX linux struct, not used in FreeBSD
+struct net_device_ops {
+};
+struct hrtimer {
+};
#elif defined (linux)
#define NM_LOCK_T safe_spinlock_t // see bsd_glue.h
#define NM_SELINFO_T wait_queue_head_t
#define MBUF_LEN(m) ((m)->len)
+#define MBUF_IFP(m) ((m)->dev)
#define NM_SEND_UP(ifp, m) netif_rx(m)
#define NM_ATOMIC_T volatile long unsigned int
+// XXX a mtx would suffice here too 20130404 gl
+#define NMG_LOCK_T struct semaphore
+#define NMG_LOCK_INIT() sema_init(&netmap_global_lock, 1)
+#define NMG_LOCK_DESTROY()
+#define NMG_LOCK() down(&netmap_global_lock)
+#define NMG_UNLOCK() up(&netmap_global_lock)
+#define NMG_LOCK_ASSERT() // XXX to be completed
+
#ifndef DEV_NETMAP
#define DEV_NETMAP
#endif /* DEV_NETMAP */
@@ -115,6 +150,10 @@ struct netmap_priv_d;
const char *nm_dump_buf(char *p, int len, int lim, char *dst);
+#include "netmap_mbq.h"
+
+extern NMG_LOCK_T netmap_global_lock;
+
/*
* private, kernel view of a ring. Keeps track of the status of
* a ring across system calls.
@@ -152,7 +191,7 @@ const char *nm_dump_buf(char *p, int len, int lim, char *dst);
* nkr_leases array of nkr_num_slots where writers can report
* completion of their block. NR_NOSLOT (~0) indicates
* that the writer has not finished yet
- * nkr_lease_idx index of next free slot in nr_leases, to be assigned
+ * nkr_lease_idx index of next free slot in nr_leases, to be assigned
*
* The kring is manipulated by txsync/rxsync and generic netmap function.
* q_lock is used to arbitrate access to the kring from within the netmap
@@ -166,6 +205,7 @@ struct netmap_kring {
uint32_t nr_hwcur;
uint32_t nr_hwavail;
uint32_t nr_kflags; /* private driver flags */
+ int32_t nr_hwreserved;
#define NKR_PENDINTR 0x1 // Pending interrupt.
uint32_t nkr_num_slots;
int32_t nkr_hwofs; /* offset between NIC and netmap ring */
@@ -183,6 +223,17 @@ struct netmap_kring {
NM_ATOMIC_T nr_busy; /* prevent concurrent syscalls */
volatile int nkr_stopped;
+
+ /* support for adapters without native netmap support.
+ * On tx rings we preallocate an array of tx buffers
+ * (same size as the netmap ring), on rx rings we
+ * store incoming packets in a queue.
+ * XXX who writes to the rx queue ?
+ */
+ struct mbuf **tx_pool;
+ u_int nr_ntc; /* Emulation of a next-to-clean RX ring pointer. */
+ struct mbq rx_queue; /* A queue for intercepted rx mbufs. */
+
} __attribute__((__aligned__(64)));
@@ -245,22 +296,26 @@ nm_next(uint32_t i, uint32_t lim)
+enum txrx { NR_RX = 0, NR_TX = 1 };
/*
- * This struct extends the 'struct adapter' (or
- * equivalent) device descriptor. It contains all fields needed to
- * support netmap operation.
+ * The "struct netmap_adapter" extends the "struct adapter"
+ * (or equivalent) device descriptor.
+ * It contains all base fields needed to support netmap operation.
+ * There are in fact different types of netmap adapters
+ * (native, generic, VALE switch...) so a netmap_adapter is
+ * just the first field in the derived type.
*/
struct netmap_adapter {
/*
* On linux we do not have a good way to tell if an interface
- * is netmap-capable. So we use the following trick:
+ * is netmap-capable. So we always use the following trick:
* NA(ifp) points here, and the first entry (which hopefully
* always exists and is at least 32 bits) contains a magic
* value which we can use to detect that the interface is good.
*/
uint32_t magic;
- uint32_t na_flags; /* future place for IFCAP_NETMAP */
+ uint32_t na_flags; /* enabled, and other flags */
#define NAF_SKIP_INTR 1 /* use the regular interrupt handler.
* useful during initialization
*/
@@ -272,17 +327,16 @@ struct netmap_adapter {
#define NAF_MEM_OWNER 8 /* the adapter is responsible for the
* deallocation of the memory allocator
*/
- int refcount; /* number of user-space descriptors using this
+#define NAF_NATIVE_ON 16 /* the adapter is native and the attached
+ * interface is in netmap mode
+ */
+#define NAF_NETMAP_ON 32 /* netmap is active (either native or
+ * emulated. Where possible (e.g. FreeBSD)
+ * IFCAP_NETMAP also mirrors this flag.
+ */
+ int active_fds; /* number of user-space descriptors using this
interface, which is equal to the number of
struct netmap_if objs in the mapped region. */
- /*
- * The selwakeup in the interrupt thread can use per-ring
- * and/or global wait queues. We track how many clients
- * of each type we have so we can optimize the drivers,
- * and especially avoid huge contention on the locks.
- */
- int na_single; /* threads attached to a single hw queue */
- int na_multi; /* threads attached to multiple hw queues */
u_int num_rx_rings; /* number of adapter receive rings */
u_int num_tx_rings; /* number of adapter transmit rings */
@@ -296,6 +350,9 @@ struct netmap_adapter {
*/
struct netmap_kring *tx_rings; /* array of TX rings. */
struct netmap_kring *rx_rings; /* array of RX rings. */
+ void *tailroom; /* space below the rings array */
+ /* (used for leases) */
+
NM_SELINFO_T tx_si, rx_si; /* global wait queues */
@@ -309,47 +366,157 @@ struct netmap_adapter {
*/
struct ifnet *ifp; /* adapter is ifp->if_softc */
- NM_LOCK_T core_lock; /* used if no device lock available */
+ /* private cleanup */
+ void (*nm_dtor)(struct netmap_adapter *);
- int (*nm_register)(struct ifnet *, int onoff);
+ int (*nm_register)(struct netmap_adapter *, int onoff);
- int (*nm_txsync)(struct ifnet *, u_int ring, int flags);
- int (*nm_rxsync)(struct ifnet *, u_int ring, int flags);
+ int (*nm_txsync)(struct netmap_adapter *, u_int ring, int flags);
+ int (*nm_rxsync)(struct netmap_adapter *, u_int ring, int flags);
#define NAF_FORCE_READ 1
#define NAF_FORCE_RECLAIM 2
/* return configuration information */
- int (*nm_config)(struct ifnet *, u_int *txr, u_int *txd,
- u_int *rxr, u_int *rxd);
+ int (*nm_config)(struct netmap_adapter *,
+ u_int *txr, u_int *txd, u_int *rxr, u_int *rxd);
+ int (*nm_krings_create)(struct netmap_adapter *);
+ void (*nm_krings_delete)(struct netmap_adapter *);
+ int (*nm_notify)(struct netmap_adapter *,
+ u_int ring, enum txrx, int flags);
+#define NAF_GLOBAL_NOTIFY 4
+#define NAF_DISABLE_NOTIFY 8
+
+ /* standard refcount to control the lifetime of the adapter
+ * (it should be equal to the lifetime of the corresponding ifp)
+ */
+ int na_refcount;
+
+ /* memory allocator (opaque)
+ * We also cache a pointer to the lut_entry for translating
+ * buffer addresses, and the total number of buffers.
+ */
+ struct netmap_mem_d *nm_mem;
+ struct lut_entry *na_lut;
+ uint32_t na_lut_objtotal; /* max buffer index */
+
+ /* used internally. If non-null, the interface cannot be bound
+ * from userspace
+ */
+ void *na_private;
+};
+
+/*
+ * If the NIC is owned by the kernel
+ * (i.e., bridge), neither another bridge nor user can use it;
+ * if the NIC is owned by a user, only users can share it.
+ * Evaluation must be done under NMG_LOCK().
+ */
+#define NETMAP_OWNED_BY_KERN(na) (na->na_private)
+#define NETMAP_OWNED_BY_ANY(na) \
+ (NETMAP_OWNED_BY_KERN(na) || (na->active_fds > 0))
+
+
+/*
+ * derived netmap adapters for various types of ports
+ */
+struct netmap_vp_adapter { /* VALE software port */
+ struct netmap_adapter up;
/*
* Bridge support:
*
* bdg_port is the port number used in the bridge;
- * na_bdg_refcount is a refcount used for bridge ports,
- * when it goes to 0 we can detach+free this port
- * (a bridge port is always attached if it exists;
- * it is not always registered)
* na_bdg points to the bridge this NA is attached to.
*/
int bdg_port;
- int na_bdg_refcount;
struct nm_bridge *na_bdg;
+ int retry;
+
+ u_int offset; /* Offset of ethernet header for each packet. */
+};
+
+struct netmap_hw_adapter { /* physical device */
+ struct netmap_adapter up;
+
+ struct net_device_ops nm_ndo; // XXX linux only
+};
+
+struct netmap_generic_adapter { /* non-native device */
+ struct netmap_hw_adapter up;
+
+ /* Pointer to a previously used netmap adapter. */
+ struct netmap_adapter *prev;
+
+ /* generic netmap adapters support:
+ * a net_device_ops struct overrides ndo_select_queue(),
+ * save_if_input saves the if_input hook (FreeBSD),
+ * mit_timer and mit_pending implement rx interrupt mitigation.
+ */
+ struct net_device_ops generic_ndo;
+ void (*save_if_input)(struct ifnet *, struct mbuf *);
+
+ struct hrtimer mit_timer;
+ int mit_pending;
+};
+
+#ifdef WITH_VALE
+
+/* bridge wrapper for non-VALE ports. It is used to connect real devices to the bridge.
+ *
+ * The real device must already have its own netmap adapter (hwna). The
+ * bridge wrapper and the hwna adapter share the same set of netmap rings and
+ * buffers, but they have two separate sets of krings descriptors, with tx/rx
+ * meanings swapped:
+ *
+ * netmap
+ * bwrap krings rings krings hwna
+ * +------+ +------+ +-----+ +------+ +------+
+ * |tx_rings->| |\ /| |----| |<-tx_rings|
+ * | | +------+ \ / +-----+ +------+ | |
+ * | | X | |
+ * | | / \ | |
+ * | | +------+/ \+-----+ +------+ | |
+ * |rx_rings->| | | |----| |<-rx_rings|
+ * | | +------+ +-----+ +------+ | |
+ * +------+ +------+
+ *
+ * - packets coming from the bridge go to the bwrap rx rings, which are also the
+ * hwna tx rings. The bwrap notify callback will then complete the hwna tx
+ * (see netmap_bwrap_notify).
+ * - packets coming from the outside go to the hwna rx rings, which are also the
+ * bwrap tx rings. The (overwritten) hwna notify method will then complete
+ * the bridge tx (see netmap_bwrap_intr_notify).
+ *
+ * The bridge wrapper may optionally connect the hwna 'host' rings to the
+ * bridge. This is done by using a second port in the bridge and connecting it
+ * to the 'host' netmap_vp_adapter contained in the netmap_bwrap_adapter.
+ * The bwrap host adapter cross-links the hwna host rings in the same way as shown above.
+ *
+ * - packets coming from the bridge and directed to host stack are handled by the
+ * bwrap host notify callback (see netmap_bwrap_host_notify)
+ * - packets coming from the host stack are still handled by the overwritten
+ * hwna notify callback (netmap_bwrap_intr_notify), but are diverted to the
+ * host adapter depending on the ring number.
+ *
+ */
+struct netmap_bwrap_adapter {
+ struct netmap_vp_adapter up;
+ struct netmap_vp_adapter host; /* for host rings */
+ struct netmap_adapter *hwna; /* the underlying device */
+
+ /* backup of the hwna notify callback */
+ int (*save_notify)(struct netmap_adapter *,
+ u_int ring, enum txrx, int flags);
/* When we attach a physical interface to the bridge, we
* allow the controlling process to terminate, so we need
* a place to store the netmap_priv_d data structure.
* This is only done when physical interfaces are attached to a bridge.
*/
struct netmap_priv_d *na_kpriv;
-
- /* memory allocator */
- struct netmap_mem_d *nm_mem;
-#ifdef linux
- struct net_device_ops nm_ndo;
-#endif /* linux */
};
+
/*
- * Available space in the ring.
+ * Available space in the ring. Only used in VALE code
*/
static inline uint32_t
nm_kr_space(struct netmap_kring *k, int is_rx)
@@ -357,7 +524,7 @@ nm_kr_space(struct netmap_kring *k, int is_rx)
int space;
if (is_rx) {
- int busy = k->nkr_hwlease - k->nr_hwcur;
+ int busy = k->nkr_hwlease - k->nr_hwcur + k->nr_hwreserved;
if (busy < 0)
busy += k->nkr_num_slots;
space = k->nkr_num_slots - 1 - busy;
@@ -381,25 +548,6 @@ nm_kr_space(struct netmap_kring *k, int is_rx)
}
-/* return update position */
-static inline uint32_t
-nm_kr_rxpos(struct netmap_kring *k)
-{
- uint32_t pos = k->nr_hwcur + k->nr_hwavail;
- if (pos >= k->nkr_num_slots)
- pos -= k->nkr_num_slots;
-#if 0
- if (pos >= k->nkr_num_slots ||
- k->nkr_hwlease >= k->nkr_num_slots ||
- k->nr_hwcur >= k->nkr_num_slots ||
- k->nr_hwavail >= k->nkr_num_slots ||
- k->nkr_lease_idx >= k->nkr_num_slots) {
- D("invalid kring, cur %d avail %d lease %d lease_idx %d lim %d", k->nr_hwcur, k->nr_hwavail, k->nkr_hwlease,
- k->nkr_lease_idx, k->nkr_num_slots);
- }
-#endif
- return pos;
-}
/* make a lease on the kring for N positions. return the
@@ -435,23 +583,61 @@ nm_kr_lease(struct netmap_kring *k, u_int n, int is_rx)
return lease_idx;
}
+#endif /* WITH_VALE */
+
+/* return update position */
+static inline uint32_t
+nm_kr_rxpos(struct netmap_kring *k)
+{
+ uint32_t pos = k->nr_hwcur + k->nr_hwavail;
+ if (pos >= k->nkr_num_slots)
+ pos -= k->nkr_num_slots;
+#if 0
+ if (pos >= k->nkr_num_slots ||
+ k->nkr_hwlease >= k->nkr_num_slots ||
+ k->nr_hwcur >= k->nkr_num_slots ||
+ k->nr_hwavail >= k->nkr_num_slots ||
+ k->nkr_lease_idx >= k->nkr_num_slots) {
+ D("invalid kring, cur %d avail %d lease %d lease_idx %d lim %d", k->nr_hwcur, k->nr_hwavail, k->nkr_hwlease,
+ k->nkr_lease_idx, k->nkr_num_slots);
+ }
+#endif
+ return pos;
+}
+
/*
- * XXX NETMAP_DELETING() is unused
- *
- * The combination of "enable" (ifp->if_capenable & IFCAP_NETMAP)
- * and refcount gives the status of the interface, namely:
- *
- * enable refcount Status
- *
- * FALSE 0 normal operation
- * FALSE != 0 -- (impossible)
- * TRUE 1 netmap mode
- * TRUE 0 being deleted.
+ * Protect against multiple threads using the same ring.
+ * Also check that the ring has not been stopped.
+ * Callers only need to distinguish a zero from a non-zero return code.
*/
+#define NM_KR_BUSY 1
+#define NM_KR_STOPPED 2
-#define NETMAP_DELETING(_na) ( ((_na)->refcount == 0) && \
- ( (_na)->ifp->if_capenable & IFCAP_NETMAP) )
+static __inline void nm_kr_put(struct netmap_kring *kr)
+{
+ NM_ATOMIC_CLEAR(&kr->nr_busy);
+}
+
+static __inline int nm_kr_tryget(struct netmap_kring *kr)
+{
+ /* check a first time without taking the lock
+ * to avoid starvation for nm_kr_get()
+ */
+ if (unlikely(kr->nkr_stopped)) {
+ ND("ring %p stopped (%d)", kr, kr->nkr_stopped);
+ return NM_KR_STOPPED;
+ }
+ if (unlikely(NM_ATOMIC_TEST_AND_SET(&kr->nr_busy)))
+ return NM_KR_BUSY;
+ /* check a second time with lock held */
+ if (unlikely(kr->nkr_stopped)) {
+ ND("ring %p stopped (%d)", kr, kr->nkr_stopped);
+ nm_kr_put(kr);
+ return NM_KR_STOPPED;
+ }
+ return 0;
+}
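
The intended calling pattern for the two helpers above is to bracket any work on a kring and bail out early when the ring is stopped or already busy. A minimal sketch (the function name is illustrative, not part of this patch):

	/* Hypothetical caller: serialize access to a single kring. */
	static int
	example_kring_work(struct netmap_kring *kring)
	{
		int err = nm_kr_tryget(kring);

		if (err)	/* NM_KR_BUSY or NM_KR_STOPPED */
			return (err == NM_KR_STOPPED) ? ENXIO : EBUSY;
		/* we are the only user of kring->ring here */
		/* ... do the per-ring work ... */
		nm_kr_put(kring);	/* let other threads in again */
		return 0;
	}
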
/*
@@ -472,16 +658,116 @@ nm_kr_lease(struct netmap_kring *k, u_int n, int is_rx)
* netmap_reset() is a helper routine to be called in the driver
* when reinitializing a ring.
*/
-int netmap_attach(struct netmap_adapter *, u_int);
+int netmap_attach(struct netmap_adapter *);
+int netmap_attach_common(struct netmap_adapter *);
+void netmap_detach_common(struct netmap_adapter *na);
void netmap_detach(struct ifnet *);
int netmap_transmit(struct ifnet *, struct mbuf *);
-enum txrx { NR_RX = 0, NR_TX = 1 };
struct netmap_slot *netmap_reset(struct netmap_adapter *na,
enum txrx tx, u_int n, u_int new_cur);
int netmap_ring_reinit(struct netmap_kring *);
+/* set/clear native flags. XXX maybe also if_transmit ? */
+static inline void
+nm_set_native_flags(struct netmap_adapter *na)
+{
+ struct ifnet *ifp = na->ifp;
+
+ na->na_flags |= (NAF_NATIVE_ON | NAF_NETMAP_ON);
+#ifdef IFCAP_NETMAP /* or FreeBSD ? */
+ ifp->if_capenable |= IFCAP_NETMAP;
+#endif
+#ifdef __FreeBSD__
+ na->if_transmit = ifp->if_transmit;
+ ifp->if_transmit = netmap_transmit;
+#else
+ na->if_transmit = (void *)ifp->netdev_ops;
+ ifp->netdev_ops = &((struct netmap_hw_adapter *)na)->nm_ndo;
+#endif
+}
+
+static inline void
+nm_clear_native_flags(struct netmap_adapter *na)
+{
+ struct ifnet *ifp = na->ifp;
+
+#ifdef __FreeBSD__
+ ifp->if_transmit = na->if_transmit;
+#else
+ ifp->netdev_ops = (void *)na->if_transmit;
+#endif
+ na->na_flags &= ~(NAF_NATIVE_ON | NAF_NETMAP_ON);
+#ifdef IFCAP_NETMAP /* or FreeBSD ? */
+ ifp->if_capenable &= ~IFCAP_NETMAP;
+#endif
+}
+
+/*
+ * Validates parameters in the ring/kring, returns a value for cur,
+ * and stores the 'new_slots' value in the argument.
+ * On any error it returns cur > lim to force a reinit.
+ */
+u_int nm_txsync_prologue(struct netmap_kring *, u_int *);
+
+/*
+ * Validates parameters in the ring/kring, returns a value for cur,
+ * and stores the 'reserved' value in the argument.
+ * On any error it returns cur > lim to force a reinit.
+ */
+u_int nm_rxsync_prologue(struct netmap_kring *, u_int *);
+
+/*
+ * update kring and ring at the end of txsync
+ */
+static inline void
+nm_txsync_finalize(struct netmap_kring *kring, u_int cur)
+{
+ /* recompute hwreserved */
+ kring->nr_hwreserved = cur - kring->nr_hwcur;
+ if (kring->nr_hwreserved < 0)
+ kring->nr_hwreserved += kring->nkr_num_slots;
+
+ /* update avail and reserved to what the kernel knows */
+ kring->ring->avail = kring->nr_hwavail;
+ kring->ring->reserved = kring->nr_hwreserved;
+}
+
+/* check/fix address and len in tx rings */
+#if 1 /* debug version */
+#define NM_CHECK_ADDR_LEN(_a, _l) do { \
+ if (_a == netmap_buffer_base || _l > NETMAP_BUF_SIZE) { \
+ RD(5, "bad addr/len ring %d slot %d idx %d len %d", \
+ ring_nr, nm_i, slot->buf_idx, len); \
+ if (_l > NETMAP_BUF_SIZE) \
+ _l = NETMAP_BUF_SIZE; \
+ } } while (0)
+#else /* no debug version */
+#define NM_CHECK_ADDR_LEN(_a, _l) do { \
+ if (_l > NETMAP_BUF_SIZE) \
+ _l = NETMAP_BUF_SIZE; \
+ } while (0)
+#endif
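
The prologue/finalize helpers and NM_CHECK_ADDR_LEN() above are meant to bracket a driver's per-slot transmit loop. A minimal sketch of that skeleton, assuming kring, ring and ring_nr are in scope as in a typical txsync callback (the NIC-specific descriptor programming is elided):

	u_int nm_i, new_slots;
	u_int const lim = kring->nkr_num_slots - 1;
	u_int const cur = nm_txsync_prologue(kring, &new_slots);

	if (cur > lim)			/* inconsistent ring, ask for a reinit */
		return netmap_ring_reinit(kring);
	for (nm_i = kring->nr_hwcur; nm_i != cur;
	     nm_i = (nm_i == lim) ? 0 : nm_i + 1) {
		struct netmap_slot *slot = &ring->slot[nm_i];
		u_int len = slot->len;
		uint64_t paddr;
		void *addr = PNMB(slot, &paddr);

		NM_CHECK_ADDR_LEN(addr, len);	/* clamp/report bad buffers */
		/* ... fill the NIC descriptor with paddr and len ... */
	}
	kring->nr_hwcur = cur;
	/* ... reclaim completed slots and update kring->nr_hwavail ... */
	nm_txsync_finalize(kring, cur);		/* publish avail/reserved */
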
+
+
+/*---------------------------------------------------------------*/
+/*
+ * Support routines to be used with the VALE switch
+ */
+int netmap_update_config(struct netmap_adapter *na);
+int netmap_krings_create(struct netmap_adapter *na, u_int ntx, u_int nrx, u_int tailroom);
+void netmap_krings_delete(struct netmap_adapter *na);
+
+struct netmap_if *
+netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na,
+ uint16_t ringid, int *err);
+
+
+
u_int nm_bound_var(u_int *v, u_int dflt, u_int lo, u_int hi, const char *msg);
+int netmap_get_na(struct nmreq *nmr, struct netmap_adapter **na, int create);
+int netmap_get_hw_na(struct ifnet *ifp, struct netmap_adapter **na);
+#ifdef WITH_VALE
/*
* The following bridge-related interfaces are used by other kernel modules
* In the version that only supports unicast or broadcast, the lookup
@@ -489,15 +775,76 @@ u_int nm_bound_var(u_int *v, u_int dflt, u_int lo, u_int hi, const char *msg);
* NM_BDG_MAXPORTS for broadcast, NM_BDG_MAXPORTS+1 for unknown.
* XXX in practice "unknown" might be handled same as broadcast.
*/
-typedef u_int (*bdg_lookup_fn_t)(char *buf, u_int len, uint8_t *ring_nr,
- struct netmap_adapter *);
-int netmap_bdg_ctl(struct nmreq *nmr, bdg_lookup_fn_t func);
-u_int netmap_bdg_learning(char *, u_int, uint8_t *, struct netmap_adapter *);
-#define NM_NAME "vale" /* prefix for the bridge port name */
-#define NM_BDG_MAXPORTS 254 /* up to 32 for bitmap, 254 ok otherwise */
+typedef u_int (*bdg_lookup_fn_t)(char *buf, u_int len,
+ uint8_t *ring_nr, struct netmap_vp_adapter *);
+u_int netmap_bdg_learning(char *, u_int, uint8_t *,
+ struct netmap_vp_adapter *);
+
+#define NM_BDG_MAXPORTS 254 /* up to 254 */
#define NM_BDG_BROADCAST NM_BDG_MAXPORTS
#define NM_BDG_NOPORT (NM_BDG_MAXPORTS+1)
+#define NM_NAME "vale" /* prefix for bridge port name */
+
+
+/* these are redefined in case of no VALE support */
+int netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create);
+void netmap_init_bridges(void);
+int netmap_bdg_ctl(struct nmreq *nmr, bdg_lookup_fn_t func);
+
+#else /* !WITH_VALE */
+#define netmap_get_bdg_na(_1, _2, _3) 0
+#define netmap_init_bridges(_1)
+#define netmap_bdg_ctl(_1, _2) EINVAL
+#endif /* !WITH_VALE */
+
+/* Various prototypes */
+int netmap_poll(struct cdev *dev, int events, struct thread *td);
+
+
+int netmap_init(void);
+void netmap_fini(void);
+int netmap_get_memory(struct netmap_priv_d* p);
+void netmap_dtor(void *data);
+int netmap_dtor_locked(struct netmap_priv_d *priv);
+
+int netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, struct thread *td);
+
+/* netmap_adapter creation/destruction */
+#define NM_IFPNAME(ifp) ((ifp) ? (ifp)->if_xname : "zombie")
+#define NM_DEBUG_PUTGET 1
+
+#ifdef NM_DEBUG_PUTGET
+
+#define NM_DBG(f) __##f
+
+void __netmap_adapter_get(struct netmap_adapter *na);
+
+#define netmap_adapter_get(na) \
+ do { \
+ struct netmap_adapter *__na = na; \
+ D("getting %p:%s (%d)", __na, NM_IFPNAME(__na->ifp), __na->na_refcount); \
+ __netmap_adapter_get(__na); \
+ } while (0)
+
+int __netmap_adapter_put(struct netmap_adapter *na);
+
+#define netmap_adapter_put(na) \
+ do { \
+ struct netmap_adapter *__na = na; \
+ D("putting %p:%s (%d)", __na, NM_IFPNAME(__na->ifp), __na->na_refcount); \
+ __netmap_adapter_put(__na); \
+ } while (0)
+
+#else /* !NM_DEBUG_PUTGET */
+
+#define NM_DBG(f) f
+void netmap_adapter_get(struct netmap_adapter *na);
+int netmap_adapter_put(struct netmap_adapter *na);
+
+#endif /* !NM_DEBUG_PUTGET */
+
+
extern u_int netmap_buf_size;
#define NETMAP_BUF_SIZE netmap_buf_size // XXX remove
extern int netmap_mitigate;
@@ -516,18 +863,18 @@ enum { /* verbose flags */
NM_VERB_NIC_TXSYNC = 0x2000,
};
+extern int netmap_txsync_retry;
+extern int netmap_generic_mit;
+extern int netmap_generic_ringsize;
+
/*
* NA returns a pointer to the struct netmap adapter from the ifp,
* WNA is used to write it.
- * SWNA() is used for the "host stack" endpoint associated
- * to an interface. It is allocated together with the main NA(),
- * as an array of two objects.
*/
#ifndef WNA
#define WNA(_ifp) (_ifp)->if_pspare[0]
#endif
#define NA(_ifp) ((struct netmap_adapter *)WNA(_ifp))
-#define SWNA(_ifp) (NA(_ifp) + 1)
/*
* Macros to determine if an interface is netmap capable or netmap enabled.
@@ -561,6 +908,7 @@ enum { /* verbose flags */
#endif /* linux */
#ifdef __FreeBSD__
+
/* Callback invoked by the dma machinery after a successful dmamap_load */
static void netmap_dmamap_cb(__unused void *arg,
__unused bus_dma_segment_t * segs, __unused int nseg, __unused int error)
@@ -588,6 +936,7 @@ netmap_reload_map(bus_dma_tag_t tag, bus_dmamap_t map, void *buf)
netmap_dmamap_cb, NULL, BUS_DMA_NOWAIT);
}
}
+
#else /* linux */
/*
@@ -695,16 +1044,97 @@ PNMB(struct netmap_slot *slot, uint64_t *pp)
return ret;
}
+/* Generic version of NMB, which uses device-specific memory. */
+static inline void *
+BDG_NMB(struct netmap_adapter *na, struct netmap_slot *slot)
+{
+ struct lut_entry *lut = na->na_lut;
+ uint32_t i = slot->buf_idx;
+ return (unlikely(i >= na->na_lut_objtotal)) ?
+ lut[0].vaddr : lut[i].vaddr;
+}
+
/* default functions to handle rx/tx interrupts */
int netmap_rx_irq(struct ifnet *, u_int, u_int *);
#define netmap_tx_irq(_n, _q) netmap_rx_irq(_n, _q, NULL)
-
-#ifdef __FreeBSD__
-MALLOC_DECLARE(M_NETMAP);
-#endif /* __FreeBSD__ */
+void netmap_common_irq(struct ifnet *, u_int, u_int *work_done);
+void netmap_txsync_to_host(struct netmap_adapter *na);
void netmap_disable_all_rings(struct ifnet *);
void netmap_enable_all_rings(struct ifnet *);
+void netmap_disable_ring(struct netmap_kring *kr);
+
+
+/* Structure associated to each thread which registered an interface.
+ *
+ * The first 4 fields of this structure are written by NIOCREGIF and
+ * read by poll() and NIOC?XSYNC.
+ * There is low contention among writers (actually, a correct user program
+ * should have no contention among writers) and among writers and readers,
+ * so we use a single global lock to protect the structure initialization.
+ * Since initialization involves the allocation of memory, we reuse the memory
+ * allocator lock.
+ * Read access to the structure is lock free. Readers must check that
+ * np_nifp is not NULL before using the other fields.
+ * If np_nifp is NULL initialization has not been performed, so they should
+ * return an error to userlevel.
+ *
+ * The ref_done field is used to regulate access to the refcount in the
+ * memory allocator. The refcount must be incremented at most once for
+ * each open("/dev/netmap"). The increment is performed by the first
+ * function that calls netmap_get_memory() (currently called by
+ * mmap(), NIOCGINFO and NIOCREGIF).
+ * If the refcount is incremented, it is then decremented when the
+ * private structure is destroyed.
+ */
+struct netmap_priv_d {
+ struct netmap_if * volatile np_nifp; /* netmap if descriptor. */
+
+ struct netmap_adapter *np_na;
+ int np_ringid; /* from the ioctl */
+ u_int np_qfirst, np_qlast; /* range of rings to scan */
+ uint16_t np_txpoll;
+
+ struct netmap_mem_d *np_mref; /* use with NMG_LOCK held */
+ /* np_refcount is only used on FreeBSD */
+ int np_refcount; /* use with NMG_LOCK held */
+};
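
The reader-side protocol described in the comment above amounts to a single check before any fast-path use of the structure; a short sketch of what a poll()/ioctl() handler is expected to do:

	/* Reader side: np_nifp != NULL means NIOCREGIF has completed. */
	if (priv->np_nifp == NULL)
		return ENXIO;
	/* np_nifp is written last, so the remaining fields are now valid */
	na = priv->np_na;
	/* ... use priv->np_qfirst, priv->np_qlast, ... */
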
+
+
+/*
+ * generic netmap emulation for devices that do not have
+ * native netmap support.
+ * XXX generic_netmap_register() is only exported to implement
+ * nma_is_generic().
+ */
+int generic_netmap_register(struct netmap_adapter *na, int enable);
+int generic_netmap_attach(struct ifnet *ifp);
+
+int netmap_catch_rx(struct netmap_adapter *na, int intercept);
+void generic_rx_handler(struct ifnet *ifp, struct mbuf *m);
+void netmap_catch_packet_steering(struct netmap_generic_adapter *na, int enable);
+int generic_xmit_frame(struct ifnet *ifp, struct mbuf *m, void *addr, u_int len, u_int ring_nr);
+int generic_find_num_desc(struct ifnet *ifp, u_int *tx, u_int *rx);
+void generic_find_num_queues(struct ifnet *ifp, u_int *txq, u_int *rxq);
+
+static __inline int
+nma_is_generic(struct netmap_adapter *na)
+{
+ return na->nm_register == generic_netmap_register;
+}
+
+/*
+ * netmap_mitigation API. This is used by the generic adapter
+ * to reduce the number of interrupt requests/selwakeup
+ * to clients on incoming packets.
+ */
+void netmap_mitigation_init(struct netmap_generic_adapter *na);
+void netmap_mitigation_start(struct netmap_generic_adapter *na);
+void netmap_mitigation_restart(struct netmap_generic_adapter *na);
+int netmap_mitigation_active(struct netmap_generic_adapter *na);
+void netmap_mitigation_cleanup(struct netmap_generic_adapter *na);
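
One plausible way for the generic RX path to combine these calls (a sketch under assumptions about the intended semantics, not a copy of netmap_generic.c; the function name is hypothetical):

	static void
	example_generic_rx(struct netmap_generic_adapter *gna, struct ifnet *ifp)
	{
		u_int work_done = 0;

		/* ... the incoming mbuf has already been queued for the ring ... */
		if (!netmap_mitigation_active(gna)) {
			/* notify the user now, then hold back further
			 * notifications for one mitigation period */
			netmap_rx_irq(ifp, 0, &work_done);
			netmap_mitigation_start(gna);
		} else {
			gna->mit_pending = 1;	/* the timer will notify later */
		}
	}
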
+
+// int generic_timer_handler(struct hrtimer *t);
#endif /* _NET_NETMAP_KERN_H_ */
diff --git a/sys/dev/netmap/netmap_mbq.c b/sys/dev/netmap/netmap_mbq.c
new file mode 100644
index 000000000000..c8e581b69fe5
--- /dev/null
+++ b/sys/dev/netmap/netmap_mbq.c
@@ -0,0 +1,152 @@
+/*
+ * Copyright (C) 2013 Vincenzo Maffione. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * $FreeBSD$
+ */
+
+
+#ifdef linux
+#include "bsd_glue.h"
+#else /* __FreeBSD__ */
+#include <sys/param.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#endif /* __FreeBSD__ */
+
+#include "netmap_mbq.h"
+
+
+static inline void __mbq_init(struct mbq *q)
+{
+ q->head = q->tail = NULL;
+ q->count = 0;
+}
+
+void mbq_safe_init(struct mbq *q)
+{
+ mtx_init(&q->lock, "mbq", NULL, MTX_SPIN);
+ __mbq_init(q);
+}
+
+void mbq_init(struct mbq *q)
+{
+ __mbq_init(q);
+}
+
+static inline void __mbq_enqueue(struct mbq *q, struct mbuf *m)
+{
+ m->m_nextpkt = NULL;
+ if (q->tail) {
+ q->tail->m_nextpkt = m;
+ q->tail = m;
+ } else {
+ q->head = q->tail = m;
+ }
+ q->count++;
+}
+
+void mbq_safe_enqueue(struct mbq *q, struct mbuf *m)
+{
+ mtx_lock(&q->lock);
+ __mbq_enqueue(q, m);
+ mtx_unlock(&q->lock);
+}
+
+void mbq_enqueue(struct mbq *q, struct mbuf *m)
+{
+ __mbq_enqueue(q, m);
+}
+
+static inline struct mbuf *__mbq_dequeue(struct mbq *q)
+{
+ struct mbuf *ret = NULL;
+
+ if (q->head) {
+ ret = q->head;
+ q->head = ret->m_nextpkt;
+ if (q->head == NULL) {
+ q->tail = NULL;
+ }
+ q->count--;
+ ret->m_nextpkt = NULL;
+ }
+
+ return ret;
+}
+
+struct mbuf *mbq_safe_dequeue(struct mbq *q)
+{
+ struct mbuf *ret;
+
+ mtx_lock(&q->lock);
+ ret = __mbq_dequeue(q);
+ mtx_unlock(&q->lock);
+
+ return ret;
+}
+
+struct mbuf *mbq_dequeue(struct mbq *q)
+{
+ return __mbq_dequeue(q);
+}
+
+/* XXX seems pointless to have a generic purge */
+static void __mbq_purge(struct mbq *q, int safe)
+{
+ struct mbuf *m;
+
+ for (;;) {
+ m = safe ? mbq_safe_dequeue(q) : mbq_dequeue(q);
+ if (m) {
+ m_freem(m);
+ } else {
+ break;
+ }
+ }
+}
+
+void mbq_purge(struct mbq *q)
+{
+ __mbq_purge(q, 0);
+}
+
+void mbq_safe_purge(struct mbq *q)
+{
+ __mbq_purge(q, 1);
+}
+
+void mbq_safe_destroy(struct mbq *q)
+{
+ mtx_destroy(&q->lock);
+}
+
+
+void mbq_destroy(struct mbq *q)
+{
+}
+
diff --git a/sys/dev/netmap/netmap_mbq.h b/sys/dev/netmap/netmap_mbq.h
new file mode 100644
index 000000000000..ad023b617a5d
--- /dev/null
+++ b/sys/dev/netmap/netmap_mbq.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (C) 2013 Vincenzo Maffione. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * $FreeBSD$
+ */
+
+
+#ifndef __NETMAP_MBQ_H__
+#define __NETMAP_MBQ_H__
+
+/*
+ * These functions implement an mbuf tailq with an optional lock.
+ * The base functions act ONLY ON THE QUEUE, whereas the "safe"
+ * variants (mbq_safe_*) also handle the lock.
+ */
+
+/* XXX probably rely on a previous definition of SPINLOCK_T */
+#ifdef linux
+#define SPINLOCK_T safe_spinlock_t
+#else
+#define SPINLOCK_T struct mtx
+#endif
+
+/* A FIFO queue of mbufs with an optional lock. */
+struct mbq {
+ struct mbuf *head;
+ struct mbuf *tail;
+ int count;
+ SPINLOCK_T lock;
+};
+
+/* XXX "destroy" does not match "init" as a name.
+ * We should also clarify whether init can be used while
+ * holding a lock, and whether mbq_safe_destroy() is a NOP.
+ */
+void mbq_init(struct mbq *q);
+void mbq_destroy(struct mbq *q);
+void mbq_enqueue(struct mbq *q, struct mbuf *m);
+struct mbuf *mbq_dequeue(struct mbq *q);
+void mbq_purge(struct mbq *q);
+
+/* XXX missing mbq_lock() and mbq_unlock() */
+
+void mbq_safe_init(struct mbq *q);
+void mbq_safe_destroy(struct mbq *q);
+void mbq_safe_enqueue(struct mbq *q, struct mbuf *m);
+struct mbuf *mbq_safe_dequeue(struct mbq *q);
+void mbq_safe_purge(struct mbq *q);
+
+static inline unsigned int mbq_len(struct mbq *q)
+{
+ return q->count;
+}
+
+#endif /* __NETMAP_MBQ_H__ */
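
A minimal producer/consumer sketch of the API above; the producer and consumer contexts are illustrative, and the "safe" variants are the ones to use when the two run concurrently:

	struct mbq q;
	struct mbuf *m;

	mbq_safe_init(&q);		/* also initializes the spinlock */

	/* producer side: m points to a received mbuf */
	mbq_safe_enqueue(&q, m);

	/* consumer side */
	while ((m = mbq_safe_dequeue(&q)) != NULL) {
		/* ... process the mbuf, then release it ... */
		m_freem(m);
	}

	mbq_safe_purge(&q);		/* drop anything still queued */
	mbq_safe_destroy(&q);		/* release the lock */
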
diff --git a/sys/dev/netmap/netmap_mem2.c b/sys/dev/netmap/netmap_mem2.c
index a78904216057..f28f2c04751a 100644
--- a/sys/dev/netmap/netmap_mem2.c
+++ b/sys/dev/netmap/netmap_mem2.c
@@ -8,7 +8,7 @@
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
+ * documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
@@ -167,12 +167,12 @@ const struct netmap_mem_d nm_blueprint = {
#define DECLARE_SYSCTLS(id, name) \
SYSCTL_INT(_dev_netmap, OID_AUTO, name##_size, \
CTLFLAG_RW, &netmap_params[id].size, 0, "Requested size of netmap " STRINGIFY(name) "s"); \
- SYSCTL_INT(_dev_netmap, OID_AUTO, name##_curr_size, \
- CTLFLAG_RD, &nm_mem.pools[id]._objsize, 0, "Current size of netmap " STRINGIFY(name) "s"); \
- SYSCTL_INT(_dev_netmap, OID_AUTO, name##_num, \
- CTLFLAG_RW, &netmap_params[id].num, 0, "Requested number of netmap " STRINGIFY(name) "s"); \
- SYSCTL_INT(_dev_netmap, OID_AUTO, name##_curr_num, \
- CTLFLAG_RD, &nm_mem.pools[id].objtotal, 0, "Current number of netmap " STRINGIFY(name) "s")
+ SYSCTL_INT(_dev_netmap, OID_AUTO, name##_curr_size, \
+ CTLFLAG_RD, &nm_mem.pools[id]._objsize, 0, "Current size of netmap " STRINGIFY(name) "s"); \
+ SYSCTL_INT(_dev_netmap, OID_AUTO, name##_num, \
+ CTLFLAG_RW, &netmap_params[id].num, 0, "Requested number of netmap " STRINGIFY(name) "s"); \
+ SYSCTL_INT(_dev_netmap, OID_AUTO, name##_curr_num, \
+ CTLFLAG_RD, &nm_mem.pools[id].objtotal, 0, "Current number of netmap " STRINGIFY(name) "s")
SYSCTL_DECL(_dev_netmap);
DECLARE_SYSCTLS(NETMAP_IF_POOL, if);
@@ -310,7 +310,7 @@ netmap_obj_malloc(struct netmap_obj_pool *p, u_int len, uint32_t *start, uint32_
}
if (p->objfree == 0) {
- D("%s allocator: run out of memory", p->name);
+ D("no more %s objects", p->name);
return NULL;
}
if (start)
@@ -395,28 +395,22 @@ netmap_obj_free_va(struct netmap_obj_pool *p, void *vaddr)
/* Return nonzero on error */
static int
-netmap_new_bufs(struct netmap_mem_d *nmd, struct netmap_if *nifp,
- struct netmap_slot *slot, u_int n)
+netmap_new_bufs(struct netmap_mem_d *nmd, struct netmap_slot *slot, u_int n)
{
struct netmap_obj_pool *p = &nmd->pools[NETMAP_BUF_POOL];
u_int i = 0; /* slot counter */
uint32_t pos = 0; /* slot in p->bitmap */
uint32_t index = 0; /* buffer index */
- (void)nifp; /* UNUSED */
for (i = 0; i < n; i++) {
void *vaddr = netmap_buf_malloc(nmd, &pos, &index);
if (vaddr == NULL) {
- D("unable to locate empty packet buffer");
+ D("no more buffers after %d of %d", i, n);
goto cleanup;
}
slot[i].buf_idx = index;
slot[i].len = p->_objsize;
- /* XXX setting flags=NS_BUF_CHANGED forces a pointer reload
- * in the NIC ring. This is a hack that hides missing
- * initializations in the drivers, and should go away.
- */
- // slot[i].flags = NS_BUF_CHANGED;
+ slot[i].flags = 0;
}
ND("allocated %d buffers, %d available, first at %d", n, p->objfree, pos);
@@ -433,11 +427,10 @@ cleanup:
static void
-netmap_free_buf(struct netmap_mem_d *nmd, struct netmap_if *nifp, uint32_t i)
+netmap_free_buf(struct netmap_mem_d *nmd, uint32_t i)
{
struct netmap_obj_pool *p = &nmd->pools[NETMAP_BUF_POOL];
- (void)nifp;
if (i < 2 || i >= p->objtotal) {
D("Cannot free buf#%d: should be in [2, %d[", i, p->objtotal);
return;
@@ -760,7 +753,8 @@ netmap_mem_private_finalize(struct netmap_mem_d *nmd)
}
-static void netmap_mem_private_deref(struct netmap_mem_d *nmd)
+static void
+netmap_mem_private_deref(struct netmap_mem_d *nmd)
{
NMA_LOCK(nmd);
if (--nmd->refcount <= 0)
@@ -845,7 +839,7 @@ netmap_mem_global_config(struct netmap_mem_d *nmd)
netmap_reset_obj_allocator(&nmd->pools[i]);
}
nmd->flags &= ~NETMAP_MEM_FINALIZED;
- }
+ }
for (i = 0; i < NETMAP_POOLS_NR; i++) {
nmd->lasterr = netmap_config_obj_allocator(&nmd->pools[i],
@@ -938,176 +932,156 @@ netmap_free_rings(struct netmap_adapter *na)
na->rx_rings[i].ring = NULL;
}
}
- free(na->tx_rings, M_DEVBUF);
- na->tx_rings = na->rx_rings = NULL;
}
-
-
-/* call with NMA_LOCK held */
-/*
- * Allocate the per-fd structure netmap_if.
- * If this is the first instance, also allocate the krings, rings etc.
+/* call with NMA_LOCK held *
*
- * We assume that the configuration stored in na
- * (number of tx/rx rings and descs) does not change while
- * the interface is in netmap mode.
+ * Allocate netmap rings and buffers for this card
+ * The rings are contiguous, but have variable size.
*/
-extern int nma_is_vp(struct netmap_adapter *na);
-struct netmap_if *
-netmap_mem_if_new(const char *ifname, struct netmap_adapter *na)
+int
+netmap_mem_rings_create(struct netmap_adapter *na)
{
- struct netmap_if *nifp;
struct netmap_ring *ring;
- ssize_t base; /* handy for relative offsets between rings and nifp */
- u_int i, len, ndesc, ntx, nrx;
+ u_int len, ndesc;
struct netmap_kring *kring;
- uint32_t *tx_leases = NULL, *rx_leases = NULL;
-
- /*
- * verify whether virtual port need the stack ring
- */
- ntx = na->num_tx_rings + 1; /* shorthand, include stack ring */
- nrx = na->num_rx_rings + 1; /* shorthand, include stack ring */
- /*
- * the descriptor is followed inline by an array of offsets
- * to the tx and rx rings in the shared memory region.
- * For virtual rx rings we also allocate an array of
- * pointers to assign to nkr_leases.
- */
NMA_LOCK(na->nm_mem);
- len = sizeof(struct netmap_if) + (nrx + ntx) * sizeof(ssize_t);
- nifp = netmap_if_malloc(na->nm_mem, len);
- if (nifp == NULL) {
- NMA_UNLOCK(na->nm_mem);
- return NULL;
- }
-
- /* initialize base fields -- override const */
- *(u_int *)(uintptr_t)&nifp->ni_tx_rings = na->num_tx_rings;
- *(u_int *)(uintptr_t)&nifp->ni_rx_rings = na->num_rx_rings;
- strncpy(nifp->ni_name, ifname, (size_t)IFNAMSIZ);
-
- if (na->refcount) { /* already setup, we are done */
- goto final;
- }
-
- len = (ntx + nrx) * sizeof(struct netmap_kring);
- /*
- * Leases are attached to TX rings on NIC/host ports,
- * and to RX rings on VALE ports.
- */
- if (nma_is_vp(na)) {
- len += sizeof(uint32_t) * na->num_rx_desc * na->num_rx_rings;
- } else {
- len += sizeof(uint32_t) * na->num_tx_desc * ntx;
- }
-
- na->tx_rings = malloc((size_t)len, M_DEVBUF, M_NOWAIT | M_ZERO);
- if (na->tx_rings == NULL) {
- D("Cannot allocate krings for %s", ifname);
- goto cleanup;
- }
- na->rx_rings = na->tx_rings + ntx;
-
- if (nma_is_vp(na)) {
- rx_leases = (uint32_t *)(na->rx_rings + nrx);
- } else {
- tx_leases = (uint32_t *)(na->rx_rings + nrx);
- }
-
- /*
- * First instance, allocate netmap rings and buffers for this card
- * The rings are contiguous, but have variable size.
- */
- for (i = 0; i < ntx; i++) { /* Transmit rings */
- kring = &na->tx_rings[i];
- ndesc = na->num_tx_desc;
- bzero(kring, sizeof(*kring));
+ for (kring = na->tx_rings; kring != na->rx_rings; kring++) { /* Transmit rings */
+ ndesc = kring->nkr_num_slots;
len = sizeof(struct netmap_ring) +
ndesc * sizeof(struct netmap_slot);
ring = netmap_ring_malloc(na->nm_mem, len);
if (ring == NULL) {
- D("Cannot allocate tx_ring[%d] for %s", i, ifname);
+ D("Cannot allocate tx_ring");
goto cleanup;
}
ND("txring[%d] at %p ofs %d", i, ring);
- kring->na = na;
kring->ring = ring;
- if (tx_leases) {
- kring->nkr_leases = tx_leases;
- tx_leases += ndesc;
- }
- *(uint32_t *)(uintptr_t)&ring->num_slots = kring->nkr_num_slots = ndesc;
+ *(uint32_t *)(uintptr_t)&ring->num_slots = ndesc;
*(ssize_t *)(uintptr_t)&ring->buf_ofs =
(na->nm_mem->pools[NETMAP_IF_POOL].memtotal +
na->nm_mem->pools[NETMAP_RING_POOL].memtotal) -
netmap_ring_offset(na->nm_mem, ring);
- /*
- * IMPORTANT:
- * Always keep one slot empty, so we can detect new
- * transmissions comparing cur and nr_hwcur (they are
- * the same only if there are no new transmissions).
- */
- ring->avail = kring->nr_hwavail = ndesc - 1;
- ring->cur = kring->nr_hwcur = 0;
+ ring->avail = kring->nr_hwavail;
+ ring->cur = kring->nr_hwcur;
*(uint16_t *)(uintptr_t)&ring->nr_buf_size =
NETMAP_BDG_BUF_SIZE(na->nm_mem);
- ND("initializing slots for txring[%d]", i);
- if (netmap_new_bufs(na->nm_mem, nifp, ring->slot, ndesc)) {
- D("Cannot allocate buffers for tx_ring[%d] for %s", i, ifname);
+ ND("initializing slots for txring");
+ if (netmap_new_bufs(na->nm_mem, ring->slot, ndesc)) {
+ D("Cannot allocate buffers for tx_ring");
goto cleanup;
}
}
- for (i = 0; i < nrx; i++) { /* Receive rings */
- kring = &na->rx_rings[i];
- ndesc = na->num_rx_desc;
- bzero(kring, sizeof(*kring));
+ for ( ; kring != na->tailroom; kring++) { /* Receive rings */
+ ndesc = kring->nkr_num_slots;
len = sizeof(struct netmap_ring) +
ndesc * sizeof(struct netmap_slot);
ring = netmap_ring_malloc(na->nm_mem, len);
if (ring == NULL) {
- D("Cannot allocate rx_ring[%d] for %s", i, ifname);
+ D("Cannot allocate rx_ring");
goto cleanup;
}
- ND("rxring[%d] at %p ofs %d", i, ring);
+ ND("rxring at %p ofs %d", ring);
- kring->na = na;
kring->ring = ring;
- if (rx_leases && i < na->num_rx_rings) {
- kring->nkr_leases = rx_leases;
- rx_leases += ndesc;
- }
- *(uint32_t *)(uintptr_t)&ring->num_slots = kring->nkr_num_slots = ndesc;
+ *(uint32_t *)(uintptr_t)&ring->num_slots = ndesc;
*(ssize_t *)(uintptr_t)&ring->buf_ofs =
(na->nm_mem->pools[NETMAP_IF_POOL].memtotal +
na->nm_mem->pools[NETMAP_RING_POOL].memtotal) -
netmap_ring_offset(na->nm_mem, ring);
- ring->cur = kring->nr_hwcur = 0;
- ring->avail = kring->nr_hwavail = 0; /* empty */
+ ring->cur = kring->nr_hwcur;
+ ring->avail = kring->nr_hwavail;
*(int *)(uintptr_t)&ring->nr_buf_size =
NETMAP_BDG_BUF_SIZE(na->nm_mem);
ND("initializing slots for rxring[%d]", i);
- if (netmap_new_bufs(na->nm_mem, nifp, ring->slot, ndesc)) {
- D("Cannot allocate buffers for rx_ring[%d] for %s", i, ifname);
+ if (netmap_new_bufs(na->nm_mem, ring->slot, ndesc)) {
+ D("Cannot allocate buffers for rx_ring");
goto cleanup;
}
}
-#ifdef linux
- // XXX initialize the selrecord structs.
- for (i = 0; i < ntx; i++)
- init_waitqueue_head(&na->tx_rings[i].si);
- for (i = 0; i < nrx; i++)
- init_waitqueue_head(&na->rx_rings[i].si);
- init_waitqueue_head(&na->tx_si);
- init_waitqueue_head(&na->rx_si);
-#endif
-final:
+
+ NMA_UNLOCK(na->nm_mem);
+
+ return 0;
+
+cleanup:
+ netmap_free_rings(na);
+
+ NMA_UNLOCK(na->nm_mem);
+
+ return ENOMEM;
+}
+
+void
+netmap_mem_rings_delete(struct netmap_adapter *na)
+{
+ /* last instance, release bufs and rings */
+ u_int i, lim;
+ struct netmap_kring *kring;
+ struct netmap_ring *ring;
+
+ NMA_LOCK(na->nm_mem);
+
+ for (kring = na->tx_rings; kring != na->tailroom; kring++) {
+ ring = kring->ring;
+ if (ring == NULL)
+ continue;
+ lim = kring->nkr_num_slots;
+ for (i = 0; i < lim; i++)
+ netmap_free_buf(na->nm_mem, ring->slot[i].buf_idx);
+ }
+ netmap_free_rings(na);
+
+ NMA_UNLOCK(na->nm_mem);
+}
+
+
+/* call with NMA_LOCK held */
+/*
+ * Allocate the per-fd structure netmap_if.
+ *
+ * We assume that the configuration stored in na
+ * (number of tx/rx rings and descs) does not change while
+ * the interface is in netmap mode.
+ */
+struct netmap_if *
+netmap_mem_if_new(const char *ifname, struct netmap_adapter *na)
+{
+ struct netmap_if *nifp;
+ ssize_t base; /* handy for relative offsets between rings and nifp */
+ u_int i, len, ntx, nrx;
+
+ /*
+	 * verify whether the virtual port needs the stack ring
+ */
+ ntx = na->num_tx_rings + 1; /* shorthand, include stack ring */
+ nrx = na->num_rx_rings + 1; /* shorthand, include stack ring */
+ /*
+ * the descriptor is followed inline by an array of offsets
+ * to the tx and rx rings in the shared memory region.
+ * For virtual rx rings we also allocate an array of
+ * pointers to assign to nkr_leases.
+ */
+
+ NMA_LOCK(na->nm_mem);
+
+ len = sizeof(struct netmap_if) + (nrx + ntx) * sizeof(ssize_t);
+ nifp = netmap_if_malloc(na->nm_mem, len);
+ if (nifp == NULL) {
+ NMA_UNLOCK(na->nm_mem);
+ return NULL;
+ }
+
+ /* initialize base fields -- override const */
+ *(u_int *)(uintptr_t)&nifp->ni_tx_rings = na->num_tx_rings;
+ *(u_int *)(uintptr_t)&nifp->ni_rx_rings = na->num_rx_rings;
+ strncpy(nifp->ni_name, ifname, (size_t)IFNAMSIZ);
+
/*
* fill the slots for the rx and tx rings. They contain the offset
* between the ring and nifp, so the information is usable in
@@ -1126,13 +1100,6 @@ final:
NMA_UNLOCK(na->nm_mem);
return (nifp);
-cleanup:
- netmap_free_rings(na);
- netmap_if_free(na->nm_mem, nifp);
-
- NMA_UNLOCK(na->nm_mem);
-
- return NULL;
}
void
@@ -1143,25 +1110,6 @@ netmap_mem_if_delete(struct netmap_adapter *na, struct netmap_if *nifp)
return;
NMA_LOCK(na->nm_mem);
- if (na->refcount <= 0) {
- /* last instance, release bufs and rings */
- u_int i, j, lim;
- struct netmap_ring *ring;
-
- for (i = 0; i < na->num_tx_rings + 1; i++) {
- ring = na->tx_rings[i].ring;
- lim = na->tx_rings[i].nkr_num_slots;
- for (j = 0; j < lim; j++)
- netmap_free_buf(na->nm_mem, nifp, ring->slot[j].buf_idx);
- }
- for (i = 0; i < na->num_rx_rings + 1; i++) {
- ring = na->rx_rings[i].ring;
- lim = na->rx_rings[i].nkr_num_slots;
- for (j = 0; j < lim; j++)
- netmap_free_buf(na->nm_mem, nifp, ring->slot[j].buf_idx);
- }
- netmap_free_rings(na);
- }
netmap_if_free(na->nm_mem, nifp);
NMA_UNLOCK(na->nm_mem);
@@ -1179,12 +1127,14 @@ netmap_mem_global_deref(struct netmap_mem_d *nmd)
NMA_UNLOCK(nmd);
}
-int netmap_mem_finalize(struct netmap_mem_d *nmd)
+int
+netmap_mem_finalize(struct netmap_mem_d *nmd)
{
return nmd->finalize(nmd);
}
-void netmap_mem_deref(struct netmap_mem_d *nmd)
+void
+netmap_mem_deref(struct netmap_mem_d *nmd)
{
return nmd->deref(nmd);
}
diff --git a/sys/dev/netmap/netmap_mem2.h b/sys/dev/netmap/netmap_mem2.h
index 83f31d011c45..f492f9814b79 100644
--- a/sys/dev/netmap/netmap_mem2.h
+++ b/sys/dev/netmap/netmap_mem2.h
@@ -189,7 +189,7 @@ struct netmap_mem_d {
/* the three allocators */
struct netmap_obj_pool pools[NETMAP_POOLS_NR];
- netmap_mem_config_t config;
+ netmap_mem_config_t config;
netmap_mem_finalize_t finalize;
netmap_mem_deref_t deref;
};
@@ -200,14 +200,17 @@ vm_paddr_t netmap_mem_ofstophys(struct netmap_mem_d *, vm_ooffset_t);
int netmap_mem_finalize(struct netmap_mem_d *);
int netmap_mem_init(void);
void netmap_mem_fini(void);
-struct netmap_if * netmap_mem_if_new(const char *, struct netmap_adapter *);
-void netmap_mem_if_delete(struct netmap_adapter *na, struct netmap_if *nifp);
+struct netmap_if *
+ netmap_mem_if_new(const char *, struct netmap_adapter *);
+void netmap_mem_if_delete(struct netmap_adapter *, struct netmap_if *);
+int netmap_mem_rings_create(struct netmap_adapter *);
+void netmap_mem_rings_delete(struct netmap_adapter *);
void netmap_mem_deref(struct netmap_mem_d *);
-int netmap_mem_get_info(struct netmap_mem_d *nm_mem, u_int *size, u_int *memflags);
-ssize_t netmap_mem_if_offset(struct netmap_mem_d *nm_mem, const void *vaddr);
+int netmap_mem_get_info(struct netmap_mem_d *, u_int *size, u_int *memflags);
+ssize_t netmap_mem_if_offset(struct netmap_mem_d *, const void *vaddr);
struct netmap_mem_d*
netmap_mem_private_new(const char *name, u_int txr, u_int txd, u_int rxr, u_int rxd);
-void netmap_mem_private_delete(struct netmap_mem_d *nm_mem);
+void netmap_mem_private_delete(struct netmap_mem_d *);
#define NETMAP_BDG_BUF_SIZE(n) ((n)->pools[NETMAP_BUF_POOL]._objsize)
diff --git a/sys/dev/netmap/netmap_vale.c b/sys/dev/netmap/netmap_vale.c
new file mode 100644
index 000000000000..e0ce94cccb7d
--- /dev/null
+++ b/sys/dev/netmap/netmap_vale.c
@@ -0,0 +1,1983 @@
+/*
+ * Copyright (C) 2013 Universita` di Pisa. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+
+/*
+ * This module implements the VALE switch for netmap
+
+--- VALE SWITCH ---
+
+NMG_LOCK() serializes all modifications to switches and ports.
+A switch cannot be deleted until all ports are gone.
+
+For each switch, an SX lock (RWlock on linux) protects
+deletion of ports. When configuring or deleting a port, the
+lock is acquired in exclusive mode (after holding NMG_LOCK).
+When forwarding, the lock is acquired in shared mode (without NMG_LOCK).
+The lock is held throughout the entire forwarding cycle,
+during which the thread may incur a page fault.
+Hence it is important that sleepable shared locks are used.
+
+On the rx ring, the per-port lock is grabbed initially to reserve
+a number of slots in the ring, then the lock is released,
+packets are copied from source to destination, and then
+the lock is acquired again and the receive ring is updated.
+(A similar thing is done on the tx ring for NIC and host stack
+ports attached to the switch)
+
+ */
+
+/*
+ * OS-specific code that is used only within this file.
+ * Other OS-specific code that must be accessed by drivers
+ * is present in netmap_kern.h
+ */
+
+#if defined(__FreeBSD__)
+#include <sys/cdefs.h> /* prerequisite */
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/param.h> /* defines used in kernel.h */
+#include <sys/kernel.h> /* types used in module initialization */
+#include <sys/conf.h> /* cdevsw struct, UID, GID */
+#include <sys/sockio.h>
+#include <sys/socketvar.h> /* struct socket */
+#include <sys/malloc.h>
+#include <sys/poll.h>
+#include <sys/rwlock.h>
+#include <sys/socket.h> /* sockaddrs */
+#include <sys/selinfo.h>
+#include <sys/sysctl.h>
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/bpf.h> /* BIOCIMMEDIATE */
+#include <machine/bus.h> /* bus_dmamap_* */
+#include <sys/endian.h>
+#include <sys/refcount.h>
+
+// #define prefetch(x) __builtin_prefetch(x)
+
+
+#define BDG_RWLOCK_T struct rwlock // struct rwlock
+
+#define BDG_RWINIT(b) \
+ rw_init_flags(&(b)->bdg_lock, "bdg lock", RW_NOWITNESS)
+#define BDG_WLOCK(b) rw_wlock(&(b)->bdg_lock)
+#define BDG_WUNLOCK(b) rw_wunlock(&(b)->bdg_lock)
+#define BDG_RLOCK(b) rw_rlock(&(b)->bdg_lock)
+#define BDG_RTRYLOCK(b) rw_try_rlock(&(b)->bdg_lock)
+#define BDG_RUNLOCK(b) rw_runlock(&(b)->bdg_lock)
+#define BDG_RWDESTROY(b) rw_destroy(&(b)->bdg_lock)
+
+
+#elif defined(linux)
+
+#include "bsd_glue.h"
+
+#elif defined(__APPLE__)
+
+#warning OSX support is only partial
+#include "osx_glue.h"
+
+#else
+
+#error Unsupported platform
+
+#endif /* unsupported */
+
+/*
+ * common headers
+ */
+
+#include <net/netmap.h>
+#include <dev/netmap/netmap_kern.h>
+#include <dev/netmap/netmap_mem2.h>
+
+#ifdef WITH_VALE
+
+/*
+ * system parameters (most of them in netmap_kern.h)
+ * NM_NAME prefix for switch port names, default "vale"
+ * NM_BDG_MAXPORTS number of ports
+ * NM_BRIDGES max number of switches in the system.
+ * XXX should become a sysctl or tunable
+ *
+ * Switch ports are named valeX:Y where X is the switch name and Y
+ * is the port. If Y matches a physical interface name, the port is
+ * connected to a physical device.
+ *
+ * Unlike physical interfaces, switch ports use their own memory region
+ * for rings and buffers.
+ * The virtual interfaces use a per-queue lock instead of the core lock.
+ * In the tx loop, we aggregate traffic in batches to make all operations
+ * faster. The batch size is bridge_batch.
+ */
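
For reference, a user process reaches one of these ports simply by passing a "vale" name to NIOCREGIF; the port is created on demand. A minimal userspace sketch (error handling trimmed, "vale0:1" is only an example name, open_vale_port() is not part of this patch):

	#include <sys/ioctl.h>
	#include <fcntl.h>
	#include <string.h>
	#include <net/netmap.h>
	#include <net/netmap_user.h>

	static int
	open_vale_port(void)
	{
		struct nmreq req;
		int fd = open("/dev/netmap", O_RDWR);

		if (fd < 0)
			return -1;
		memset(&req, 0, sizeof(req));
		req.nr_version = NETMAP_API;
		strncpy(req.nr_name, "vale0:1", sizeof(req.nr_name) - 1);
		if (ioctl(fd, NIOCREGIF, &req) < 0)
			return -1;
		/* req.nr_memsize can now be mmap()ed to reach rings and buffers */
		return fd;
	}
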
+#define NM_BDG_MAXRINGS 16 /* XXX unclear how many. */
+#define NM_BDG_MAXSLOTS 4096 /* XXX same as above */
+#define NM_BRIDGE_RINGSIZE 1024 /* in the device */
+#define NM_BDG_HASH 1024 /* forwarding table entries */
+#define NM_BDG_BATCH 1024 /* entries in the forwarding buffer */
+#define NM_MULTISEG 64 /* max size of a chain of bufs */
+/* actual size of the tables */
+#define NM_BDG_BATCH_MAX (NM_BDG_BATCH + NM_MULTISEG)
+/* NM_FT_NULL terminates a list of slots in the ft */
+#define NM_FT_NULL NM_BDG_BATCH_MAX
+#define NM_BRIDGES 8 /* number of bridges */
+
+
+/*
+ * bridge_batch is set via sysctl to the max batch size to be
+ * used in the bridge. The actual value may be larger as the
+ * last packet in the block may overflow the size.
+ */
+int bridge_batch = NM_BDG_BATCH; /* bridge batch size */
+SYSCTL_DECL(_dev_netmap);
+SYSCTL_INT(_dev_netmap, OID_AUTO, bridge_batch, CTLFLAG_RW, &bridge_batch, 0 , "");
+
+
+static int bdg_netmap_attach(struct nmreq *nmr, struct ifnet *ifp);
+static int bdg_netmap_reg(struct netmap_adapter *na, int onoff);
+static int netmap_bwrap_attach(struct ifnet *, struct ifnet *);
+static int netmap_bwrap_register(struct netmap_adapter *, int onoff);
+int kern_netmap_regif(struct nmreq *nmr);
+
+/*
+ * Each transmit queue accumulates a batch of packets into
+ * a structure before forwarding. Packets to the same
+ * destination are put in a list using ft_next as a link field.
+ * ft_frags and ft_next are valid only on the first fragment.
+ */
+struct nm_bdg_fwd { /* forwarding entry for a bridge */
+ void *ft_buf; /* netmap or indirect buffer */
+ uint8_t ft_frags; /* how many fragments (only on 1st frag) */
+ uint8_t _ft_port; /* dst port (unused) */
+ uint16_t ft_flags; /* flags, e.g. indirect */
+ uint16_t ft_len; /* src fragment len */
+ uint16_t ft_next; /* next packet to same destination */
+};
+
+/*
+ * For each output interface, nm_bdg_q is used to construct a list.
+ * bq_len is the number of output buffers (we can have coalescing
+ * during the copy).
+ */
+struct nm_bdg_q {
+ uint16_t bq_head;
+ uint16_t bq_tail;
+ uint32_t bq_len; /* number of buffers */
+};
+
+/* XXX revise this */
+struct nm_hash_ent {
+ uint64_t mac; /* the top 2 bytes are the epoch */
+ uint64_t ports;
+};
+
+/*
+ * nm_bridge is a descriptor for a VALE switch.
+ * Interfaces for a bridge are all in bdg_ports[].
+ * The array has fixed size, an empty entry does not terminate
+ * the search, but lookups only occur on attach/detach so we
+ * don't mind if they are slow.
+ *
+ * The bridge is non blocking on the transmit ports: excess
+ * packets are dropped if there is no room on the output port.
+ *
+ * bdg_lock protects accesses to the bdg_ports array.
+ * This is a rw lock (or equivalent).
+ */
+struct nm_bridge {
+ /* XXX what is the proper alignment/layout ? */
+ BDG_RWLOCK_T bdg_lock; /* protects bdg_ports */
+ int bdg_namelen;
+ uint32_t bdg_active_ports; /* 0 means free */
+ char bdg_basename[IFNAMSIZ];
+
+ /* Indexes of active ports (up to active_ports)
+ * and all other remaining ports.
+ */
+ uint8_t bdg_port_index[NM_BDG_MAXPORTS];
+
+ struct netmap_vp_adapter *bdg_ports[NM_BDG_MAXPORTS];
+
+
+ /*
+ * The function to decide the destination port.
+ * It returns either the index of the destination port,
+ * NM_BDG_BROADCAST to broadcast this packet, or NM_BDG_NOPORT not to
+ * forward this packet. ring_nr is the source ring index, and the
+ * function may overwrite this value to forward this packet to a
+ * different ring index.
+ * This function must be set by netmap_bdg_ctl().
+ */
+ bdg_lookup_fn_t nm_bdg_lookup;
+
+ /* the forwarding table, MAC+ports.
+ * XXX should be changed to an argument to be passed to
+ * the lookup function, and allocated on attach
+ */
+ struct nm_hash_ent ht[NM_BDG_HASH];
+};
+
+
+/*
+ * XXX in principle nm_bridges could be created dynamically
+ * Right now we have a static array and deletions are protected
+ * by an exclusive lock.
+ */
+struct nm_bridge nm_bridges[NM_BRIDGES];
+
+
+/*
+ * A few functions to tell which kind of port we are using.
+ * XXX should we hold a lock ?
+ *
+ * nma_is_vp() virtual port
+ * nma_is_host() port connected to the host stack
+ * nma_is_hw() port connected to a NIC
+ * nma_is_generic() generic netmap adapter XXX stop this madness
+ */
+static __inline int
+nma_is_vp(struct netmap_adapter *na)
+{
+ return na->nm_register == bdg_netmap_reg;
+}
+
+
+static __inline int
+nma_is_host(struct netmap_adapter *na)
+{
+ return na->nm_register == NULL;
+}
+
+
+static __inline int
+nma_is_hw(struct netmap_adapter *na)
+{
+ /* In case of sw adapter, nm_register is NULL */
+ return !nma_is_vp(na) && !nma_is_host(na) && !nma_is_generic(na);
+}
+
+static __inline int
+nma_is_bwrap(struct netmap_adapter *na)
+{
+ return na->nm_register == netmap_bwrap_register;
+}
+
+
+
+/*
+ * this is a slightly optimized copy routine which rounds
+ * to a multiple of 64 bytes and is often faster than dealing
+ * with other odd sizes. We assume there is enough room
+ * in the source and destination buffers.
+ *
+ * XXX only for multiples of 64 bytes, non overlapped.
+ */
+static inline void
+pkt_copy(void *_src, void *_dst, int l)
+{
+ uint64_t *src = _src;
+ uint64_t *dst = _dst;
+ if (unlikely(l >= 1024)) {
+ memcpy(dst, src, l);
+ return;
+ }
+ for (; likely(l > 0); l-=64) {
+ *dst++ = *src++;
+ *dst++ = *src++;
+ *dst++ = *src++;
+ *dst++ = *src++;
+ *dst++ = *src++;
+ *dst++ = *src++;
+ *dst++ = *src++;
+ *dst++ = *src++;
+ }
+}
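
A typical use of pkt_copy() together with BDG_NMB() from netmap_kern.h, copying one frame between two netmap buffers; this assumes both buffers have room for the length rounded up to 64 bytes (the variable names are illustrative):

	/* src_na/dst_na and src_slot/dst_slot are the adapters and slots
	 * of the source and destination rings (hypothetical names). */
	void *src = BDG_NMB(src_na, src_slot);
	void *dst = BDG_NMB(dst_na, dst_slot);

	pkt_copy(src, dst, (int)src_slot->len);
	dst_slot->len = src_slot->len;
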
+
+
+
+/*
+ * locate a bridge among the existing ones.
+ * MUST BE CALLED WITH NMG_LOCK()
+ *
+ * a ':' in the name terminates the bridge name. Otherwise, just NM_NAME.
+ * We assume that this is called with a name of at least NM_NAME chars.
+ */
+static struct nm_bridge *
+nm_find_bridge(const char *name, int create)
+{
+ int i, l, namelen;
+ struct nm_bridge *b = NULL;
+
+ NMG_LOCK_ASSERT();
+
+ namelen = strlen(NM_NAME); /* base length */
+ l = name ? strlen(name) : 0; /* actual length */
+ if (l < namelen) {
+ D("invalid bridge name %s", name ? name : NULL);
+ return NULL;
+ }
+ for (i = namelen + 1; i < l; i++) {
+ if (name[i] == ':') {
+ namelen = i;
+ break;
+ }
+ }
+ if (namelen >= IFNAMSIZ)
+ namelen = IFNAMSIZ;
+ ND("--- prefix is '%.*s' ---", namelen, name);
+
+ /* lookup the name, remember empty slot if there is one */
+ for (i = 0; i < NM_BRIDGES; i++) {
+ struct nm_bridge *x = nm_bridges + i;
+
+ if (x->bdg_active_ports == 0) {
+ if (create && b == NULL)
+ b = x; /* record empty slot */
+ } else if (x->bdg_namelen != namelen) {
+ continue;
+ } else if (strncmp(name, x->bdg_basename, namelen) == 0) {
+ ND("found '%.*s' at %d", namelen, name, i);
+ b = x;
+ break;
+ }
+ }
+ if (i == NM_BRIDGES && b) { /* name not found, can create entry */
+ /* initialize the bridge */
+ strncpy(b->bdg_basename, name, namelen);
+ ND("create new bridge %s with ports %d", b->bdg_basename,
+ b->bdg_active_ports);
+ b->bdg_namelen = namelen;
+ b->bdg_active_ports = 0;
+ for (i = 0; i < NM_BDG_MAXPORTS; i++)
+ b->bdg_port_index[i] = i;
+ /* set the default function */
+ b->nm_bdg_lookup = netmap_bdg_learning;
+ /* reset the MAC address table */
+ bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH);
+ }
+ return b;
+}
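
For example, a caller holding NMG_LOCK() that wants to create the switch named in a request would do something like the following (the name "vale1:vi0" is only an example):

	struct nm_bridge *b;

	NMG_LOCK_ASSERT();
	b = nm_find_bridge("vale1:vi0", 1 /* create if missing */);
	if (b == NULL)
		return ENXIO;	/* bad name or no free bridge slot */
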
+
+
+/*
+ * Free the forwarding tables for rings attached to switch ports.
+ */
+static void
+nm_free_bdgfwd(struct netmap_adapter *na)
+{
+ int nrings, i;
+ struct netmap_kring *kring;
+
+ NMG_LOCK_ASSERT();
+ nrings = nma_is_vp(na) ? na->num_tx_rings : na->num_rx_rings;
+ kring = nma_is_vp(na) ? na->tx_rings : na->rx_rings;
+ for (i = 0; i < nrings; i++) {
+ if (kring[i].nkr_ft) {
+ free(kring[i].nkr_ft, M_DEVBUF);
+ kring[i].nkr_ft = NULL; /* protect from freeing twice */
+ }
+ }
+}
+
+
+/*
+ * Allocate the forwarding tables for the rings attached to the bridge ports.
+ */
+static int
+nm_alloc_bdgfwd(struct netmap_adapter *na)
+{
+ int nrings, l, i, num_dstq;
+ struct netmap_kring *kring;
+
+ NMG_LOCK_ASSERT();
+ /* all port:rings + broadcast */
+ num_dstq = NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1;
+ l = sizeof(struct nm_bdg_fwd) * NM_BDG_BATCH_MAX;
+ l += sizeof(struct nm_bdg_q) * num_dstq;
+ l += sizeof(uint16_t) * NM_BDG_BATCH_MAX;
+
+ nrings = na->num_tx_rings + 1;
+ kring = na->tx_rings;
+ for (i = 0; i < nrings; i++) {
+ struct nm_bdg_fwd *ft;
+ struct nm_bdg_q *dstq;
+ int j;
+
+ ft = malloc(l, M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (!ft) {
+ nm_free_bdgfwd(na);
+ return ENOMEM;
+ }
+ dstq = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
+ for (j = 0; j < num_dstq; j++) {
+ dstq[j].bq_head = dstq[j].bq_tail = NM_FT_NULL;
+ dstq[j].bq_len = 0;
+ }
+ kring[i].nkr_ft = ft;
+ }
+ return 0;
+}
+
+
+static void
+netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw)
+{
+ int s_hw = hw, s_sw = sw;
+ int i, lim =b->bdg_active_ports;
+ uint8_t tmp[NM_BDG_MAXPORTS];
+
+ /*
+ New algorithm:
+ make a copy of bdg_port_index;
+ lookup NA(ifp)->bdg_port and SWNA(ifp)->bdg_port
+ in the array of bdg_port_index, replacing them with
+ entries from the bottom of the array;
+ decrement bdg_active_ports;
+ acquire BDG_WLOCK() and copy back the array.
+ */
+
+ D("detach %d and %d (lim %d)", hw, sw, lim);
+ /* make a copy of the list of active ports, update it,
+ * and then copy back within BDG_WLOCK().
+ */
+ memcpy(tmp, b->bdg_port_index, sizeof(tmp));
+ for (i = 0; (hw >= 0 || sw >= 0) && i < lim; ) {
+ if (hw >= 0 && tmp[i] == hw) {
+ ND("detach hw %d at %d", hw, i);
+ lim--; /* point to last active port */
+ tmp[i] = tmp[lim]; /* swap with i */
+ tmp[lim] = hw; /* now this is inactive */
+ hw = -1;
+ } else if (sw >= 0 && tmp[i] == sw) {
+ ND("detach sw %d at %d", sw, i);
+ lim--;
+ tmp[i] = tmp[lim];
+ tmp[lim] = sw;
+ sw = -1;
+ } else {
+ i++;
+ }
+ }
+ if (hw >= 0 || sw >= 0) {
+ D("XXX delete failed hw %d sw %d, should panic...", hw, sw);
+ }
+
+ BDG_WLOCK(b);
+ b->bdg_ports[s_hw] = NULL;
+ if (s_sw >= 0) {
+ b->bdg_ports[s_sw] = NULL;
+ }
+ memcpy(b->bdg_port_index, tmp, sizeof(tmp));
+ b->bdg_active_ports = lim;
+ BDG_WUNLOCK(b);
+
+ ND("now %d active ports", lim);
+ if (lim == 0) {
+ ND("marking bridge %s as free", b->bdg_basename);
+ b->nm_bdg_lookup = NULL;
+ }
+}
+
+static void
+netmap_adapter_vp_dtor(struct netmap_adapter *na)
+{
+ struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter*)na;
+ struct nm_bridge *b = vpna->na_bdg;
+ struct ifnet *ifp = na->ifp;
+
+ ND("%s has %d references", NM_IFPNAME(ifp), na->na_refcount);
+
+ if (b) {
+ netmap_bdg_detach_common(b, vpna->bdg_port, -1);
+ }
+
+ bzero(ifp, sizeof(*ifp));
+ free(ifp, M_DEVBUF);
+ na->ifp = NULL;
+}
+
+int
+netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
+{
+ const char *name = nmr->nr_name;
+ struct ifnet *ifp;
+ int error = 0;
+ struct netmap_adapter *ret;
+ struct netmap_vp_adapter *vpna;
+ struct nm_bridge *b;
+ int i, j, cand = -1, cand2 = -1;
+ int needed;
+
+ *na = NULL; /* default return value */
+
+ /* first try to see if this is a bridge port. */
+ NMG_LOCK_ASSERT();
+ if (strncmp(name, NM_NAME, sizeof(NM_NAME) - 1)) {
+ return 0; /* no error, but no VALE prefix */
+ }
+
+ b = nm_find_bridge(name, create);
+ if (b == NULL) {
+ D("no bridges available for '%s'", name);
+ return (ENXIO);
+ }
+
+ /* Now we are sure that name starts with the bridge's name,
+ * lookup the port in the bridge. We need to scan the entire
+ * list. It is not important to hold a WLOCK on the bridge
+ * during the search because NMG_LOCK already guarantees
+ * that there are no other possible writers.
+ */
+
+ /* lookup in the local list of ports */
+ for (j = 0; j < b->bdg_active_ports; j++) {
+ i = b->bdg_port_index[j];
+ vpna = b->bdg_ports[i];
+ // KASSERT(na != NULL);
+ ifp = vpna->up.ifp;
+ /* XXX make sure the name only contains one : */
+ if (!strcmp(NM_IFPNAME(ifp), name)) {
+ netmap_adapter_get(&vpna->up);
+ ND("found existing if %s refs %d", name,
+ vpna->na_bdg_refcount);
+ *na = (struct netmap_adapter *)vpna;
+ return 0;
+ }
+ }
+ /* not found, should we create it? */
+ if (!create)
+ return ENXIO;
+ /* yes we should, see if we have space to attach entries */
+ needed = 2; /* in some cases we only need 1 */
+ if (b->bdg_active_ports + needed >= NM_BDG_MAXPORTS) {
+ D("bridge full %d, cannot create new port", b->bdg_active_ports);
+ return EINVAL;
+ }
+ /* record the next two ports available, but do not allocate yet */
+ cand = b->bdg_port_index[b->bdg_active_ports];
+ cand2 = b->bdg_port_index[b->bdg_active_ports + 1];
+ ND("+++ bridge %s port %s used %d avail %d %d",
+ b->bdg_basename, name, b->bdg_active_ports, cand, cand2);
+
+ /*
+ * try to see if there is a matching NIC with this name
+ * (after the bridge's name)
+ */
+ ifp = ifunit_ref(name + b->bdg_namelen + 1);
+ if (!ifp) { /* this is a virtual port */
+ if (nmr->nr_cmd) {
+ /* nr_cmd must be 0 for a virtual port */
+ return EINVAL;
+ }
+
+ /* create a struct ifnet for the new port.
+ * need M_NOWAIT as we are under nma_lock
+ */
+ ifp = malloc(sizeof(*ifp), M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (!ifp)
+ return ENOMEM;
+
+ strcpy(ifp->if_xname, name);
+ /* bdg_netmap_attach creates a struct netmap_adapter */
+ error = bdg_netmap_attach(nmr, ifp);
+ if (error) {
+ D("error %d", error);
+ free(ifp, M_DEVBUF);
+ return error;
+ }
+ ret = NA(ifp);
+ cand2 = -1; /* only need one port */
+ } else { /* this is a NIC */
+ struct ifnet *fake_ifp;
+
+ error = netmap_get_hw_na(ifp, &ret);
+ if (error || ret == NULL)
+ goto out;
+
+ /* make sure the NIC is not already in use */
+ if (NETMAP_OWNED_BY_ANY(ret)) {
+ D("NIC %s busy, cannot attach to bridge",
+ NM_IFPNAME(ifp));
+ error = EINVAL;
+ goto out;
+ }
+ /* create a fake interface */
+ fake_ifp = malloc(sizeof(*ifp), M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (!fake_ifp) {
+ error = ENOMEM;
+ goto out;
+ }
+ strcpy(fake_ifp->if_xname, name);
+ error = netmap_bwrap_attach(fake_ifp, ifp);
+ if (error) {
+ free(fake_ifp, M_DEVBUF);
+ goto out;
+ }
+ ret = NA(fake_ifp);
+ if (nmr->nr_arg1 != NETMAP_BDG_HOST)
+ cand2 = -1; /* only need one port */
+ if_rele(ifp);
+ }
+ vpna = (struct netmap_vp_adapter *)ret;
+
+ BDG_WLOCK(b);
+ vpna->bdg_port = cand;
+ ND("NIC %p to bridge port %d", vpna, cand);
+ /* bind the port to the bridge (virtual ports are not active) */
+ b->bdg_ports[cand] = vpna;
+ vpna->na_bdg = b;
+ b->bdg_active_ports++;
+ if (cand2 >= 0) {
+ struct netmap_vp_adapter *hostna = vpna + 1;
+ /* also bind the host stack to the bridge */
+ b->bdg_ports[cand2] = hostna;
+ hostna->bdg_port = cand2;
+ hostna->na_bdg = b;
+ b->bdg_active_ports++;
+ ND("host %p to bridge port %d", hostna, cand2);
+ }
+ ND("if %s refs %d", name, vpna->up.na_refcount);
+ BDG_WUNLOCK(b);
+ *na = ret;
+ netmap_adapter_get(ret);
+ return 0;
+
+out:
+ if_rele(ifp);
+
+ return error;
+}
+
+
+/* Process NETMAP_BDG_ATTACH and NETMAP_BDG_DETACH */
+static int
+nm_bdg_attach(struct nmreq *nmr)
+{
+ struct netmap_adapter *na;
+ struct netmap_if *nifp;
+ struct netmap_priv_d *npriv;
+ struct netmap_bwrap_adapter *bna;
+ int error;
+
+ npriv = malloc(sizeof(*npriv), M_DEVBUF, M_NOWAIT|M_ZERO);
+ if (npriv == NULL)
+ return ENOMEM;
+ NMG_LOCK();
+ /* XXX probably netmap_get_bdg_na() */
+ error = netmap_get_na(nmr, &na, 1 /* create if not exists */);
+ if (error) /* no device, or another bridge or user owns the device */
+ goto unlock_exit;
+ /* netmap_get_na() sets na_bdg if this is a physical interface
+ * that we can attach to a switch.
+ */
+ if (!nma_is_bwrap(na)) {
+ /* we got a reference to a virtual port or direct access to a NIC:
+ * perhaps no bridge prefix was specified, or the NIC name was wrong
+ */
+ error = EINVAL;
+ goto unref_exit;
+ }
+
+ if (na->active_fds > 0) { /* already registered */
+ error = EBUSY;
+ goto unref_exit;
+ }
+
+ nifp = netmap_do_regif(npriv, na, nmr->nr_ringid, &error);
+ if (!nifp) {
+ goto unref_exit;
+ }
+
+ bna = (struct netmap_bwrap_adapter*)na;
+ bna->na_kpriv = npriv;
+ NMG_UNLOCK();
+ ND("registered %s to netmap-mode", NM_IFPNAME(na->ifp));
+ return 0;
+
+unref_exit:
+ netmap_adapter_put(na);
+unlock_exit:
+ NMG_UNLOCK();
+ bzero(npriv, sizeof(*npriv));
+ free(npriv, M_DEVBUF);
+ return error;
+}
+
+static int
+nm_bdg_detach(struct nmreq *nmr)
+{
+ struct netmap_adapter *na;
+ int error;
+ struct netmap_bwrap_adapter *bna;
+ int last_instance;
+
+ NMG_LOCK();
+ error = netmap_get_na(nmr, &na, 0 /* don't create */);
+ if (error) { /* no device, or another bridge or user owns the device */
+ goto unlock_exit;
+ }
+ if (!nma_is_bwrap(na)) {
+ /* we got a reference to a virtual port or direct access to a NIC:
+ * perhaps no bridge prefix was specified, or the NIC name was wrong
+ */
+ error = EINVAL;
+ goto unref_exit;
+ }
+ bna = (struct netmap_bwrap_adapter *)na;
+
+ if (na->active_fds == 0) { /* not registered */
+ error = EINVAL;
+ goto unref_exit;
+ }
+
+ last_instance = netmap_dtor_locked(bna->na_kpriv); /* unregister */
+ if (!last_instance) {
+ D("--- error, trying to detach an entry with active mmaps");
+ error = EINVAL;
+ } else {
+ struct netmap_priv_d *npriv = bna->na_kpriv;
+
+ bna->na_kpriv = NULL;
+ D("deleting priv");
+
+ bzero(npriv, sizeof(*npriv));
+ free(npriv, M_DEVBUF);
+ }
+
+unref_exit:
+ netmap_adapter_put(na);
+unlock_exit:
+ NMG_UNLOCK();
+ return error;
+
+}
+
+
+/* exported to kernel callers, e.g. OVS ?
+ * Entry point.
+ * Called without NMG_LOCK.
+ */
+int
+netmap_bdg_ctl(struct nmreq *nmr, bdg_lookup_fn_t func)
+{
+ struct nm_bridge *b;
+ struct netmap_adapter *na;
+ struct netmap_vp_adapter *vpna;
+ struct ifnet *iter;
+ char *name = nmr->nr_name;
+ int cmd = nmr->nr_cmd, namelen = strlen(name);
+ int error = 0, i, j;
+
+ switch (cmd) {
+ case NETMAP_BDG_ATTACH:
+ error = nm_bdg_attach(nmr);
+ break;
+
+ case NETMAP_BDG_DETACH:
+ error = nm_bdg_detach(nmr);
+ break;
+
+ case NETMAP_BDG_LIST:
+ /* this is used to enumerate bridges and ports */
+ if (namelen) { /* look up indexes of bridge and port */
+ if (strncmp(name, NM_NAME, strlen(NM_NAME))) {
+ error = EINVAL;
+ break;
+ }
+ NMG_LOCK();
+ b = nm_find_bridge(name, 0 /* don't create */);
+ if (!b) {
+ error = ENOENT;
+ NMG_UNLOCK();
+ break;
+ }
+
+ error = ENOENT;
+ for (j = 0; j < b->bdg_active_ports; j++) {
+ i = b->bdg_port_index[j];
+ vpna = b->bdg_ports[i];
+ if (vpna == NULL) {
+ D("---AAAAAAAAARGH-------");
+ continue;
+ }
+ iter = vpna->up.ifp;
+ /* the former and the latter identify a
+ * virtual port and a NIC, respectively
+ */
+ if (!strcmp(iter->if_xname, name)) {
+ /* bridge index */
+ nmr->nr_arg1 = b - nm_bridges;
+ nmr->nr_arg2 = i; /* port index */
+ error = 0;
+ break;
+ }
+ }
+ NMG_UNLOCK();
+ } else {
+ /* return the first non-empty entry starting from
+ * bridge nr_arg1 and port nr_arg2.
+ *
+ * Users can detect the end of the same bridge by
+ * comparing the new and old values of nr_arg1, and can
+ * detect the end of all the bridges by error != 0
+ */
+ i = nmr->nr_arg1;
+ j = nmr->nr_arg2;
+
+ NMG_LOCK();
+ for (error = ENOENT; i < NM_BRIDGES; i++) {
+ b = nm_bridges + i;
+ if (j >= b->bdg_active_ports) {
+ j = 0; /* following bridges scan from 0 */
+ continue;
+ }
+ nmr->nr_arg1 = i;
+ nmr->nr_arg2 = j;
+ j = b->bdg_port_index[j];
+ vpna = b->bdg_ports[j];
+ iter = vpna->up.ifp;
+ strncpy(name, iter->if_xname, (size_t)IFNAMSIZ);
+ error = 0;
+ break;
+ }
+ NMG_UNLOCK();
+ }
+ break;
+
+ case NETMAP_BDG_LOOKUP_REG:
+ /* register a lookup function for the given bridge.
+ * nmr->nr_name may be just the bridge name (including the ':'
+ * if it is not just NM_NAME).
+ */
+ if (!func) {
+ error = EINVAL;
+ break;
+ }
+ NMG_LOCK();
+ b = nm_find_bridge(name, 0 /* don't create */);
+ if (!b) {
+ error = EINVAL;
+ } else {
+ b->nm_bdg_lookup = func;
+ }
+ NMG_UNLOCK();
+ break;
+
+ case NETMAP_BDG_OFFSET:
+ NMG_LOCK();
+ error = netmap_get_bdg_na(nmr, &na, 0);
+ if (!error) {
+ vpna = (struct netmap_vp_adapter *)na;
+ if (nmr->nr_arg1 > NETMAP_BDG_MAX_OFFSET)
+ nmr->nr_arg1 = NETMAP_BDG_MAX_OFFSET;
+ vpna->offset = nmr->nr_arg1;
+ D("Using offset %d for %p", vpna->offset, vpna);
+ }
+ NMG_UNLOCK();
+ break;
+
+ default:
+ D("invalid cmd (nmr->nr_cmd) (0x%x)", cmd);
+ error = EINVAL;
+ break;
+ }
+ return error;
+}
+
+
+static int
+netmap_vp_krings_create(struct netmap_adapter *na)
+{
+ u_int ntx, nrx, tailroom;
+ int error, i;
+ uint32_t *leases;
+
+ /* XXX vps do not need host rings,
+ * but we crash if we don't have one
+ */
+ ntx = na->num_tx_rings + 1;
+ nrx = na->num_rx_rings + 1;
+
+ /*
+ * Leases are attached to RX rings on vale ports
+ */
+ tailroom = sizeof(uint32_t) * na->num_rx_desc * nrx;
+
+ error = netmap_krings_create(na, ntx, nrx, tailroom);
+ if (error)
+ return error;
+
+ leases = na->tailroom;
+
+ for (i = 0; i < nrx; i++) { /* Receive rings */
+ na->rx_rings[i].nkr_leases = leases;
+ leases += na->num_rx_desc;
+ }
+
+ error = nm_alloc_bdgfwd(na);
+ if (error) {
+ netmap_krings_delete(na);
+ return error;
+ }
+
+ return 0;
+}
+
+static void
+netmap_vp_krings_delete(struct netmap_adapter *na)
+{
+ nm_free_bdgfwd(na);
+ netmap_krings_delete(na);
+}
+
+
+static int
+nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n,
+ struct netmap_vp_adapter *na, u_int ring_nr);
+
+
+/*
+ * Grab packets from a kring, move them into the ft structure
+ * associated to the tx (input) port. Max one instance per port,
+ * filtered on input (ioctl, poll or XXX).
+ * Returns the next position in the ring.
+ */
+static int
+nm_bdg_preflush(struct netmap_vp_adapter *na, u_int ring_nr,
+ struct netmap_kring *kring, u_int end)
+{
+ struct netmap_ring *ring = kring->ring;
+ struct nm_bdg_fwd *ft;
+ u_int j = kring->nr_hwcur, lim = kring->nkr_num_slots - 1;
+ u_int ft_i = 0; /* start from 0 */
+ u_int frags = 1; /* how many frags ? */
+ struct nm_bridge *b = na->na_bdg;
+
+ /* To protect against modifications to the bridge we acquire a
+ * shared lock, waiting if we can sleep (if the source port is
+ * attached to a user process) or with a trylock otherwise (NICs).
+ */
+ ND("wait rlock for %d packets", ((j > end ? lim+1 : 0) + end) - j);
+ if (na->up.na_flags & NAF_BDG_MAYSLEEP)
+ BDG_RLOCK(b);
+ else if (!BDG_RTRYLOCK(b))
+ return 0;
+ ND(5, "rlock acquired for %d packets", ((j > end ? lim+1 : 0) + end) - j);
+ ft = kring->nkr_ft;
+
+ for (; likely(j != end); j = nm_next(j, lim)) {
+ struct netmap_slot *slot = &ring->slot[j];
+ char *buf;
+
+ ft[ft_i].ft_len = slot->len;
+ ft[ft_i].ft_flags = slot->flags;
+
+ ND("flags is 0x%x", slot->flags);
+ /* this slot goes into a list so initialize the link field */
+ ft[ft_i].ft_next = NM_FT_NULL;
+ buf = ft[ft_i].ft_buf = (slot->flags & NS_INDIRECT) ?
+ (void *)(uintptr_t)slot->ptr : BDG_NMB(&na->up, slot);
+ prefetch(buf);
+ ++ft_i;
+ if (slot->flags & NS_MOREFRAG) {
+ frags++;
+ continue;
+ }
+ if (unlikely(netmap_verbose && frags > 1))
+ RD(5, "%d frags at %d", frags, ft_i - frags);
+ ft[ft_i - frags].ft_frags = frags;
+ frags = 1;
+ if (unlikely((int)ft_i >= bridge_batch))
+ ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
+ }
+ if (frags > 1) {
+ D("truncate incomplete fragment at %d (%d frags)", ft_i, frags);
+ // ft_i > 0, ft[ft_i-1].flags has NS_MOREFRAG
+ ft[ft_i - 1].ft_frags &= ~NS_MOREFRAG;
+ ft[ft_i - frags].ft_frags = frags - 1;
+ }
+ if (ft_i)
+ ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
+ BDG_RUNLOCK(b);
+ return j;
+}
+
+
+/*
+ *---- support for virtual bridge -----
+ */
+
+/* ----- FreeBSD if_bridge hash function ------- */
+
+/*
+ * The following hash function is adapted from "Hash Functions" by Bob Jenkins
+ * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
+ *
+ * http://www.burtleburtle.net/bob/hash/spooky.html
+ */
+#define mix(a, b, c) \
+do { \
+ a -= b; a -= c; a ^= (c >> 13); \
+ b -= c; b -= a; b ^= (a << 8); \
+ c -= a; c -= b; c ^= (b >> 13); \
+ a -= b; a -= c; a ^= (c >> 12); \
+ b -= c; b -= a; b ^= (a << 16); \
+ c -= a; c -= b; c ^= (b >> 5); \
+ a -= b; a -= c; a ^= (c >> 3); \
+ b -= c; b -= a; b ^= (a << 10); \
+ c -= a; c -= b; c ^= (b >> 15); \
+} while (/*CONSTCOND*/0)
+
+static __inline uint32_t
+nm_bridge_rthash(const uint8_t *addr)
+{
+ uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = 0; // hash key
+
+ b += addr[5] << 8;
+ b += addr[4];
+ a += addr[3] << 24;
+ a += addr[2] << 16;
+ a += addr[1] << 8;
+ a += addr[0];
+
+ mix(a, b, c);
+#define BRIDGE_RTHASH_MASK (NM_BDG_HASH-1)
+ return (c & BRIDGE_RTHASH_MASK);
+}
+
+#undef mix
+
+
+static int
+bdg_netmap_reg(struct netmap_adapter *na, int onoff)
+{
+ struct netmap_vp_adapter *vpna =
+ (struct netmap_vp_adapter*)na;
+ struct ifnet *ifp = na->ifp;
+
+ /* the interface is already attached to the bridge,
+ * so we only need to toggle IFCAP_NETMAP.
+ */
+ BDG_WLOCK(vpna->na_bdg);
+ if (onoff) {
+ ifp->if_capenable |= IFCAP_NETMAP;
+ } else {
+ ifp->if_capenable &= ~IFCAP_NETMAP;
+ }
+ BDG_WUNLOCK(vpna->na_bdg);
+ return 0;
+}
+
+
+/*
+ * Lookup function for a learning bridge.
+ * Update the hash table with the source address,
+ * and then return the destination port index, and the
+ * ring in *dst_ring (at the moment, always ring 0).
+ */
+u_int
+netmap_bdg_learning(char *buf, u_int buf_len, uint8_t *dst_ring,
+ struct netmap_vp_adapter *na)
+{
+ struct nm_hash_ent *ht = na->na_bdg->ht;
+ uint32_t sh, dh;
+ u_int dst, mysrc = na->bdg_port;
+ uint64_t smac, dmac;
+
+ if (buf_len < 14) {
+ D("invalid buf length %d", buf_len);
+ return NM_BDG_NOPORT;
+ }
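+ /* extract the 48-bit destination and source MAC addresses:
+ * dmac is bytes 0..5 of the Ethernet header, smac is bytes 6..11
+ * (load 8 bytes at offset 4 and shift out the low 16 bits).
+ */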
+ dmac = le64toh(*(uint64_t *)(buf)) & 0xffffffffffff;
+ smac = le64toh(*(uint64_t *)(buf + 4));
+ smac >>= 16;
+
+ /*
+ * The hash is somewhat expensive, there might be some
+ * worthwhile optimizations here.
+ */
+ if ((buf[6] & 1) == 0) { /* valid src */
+ uint8_t *s = buf+6;
+ sh = nm_bridge_rthash(s); // XXX hash of source
+ /* update source port forwarding entry */
+ ht[sh].mac = smac; /* XXX expire ? */
+ ht[sh].ports = mysrc;
+ if (netmap_verbose)
+ D("src %02x:%02x:%02x:%02x:%02x:%02x on port %d",
+ s[0], s[1], s[2], s[3], s[4], s[5], mysrc);
+ }
+ dst = NM_BDG_BROADCAST;
+ if ((buf[0] & 1) == 0) { /* unicast */
+ dh = nm_bridge_rthash(buf); // XXX hash of dst
+ if (ht[dh].mac == dmac) { /* found dst */
+ dst = ht[dh].ports;
+ }
+ /* XXX otherwise return NM_BDG_UNKNOWN ? */
+ }
+ *dst_ring = 0;
+ return dst;
+}
+
+
+/*
+ * This flush routine supports only unicast and broadcast but a large
+ * number of ports, and lets us replace the learn and dispatch functions.
+ */
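+/*
+ * The routine works in two passes over the batch: pass 1 classifies
+ * each packet and appends it to the per-destination queue in the
+ * scratch pad (dst_ents), recording non-empty queues in dsts[];
+ * pass 2 scans those destinations, leases slots on each target rx
+ * ring and copies the queued packets there.
+ */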
+int
+nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
+ u_int ring_nr)
+{
+ struct nm_bdg_q *dst_ents, *brddst;
+ uint16_t num_dsts = 0, *dsts;
+ struct nm_bridge *b = na->na_bdg;
+ u_int i, j, me = na->bdg_port;
+
+ /*
+ * The work area (pointed by ft) is followed by an array of
+ * pointers to queues, dst_ents; there are NM_BDG_MAXRINGS
+ * queues per port plus one for the broadcast traffic.
+ * Then we have an array of destination indexes.
+ */
+ dst_ents = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
+ dsts = (uint16_t *)(dst_ents + NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1);
+
+ /* first pass: find a destination for each packet in the batch */
+ for (i = 0; likely(i < n); i += ft[i].ft_frags) {
+ uint8_t dst_ring = ring_nr; /* default, same ring as origin */
+ uint16_t dst_port, d_i;
+ struct nm_bdg_q *d;
+ uint8_t *buf = ft[i].ft_buf;
+ u_int len = ft[i].ft_len;
+
+ ND("slot %d frags %d", i, ft[i].ft_frags);
+ /* Drop the packet if the offset is not within the first
+ fragment nor at the very beginning of the second. */
+ if (unlikely(na->offset > len))
+ continue;
+ if (len == na->offset) {
+ buf = ft[i+1].ft_buf;
+ len = ft[i+1].ft_len;
+ } else {
+ buf += na->offset;
+ len -= na->offset;
+ }
+ dst_port = b->nm_bdg_lookup(buf, len, &dst_ring, na);
+ if (netmap_verbose > 255)
+ RD(5, "slot %d port %d -> %d", i, me, dst_port);
+ if (dst_port == NM_BDG_NOPORT)
+ continue; /* this packet is identified to be dropped */
+ else if (unlikely(dst_port > NM_BDG_MAXPORTS))
+ continue;
+ else if (dst_port == NM_BDG_BROADCAST)
+ dst_ring = 0; /* broadcasts always go to ring 0 */
+ else if (unlikely(dst_port == me ||
+ !b->bdg_ports[dst_port]))
+ continue;
+
+ /* get a position in the scratch pad */
+ d_i = dst_port * NM_BDG_MAXRINGS + dst_ring;
+ d = dst_ents + d_i;
+
+ /* append the first fragment to the list */
+ if (d->bq_head == NM_FT_NULL) { /* new destination */
+ d->bq_head = d->bq_tail = i;
+ /* remember this position to be scanned later */
+ if (dst_port != NM_BDG_BROADCAST)
+ dsts[num_dsts++] = d_i;
+ } else {
+ ft[d->bq_tail].ft_next = i;
+ d->bq_tail = i;
+ }
+ d->bq_len += ft[i].ft_frags;
+ }
+
+ /*
+ * Broadcast traffic goes to ring 0 on all destinations.
+ * So we need to add these rings to the list of ports to scan.
+ * XXX at the moment we scan all NM_BDG_MAXPORTS ports, which is
+ * expensive. We should keep a compact list of active destinations
+ * so we could shorten this loop.
+ */
+ brddst = dst_ents + NM_BDG_BROADCAST * NM_BDG_MAXRINGS;
+ if (brddst->bq_head != NM_FT_NULL) {
+ for (j = 0; likely(j < b->bdg_active_ports); j++) {
+ uint16_t d_i;
+ i = b->bdg_port_index[j];
+ if (unlikely(i == me))
+ continue;
+ d_i = i * NM_BDG_MAXRINGS;
+ if (dst_ents[d_i].bq_head == NM_FT_NULL)
+ dsts[num_dsts++] = d_i;
+ }
+ }
+
+ ND(5, "pass 1 done %d pkts %d dsts", n, num_dsts);
+ /* second pass: scan destinations (XXX will be modular somehow) */
+ for (i = 0; i < num_dsts; i++) {
+ struct ifnet *dst_ifp;
+ struct netmap_vp_adapter *dst_na;
+ struct netmap_kring *kring;
+ struct netmap_ring *ring;
+ u_int dst_nr, lim, j, sent = 0, d_i, next, brd_next;
+ u_int needed, howmany;
+ int retry = netmap_txsync_retry;
+ struct nm_bdg_q *d;
+ uint32_t my_start = 0, lease_idx = 0;
+ int nrings;
+ int offset_mismatch;
+
+ d_i = dsts[i];
+ ND("second pass %d port %d", i, d_i);
+ d = dst_ents + d_i;
+ // XXX fix the division
+ dst_na = b->bdg_ports[d_i/NM_BDG_MAXRINGS];
+ /* protect from the lookup function returning an inactive
+ * destination port
+ */
+ if (unlikely(dst_na == NULL))
+ goto cleanup;
+ if (dst_na->up.na_flags & NAF_SW_ONLY)
+ goto cleanup;
+ dst_ifp = dst_na->up.ifp;
+ /*
+ * The interface may be in !netmap mode in two cases:
+ * - when na is attached but not activated yet;
+ * - when na is being deactivated but is still attached.
+ */
+ if (unlikely(!(dst_ifp->if_capenable & IFCAP_NETMAP))) {
+ ND("not in netmap mode!");
+ goto cleanup;
+ }
+
+ offset_mismatch = (dst_na->offset != na->offset);
+
+ /* there is at least one either unicast or broadcast packet */
+ brd_next = brddst->bq_head;
+ next = d->bq_head;
+ /* we need to reserve this many slots. If fewer are
+ * available, some packets will be dropped.
+ * Packets may have multiple fragments, so there is a chance
+ * that we may not use all of the slots we have claimed,
+ * and we will need to handle the leftover ones when we
+ * regain the lock.
+ */
+ needed = d->bq_len + brddst->bq_len;
+
+ ND(5, "pass 2 dst %d is %x %s",
+ i, d_i, is_vp ? "virtual" : "nic/host");
+ dst_nr = d_i & (NM_BDG_MAXRINGS-1);
+ nrings = dst_na->up.num_rx_rings;
+ if (dst_nr >= nrings)
+ dst_nr = dst_nr % nrings;
+ kring = &dst_na->up.rx_rings[dst_nr];
+ ring = kring->ring;
+ lim = kring->nkr_num_slots - 1;
+
+retry:
+
+ /* reserve the buffers in the queue and an entry
+ * to report completion, and drop lock.
+ * XXX this might become a helper function.
+ */
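+ /* Multiple senders may be copying into the same destination ring
+ * concurrently: each one grabs a lease (a contiguous range of
+ * slots) under q_lock and then copies without the lock.
+ * Completion is reported in nkr_leases[]; only the sender whose
+ * lease starts at the current rx position advances nr_hwavail and
+ * notifies the destination, collecting leases already completed
+ * by the others (see below).
+ */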
+ mtx_lock(&kring->q_lock);
+ if (kring->nkr_stopped) {
+ mtx_unlock(&kring->q_lock);
+ goto cleanup;
+ }
+ if (dst_na->retry) {
+ dst_na->up.nm_notify(&dst_na->up, dst_nr, NR_RX, 0);
+ }
+ my_start = j = kring->nkr_hwlease;
+ howmany = nm_kr_space(kring, 1);
+ if (needed < howmany)
+ howmany = needed;
+ lease_idx = nm_kr_lease(kring, howmany, 1);
+ mtx_unlock(&kring->q_lock);
+
+ /* only retry if we need more than available slots */
+ if (retry && needed <= howmany)
+ retry = 0;
+
+ /* copy to the destination queue */
+ while (howmany > 0) {
+ struct netmap_slot *slot;
+ struct nm_bdg_fwd *ft_p, *ft_end;
+ u_int cnt;
+ int fix_mismatch = offset_mismatch;
+
+ /* find the queue from which we pick next packet.
+ * NM_FT_NULL is always higher than valid indexes
+ * so we never dereference it if the other list
+ * has packets (and if both are empty we never
+ * get here).
+ */
+ if (next < brd_next) {
+ ft_p = ft + next;
+ next = ft_p->ft_next;
+ } else { /* insert broadcast */
+ ft_p = ft + brd_next;
+ brd_next = ft_p->ft_next;
+ }
+ cnt = ft_p->ft_frags; // cnt > 0
+ if (unlikely(cnt > howmany))
+ break; /* no more space */
+ howmany -= cnt;
+ if (netmap_verbose && cnt > 1)
+ RD(5, "rx %d frags to %d", cnt, j);
+ ft_end = ft_p + cnt;
+ do {
+ char *dst, *src = ft_p->ft_buf;
+ size_t copy_len = ft_p->ft_len, dst_len = copy_len;
+
+ slot = &ring->slot[j];
+ dst = BDG_NMB(&dst_na->up, slot);
+
+ if (unlikely(fix_mismatch)) {
+ if (na->offset > dst_na->offset) {
+ src += na->offset - dst_na->offset;
+ copy_len -= na->offset - dst_na->offset;
+ dst_len = copy_len;
+ } else {
+ bzero(dst, dst_na->offset - na->offset);
+ dst_len += dst_na->offset - na->offset;
+ dst += dst_na->offset - na->offset;
+ }
+ /* fix the first fragment only */
+ fix_mismatch = 0;
+ /* completely skip a header-only fragment */
+ if (copy_len == 0) {
+ ft_p++;
+ continue;
+ }
+ }
+ /* round to a multiple of 64 */
+ copy_len = (copy_len + 63) & ~63;
+
+ ND("send %d %d bytes at %s:%d",
+ i, ft_p->ft_len, NM_IFPNAME(dst_ifp), j);
+ if (ft_p->ft_flags & NS_INDIRECT) {
+ if (copyin(src, dst, copy_len)) {
+ // invalid user pointer, pretend len is 0
+ dst_len = 0;
+ }
+ } else {
+ //memcpy(dst, src, copy_len);
+ pkt_copy(src, dst, (int)copy_len);
+ }
+ slot->len = dst_len;
+ slot->flags = (cnt << 8) | NS_MOREFRAG;
+ j = nm_next(j, lim);
+ ft_p++;
+ sent++;
+ } while (ft_p != ft_end);
+ slot->flags = (cnt << 8); /* clear flag on last entry */
+ /* are we done ? */
+ if (next == NM_FT_NULL && brd_next == NM_FT_NULL)
+ break;
+ }
+ {
+ /* current position */
+ uint32_t *p = kring->nkr_leases; /* shorthand */
+ uint32_t update_pos;
+ int still_locked = 1;
+
+ mtx_lock(&kring->q_lock);
+ if (unlikely(howmany > 0)) {
+ /* we have not used all the buffers. If I am the last
+ * one I can recover the slots, otherwise I must
+ * fill them with 0 to mark empty packets.
+ */
+ ND("leftover %d bufs", howmany);
+ if (nm_next(lease_idx, lim) == kring->nkr_lease_idx) {
+ /* yes i am the last one */
+ ND("roll back nkr_hwlease to %d", j);
+ kring->nkr_hwlease = j;
+ } else {
+ while (howmany-- > 0) {
+ ring->slot[j].len = 0;
+ ring->slot[j].flags = 0;
+ j = nm_next(j, lim);
+ }
+ }
+ }
+ p[lease_idx] = j; /* report I am done */
+
+ update_pos = nm_kr_rxpos(kring);
+
+ if (my_start == update_pos) {
+ /* all slots before my_start have been reported,
+ * so scan subsequent leases to see if other ranges
+ * have been completed, and do a selwakeup or txsync.
+ */
+ while (lease_idx != kring->nkr_lease_idx &&
+ p[lease_idx] != NR_NOSLOT) {
+ j = p[lease_idx];
+ p[lease_idx] = NR_NOSLOT;
+ lease_idx = nm_next(lease_idx, lim);
+ }
+ /* j is the new 'write' position. j != my_start
+ * means there are new buffers to report
+ */
+ if (likely(j != my_start)) {
+ uint32_t old_avail = kring->nr_hwavail;
+
+ kring->nr_hwavail = (j >= kring->nr_hwcur) ?
+ j - kring->nr_hwcur :
+ j + lim + 1 - kring->nr_hwcur;
+ if (kring->nr_hwavail < old_avail) {
+ D("avail shrink %d -> %d",
+ old_avail, kring->nr_hwavail);
+ }
+ dst_na->up.nm_notify(&dst_na->up, dst_nr, NR_RX, 0);
+ still_locked = 0;
+ mtx_unlock(&kring->q_lock);
+ if (dst_na->retry && retry--)
+ goto retry;
+ }
+ }
+ if (still_locked)
+ mtx_unlock(&kring->q_lock);
+ }
+cleanup:
+ d->bq_head = d->bq_tail = NM_FT_NULL; /* cleanup */
+ d->bq_len = 0;
+ }
+ brddst->bq_head = brddst->bq_tail = NM_FT_NULL; /* cleanup */
+ brddst->bq_len = 0;
+ return 0;
+}
+
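+/*
+ * txsync backend for a VALE port: slots between nr_hwcur and
+ * ring->cur are pushed through the bridge by nm_bdg_preflush().
+ */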
+static int
+netmap_vp_txsync(struct netmap_vp_adapter *na, u_int ring_nr, int flags)
+{
+ struct netmap_kring *kring = &na->up.tx_rings[ring_nr];
+ struct netmap_ring *ring = kring->ring;
+ u_int j, k, lim = kring->nkr_num_slots - 1;
+
+ k = ring->cur;
+ if (k > lim)
+ return netmap_ring_reinit(kring);
+
+ if (bridge_batch <= 0) { /* testing only */
+ j = k; // used all
+ goto done;
+ }
+ if (bridge_batch > NM_BDG_BATCH)
+ bridge_batch = NM_BDG_BATCH;
+
+ j = nm_bdg_preflush(na, ring_nr, kring, k);
+ if (j != k)
+ D("early break at %d/ %d, avail %d", j, k, kring->nr_hwavail);
+ /* k-j modulo ring size is the number of slots processed */
+ if (k < j)
+ k += kring->nkr_num_slots;
+ kring->nr_hwavail = lim - (k - j);
+
+done:
+ kring->nr_hwcur = j;
+ ring->avail = kring->nr_hwavail;
+ if (netmap_verbose)
+ D("%s ring %d flags %d", NM_IFPNAME(na->up.ifp), ring_nr, flags);
+ return 0;
+}
+
+
+/*
+ * main dispatch routine for the bridge.
+ * We already know that only one thread is running this.
+ * We must run nm_bdg_preflush() without holding a lock.
+ */
+static int
+bdg_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
+{
+ struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter*)na;
+ return netmap_vp_txsync(vpna, ring_nr, flags);
+}
+
+
+/*
+ * user process reading from a VALE switch.
+ * Already protected against concurrent calls from userspace,
+ * but we must acquire the queue's lock to protect against
+ * writers on the same queue.
+ */
+static int
+bdg_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
+{
+ struct netmap_kring *kring = &na->rx_rings[ring_nr];
+ struct netmap_ring *ring = kring->ring;
+ u_int j, lim = kring->nkr_num_slots - 1;
+ u_int k = ring->cur, resvd = ring->reserved;
+ int n;
+
+ mtx_lock(&kring->q_lock);
+ if (k > lim) {
+ D("ouch dangerous reset!!!");
+ n = netmap_ring_reinit(kring);
+ goto done;
+ }
+
+ /* skip past packets that userspace has released */
+ j = kring->nr_hwcur; /* netmap ring index */
+ if (resvd > 0) {
+ if (resvd + ring->avail >= lim + 1) {
+ D("XXX invalid reserve/avail %d %d", resvd, ring->avail);
+ ring->reserved = resvd = 0; // XXX panic...
+ }
+ k = (k >= resvd) ? k - resvd : k + lim + 1 - resvd;
+ }
+
+ if (j != k) { /* userspace has released some packets. */
+ n = k - j;
+ if (n < 0)
+ n += kring->nkr_num_slots;
+ ND("userspace releases %d packets", n);
+ for (n = 0; likely(j != k); n++) {
+ struct netmap_slot *slot = &ring->slot[j];
+ void *addr = BDG_NMB(na, slot);
+
+ if (addr == netmap_buffer_base) { /* bad buf */
+ D("bad buffer index %d, ignore ?",
+ slot->buf_idx);
+ }
+ slot->flags &= ~NS_BUF_CHANGED;
+ j = nm_next(j, lim);
+ }
+ kring->nr_hwavail -= n;
+ kring->nr_hwcur = k;
+ }
+ /* tell userspace that there are new packets */
+ ring->avail = kring->nr_hwavail - resvd;
+ n = 0;
+done:
+ mtx_unlock(&kring->q_lock);
+ return n;
+}
+
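+/*
+ * Create the netmap_vp_adapter for a new virtual port: ring and
+ * slot counts come from the request, bounded by NM_BDG_MAXRINGS
+ * and NM_BDG_MAXSLOTS, and the port gets its own private memory
+ * allocator.
+ */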
+static int
+bdg_netmap_attach(struct nmreq *nmr, struct ifnet *ifp)
+{
+ struct netmap_vp_adapter *vpna;
+ struct netmap_adapter *na;
+ int error;
+
+ vpna = malloc(sizeof(*vpna), M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (vpna == NULL)
+ return ENOMEM;
+
+ na = &vpna->up;
+
+ na->ifp = ifp;
+
+ /* bound checking */
+ na->num_tx_rings = nmr->nr_tx_rings;
+ nm_bound_var(&na->num_tx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
+ nmr->nr_tx_rings = na->num_tx_rings; // write back
+ na->num_rx_rings = nmr->nr_rx_rings;
+ nm_bound_var(&na->num_rx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
+ nmr->nr_rx_rings = na->num_rx_rings; // write back
+ nm_bound_var(&nmr->nr_tx_slots, NM_BRIDGE_RINGSIZE,
+ 1, NM_BDG_MAXSLOTS, NULL);
+ na->num_tx_desc = nmr->nr_tx_slots;
+ nm_bound_var(&nmr->nr_rx_slots, NM_BRIDGE_RINGSIZE,
+ 1, NM_BDG_MAXSLOTS, NULL);
+ na->num_rx_desc = nmr->nr_rx_slots;
+ vpna->offset = 0;
+
+ na->na_flags |= NAF_BDG_MAYSLEEP | NAF_MEM_OWNER;
+ na->nm_txsync = bdg_netmap_txsync;
+ na->nm_rxsync = bdg_netmap_rxsync;
+ na->nm_register = bdg_netmap_reg;
+ na->nm_dtor = netmap_adapter_vp_dtor;
+ na->nm_krings_create = netmap_vp_krings_create;
+ na->nm_krings_delete = netmap_vp_krings_delete;
+ na->nm_mem = netmap_mem_private_new(NM_IFPNAME(na->ifp),
+ na->num_tx_rings, na->num_tx_desc,
+ na->num_rx_rings, na->num_rx_desc);
+ /* other nmd fields are set in the common routine */
+ error = netmap_attach_common(na);
+ if (error) {
+ free(vpna, M_DEVBUF);
+ return error;
+ }
+ return 0;
+}
+
+static void
+netmap_bwrap_dtor(struct netmap_adapter *na)
+{
+ struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
+ struct netmap_adapter *hwna = bna->hwna;
+ struct nm_bridge *b = bna->up.na_bdg,
+ *bh = bna->host.na_bdg;
+ struct ifnet *ifp = na->ifp;
+
+ ND("na %p", na);
+
+ if (b) {
+ netmap_bdg_detach_common(b, bna->up.bdg_port,
+ (bh ? bna->host.bdg_port : -1));
+ }
+
+ hwna->na_private = NULL;
+ netmap_adapter_put(hwna);
+
+ bzero(ifp, sizeof(*ifp));
+ free(ifp, M_DEVBUF);
+ na->ifp = NULL;
+
+}
+
+/*
+ * Callback that overwrites the hwna notify callback: pass packets
+ * from the NIC to the bridge. Packets come from the outside or from
+ * the host stack and are put on an hwna rx ring; the bridge wrapper
+ * then sends them through the bridge.
+ *
+ * XXX TODO check locking: this is called from the interrupt
+ * handler so we should make sure that the interface is not
+ * disconnected while passing down an interrupt.
+ *
+ * Note, no user process can access this NIC so we can ignore
+ * the info in the 'ring'.
+ */
+static int
+netmap_bwrap_intr_notify(struct netmap_adapter *na, u_int ring_nr, enum txrx tx, int flags)
+{
+ struct ifnet *ifp = na->ifp;
+ struct netmap_bwrap_adapter *bna = na->na_private;
+ struct netmap_vp_adapter *hostna = &bna->host;
+ struct netmap_kring *kring, *bkring;
+ struct netmap_ring *ring;
+ int is_host_ring = ring_nr == na->num_rx_rings;
+ struct netmap_vp_adapter *vpna = &bna->up;
+ int error = 0;
+
+ ND("%s[%d] %s %x", NM_IFPNAME(ifp), ring_nr, (tx == NR_TX ? "TX" : "RX"), flags);
+
+ if (flags & NAF_DISABLE_NOTIFY) {
+ kring = tx == NR_TX ? na->tx_rings : na->rx_rings;
+ bkring = tx == NR_TX ? vpna->up.rx_rings : vpna->up.tx_rings;
+ if (kring->nkr_stopped)
+ netmap_disable_ring(bkring);
+ else
+ bkring->nkr_stopped = 0;
+ return 0;
+ }
+
+ if (ifp == NULL || !(ifp->if_capenable & IFCAP_NETMAP))
+ return 0;
+
+ if (tx == NR_TX)
+ return 0;
+
+ kring = &na->rx_rings[ring_nr];
+ ring = kring->ring;
+
+ /* make sure the ring is not disabled */
+ if (nm_kr_tryget(kring))
+ return 0;
+
+ if (is_host_ring && hostna->na_bdg == NULL) {
+ error = bna->save_notify(na, ring_nr, tx, flags);
+ goto put_out;
+ }
+
+ if (is_host_ring) {
+ vpna = hostna;
+ ring_nr = 0;
+ } else {
+ /* fetch packets that have arrived.
+ * XXX maybe do this in a loop ?
+ */
+ error = na->nm_rxsync(na, ring_nr, 0);
+ if (error)
+ goto put_out;
+ }
+ if (kring->nr_hwavail == 0 && netmap_verbose) {
+ D("how strange, interrupt with no packets on %s",
+ NM_IFPNAME(ifp));
+ goto put_out;
+ }
+ /* XXX avail ? */
+ ring->cur = nm_kr_rxpos(kring);
+ netmap_vp_txsync(vpna, ring_nr, flags);
+
+ if (!is_host_ring)
+ error = na->nm_rxsync(na, ring_nr, 0);
+
+put_out:
+ nm_kr_put(kring);
+ return error;
+}
+
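+/*
+ * Register callback for the bwrap: propagate the buffer lookup
+ * table to the hardware adapter, cross-link the netmap rings
+ * (bwrap rx <-> hwna tx and vice versa), put the NIC in netmap
+ * mode and hook its notify callback so that interrupts feed the
+ * bridge (see netmap_bwrap_intr_notify()).
+ */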
+static int
+netmap_bwrap_register(struct netmap_adapter *na, int onoff)
+{
+ struct netmap_bwrap_adapter *bna =
+ (struct netmap_bwrap_adapter *)na;
+ struct netmap_adapter *hwna = bna->hwna;
+ struct netmap_vp_adapter *hostna = &bna->host;
+ int error;
+
+ ND("%s %d", NM_IFPNAME(ifp), onoff);
+
+ if (onoff) {
+ int i;
+
+ hwna->na_lut = na->na_lut;
+ hwna->na_lut_objtotal = na->na_lut_objtotal;
+
+ if (hostna->na_bdg) {
+ hostna->up.na_lut = na->na_lut;
+ hostna->up.na_lut_objtotal = na->na_lut_objtotal;
+ }
+
+ /* cross-link the netmap rings */
+ for (i = 0; i <= na->num_tx_rings; i++) {
+ hwna->tx_rings[i].nkr_num_slots = na->rx_rings[i].nkr_num_slots;
+ hwna->tx_rings[i].ring = na->rx_rings[i].ring;
+ }
+ for (i = 0; i <= na->num_rx_rings; i++) {
+ hwna->rx_rings[i].nkr_num_slots = na->tx_rings[i].nkr_num_slots;
+ hwna->rx_rings[i].ring = na->tx_rings[i].ring;
+ }
+ }
+
+ if (hwna->ifp) {
+ error = hwna->nm_register(hwna, onoff);
+ if (error)
+ return error;
+ }
+
+ bdg_netmap_reg(na, onoff);
+
+ if (onoff) {
+ bna->save_notify = hwna->nm_notify;
+ hwna->nm_notify = netmap_bwrap_intr_notify;
+ } else {
+ hwna->nm_notify = bna->save_notify;
+ hwna->na_lut = NULL;
+ hwna->na_lut_objtotal = 0;
+ }
+
+ return 0;
+}
+
+static int
+netmap_bwrap_config(struct netmap_adapter *na, u_int *txr, u_int *txd,
+ u_int *rxr, u_int *rxd)
+{
+ struct netmap_bwrap_adapter *bna =
+ (struct netmap_bwrap_adapter *)na;
+ struct netmap_adapter *hwna = bna->hwna;
+
+ /* forward the request */
+ netmap_update_config(hwna);
+ /* swap the results */
+ *txr = hwna->num_rx_rings;
+ *txd = hwna->num_rx_desc;
+ *rxr = hwna->num_tx_rings;
+ *rxd = hwna->num_tx_desc;
+
+ return 0;
+}
+
+static int
+netmap_bwrap_krings_create(struct netmap_adapter *na)
+{
+ struct netmap_bwrap_adapter *bna =
+ (struct netmap_bwrap_adapter *)na;
+ struct netmap_adapter *hwna = bna->hwna;
+ struct netmap_adapter *hostna = &bna->host.up;
+ int error;
+
+ ND("%s", NM_IFPNAME(na->ifp));
+
+ error = netmap_vp_krings_create(na);
+ if (error)
+ return error;
+
+ error = hwna->nm_krings_create(hwna);
+ if (error) {
+ netmap_vp_krings_delete(na);
+ return error;
+ }
+
+ hostna->tx_rings = na->tx_rings + na->num_tx_rings;
+ hostna->rx_rings = na->rx_rings + na->num_rx_rings;
+
+ return 0;
+}
+
+static void
+netmap_bwrap_krings_delete(struct netmap_adapter *na)
+{
+ struct netmap_bwrap_adapter *bna =
+ (struct netmap_bwrap_adapter *)na;
+ struct netmap_adapter *hwna = bna->hwna;
+
+ ND("%s", NM_IFPNAME(na->ifp));
+
+ hwna->nm_krings_delete(hwna);
+ netmap_vp_krings_delete(na);
+}
+
+/* notify method for the bridge-->hwna direction */
+static int
+netmap_bwrap_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int flags)
+{
+ struct netmap_bwrap_adapter *bna =
+ (struct netmap_bwrap_adapter *)na;
+ struct netmap_adapter *hwna = bna->hwna;
+ struct netmap_kring *kring, *hw_kring;
+ struct netmap_ring *ring;
+ u_int lim, k;
+ int error = 0;
+
+ if (tx == NR_TX)
+ return ENXIO;
+
+ kring = &na->rx_rings[ring_n];
+ hw_kring = &hwna->tx_rings[ring_n];
+ ring = kring->ring;
+
+ lim = kring->nkr_num_slots - 1;
+ k = nm_kr_rxpos(kring);
+
+ if (hwna->ifp == NULL || !(hwna->ifp->if_capenable & IFCAP_NETMAP))
+ return 0;
+ ring->cur = k;
+ ND("%s[%d] PRE rx(%d, %d, %d, %d) ring(%d, %d, %d) tx(%d, %d)",
+ NM_IFPNAME(na->ifp), ring_n,
+ kring->nr_hwcur, kring->nr_hwavail, kring->nkr_hwlease, kring->nr_hwreserved,
+ ring->cur, ring->avail, ring->reserved,
+ hw_kring->nr_hwcur, hw_kring->nr_hwavail);
+ if (ring_n == na->num_rx_rings) {
+ netmap_txsync_to_host(hwna);
+ } else {
+ error = hwna->nm_txsync(hwna, ring_n, flags);
+ }
+ kring->nr_hwcur = ring->cur;
+ kring->nr_hwavail = 0;
+ kring->nr_hwreserved = lim - ring->avail;
+ ND("%s[%d] PST rx(%d, %d, %d, %d) ring(%d, %d, %d) tx(%d, %d)",
+ NM_IFPNAME(na->ifp), ring_n,
+ kring->nr_hwcur, kring->nr_hwavail, kring->nkr_hwlease, kring->nr_hwreserved,
+ ring->cur, ring->avail, ring->reserved,
+ hw_kring->nr_hwcur, hw_kring->nr_hwavail);
+
+ return error;
+}
+
+static int
+netmap_bwrap_host_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int flags)
+{
+ struct netmap_bwrap_adapter *bna = na->na_private;
+ struct netmap_adapter *port_na = &bna->up.up;
+ if (tx == NR_TX || ring_n != 0)
+ return ENXIO;
+ return netmap_bwrap_notify(port_na, port_na->num_rx_rings, NR_RX, flags);
+}
+
+/* attach a bridge wrapper to the 'real' device */
+static int
+netmap_bwrap_attach(struct ifnet *fake, struct ifnet *real)
+{
+ struct netmap_bwrap_adapter *bna;
+ struct netmap_adapter *na;
+ struct netmap_adapter *hwna = NA(real);
+ struct netmap_adapter *hostna;
+ int error;
+
+
+ bna = malloc(sizeof(*bna), M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (bna == NULL)
+ return ENOMEM;
+
+ na = &bna->up.up;
+ na->ifp = fake;
+ /* fill the ring data for the bwrap adapter with rx/tx meanings
+ * swapped. The real cross-linking will be done during register,
+ * when all the krings will have been created.
+ */
+ na->num_rx_rings = hwna->num_tx_rings;
+ na->num_tx_rings = hwna->num_rx_rings;
+ na->num_tx_desc = hwna->num_rx_desc;
+ na->num_rx_desc = hwna->num_tx_desc;
+ na->nm_dtor = netmap_bwrap_dtor;
+ na->nm_register = netmap_bwrap_register;
+ // na->nm_txsync = netmap_bwrap_txsync;
+ // na->nm_rxsync = netmap_bwrap_rxsync;
+ na->nm_config = netmap_bwrap_config;
+ na->nm_krings_create = netmap_bwrap_krings_create;
+ na->nm_krings_delete = netmap_bwrap_krings_delete;
+ na->nm_notify = netmap_bwrap_notify;
+ na->nm_mem = hwna->nm_mem;
+ na->na_private = na; /* prevent NIOCREGIF */
+ bna->up.retry = 1; /* XXX maybe this should depend on the hwna */
+
+ bna->hwna = hwna;
+ netmap_adapter_get(hwna);
+ hwna->na_private = bna; /* weak reference */
+
+ hostna = &bna->host.up;
+ hostna->ifp = hwna->ifp;
+ hostna->num_tx_rings = 1;
+ hostna->num_tx_desc = hwna->num_rx_desc;
+ hostna->num_rx_rings = 1;
+ hostna->num_rx_desc = hwna->num_tx_desc;
+ // hostna->nm_txsync = netmap_bwrap_host_txsync;
+ // hostna->nm_rxsync = netmap_bwrap_host_rxsync;
+ hostna->nm_notify = netmap_bwrap_host_notify;
+ hostna->nm_mem = na->nm_mem;
+ hostna->na_private = bna;
+
+ D("%s<->%s txr %d txd %d rxr %d rxd %d", fake->if_xname, real->if_xname,
+ na->num_tx_rings, na->num_tx_desc,
+ na->num_rx_rings, na->num_rx_desc);
+
+ error = netmap_attach_common(na);
+ if (error) {
+ netmap_adapter_put(hwna);
+ free(bna, M_DEVBUF);
+ return error;
+ }
+ return 0;
+}
+
+void
+netmap_init_bridges(void)
+{
+ int i;
+ bzero(nm_bridges, sizeof(struct nm_bridge) * NM_BRIDGES); /* safety */
+ for (i = 0; i < NM_BRIDGES; i++)
+ BDG_RWINIT(&nm_bridges[i]);
+}
+#endif /* WITH_VALE */