author    Warner Losh <imp@FreeBSD.org>    2019-08-21 22:17:55 +0000
committer Warner Losh <imp@FreeBSD.org>    2019-08-21 22:17:55 +0000
commit    f182f928dba2089ceb897191e546f32362035ddc (patch)
tree      ed75bffa6db92cfb69ec99b253ca9209042ff598 /sys/dev/nvme
parent    99f13ae12c53954deee9dbceb6732bbda79956e2 (diff)
Separate the pci attachment from the rest of nvme
NVMe drives can be attached in a number of different ways. Separate out the PCI attachment so that we can have other attachment types, like ahci and various types of NVMeoF.

Submitted by: cognet@
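As context for the split: after this change the core driver exports nvme_attach(), nvme_detach(), and nvme_shutdown() (see the nvme_private.h hunk below) and shares nvme_devclass, so a bus front end only has to supply probing, register mapping, and interrupt setup before calling into the core. The following is a minimal sketch of what an additional attachment could look like; the "foo" parent bus and the nvme_foo_* names are hypothetical placeholders, not part of this commit, and a real front end would also map the controller registers and set up interrupts before handing off.

/* Hypothetical sketch of a non-PCI front end built on the entry points
 * exported by this commit.  The "foo" bus and nvme_foo_* names are
 * illustrative only. */
#include <sys/param.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/module.h>

#include "nvme_private.h"

static int
nvme_foo_probe(device_t dev)
{
	/* Bus-specific matching would go here. */
	device_set_desc(dev, "NVMe controller (foo attachment)");
	return (BUS_PROBE_DEFAULT);
}

static int
nvme_foo_attach(device_t dev)
{
	struct nvme_controller *ctrlr = DEVICE2SOFTC(dev);

	ctrlr->dev = dev;
	/*
	 * A real front end would fill in ctrlr->resource, the bus tag/handle
	 * and the interrupt state here (the PCI version does this in
	 * nvme_ctrlr_allocate_bar() and nvme_ctrlr_setup_interrupts()),
	 * then hand off to the shared core attach.
	 */
	return (nvme_attach(dev));
}

static int
nvme_foo_detach(device_t dev)
{
	/* Shared core teardown; bus-specific cleanup would follow. */
	return (nvme_detach(dev));
}

static device_method_t nvme_foo_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,     nvme_foo_probe),
	DEVMETHOD(device_attach,    nvme_foo_attach),
	DEVMETHOD(device_detach,    nvme_foo_detach),
	DEVMETHOD(device_shutdown,  nvme_shutdown),
	{ 0, 0 }
};

static driver_t nvme_foo_driver = {
	"nvme",
	nvme_foo_methods,
	sizeof(struct nvme_controller),
};

DRIVER_MODULE(nvme, foo, nvme_foo_driver, nvme_devclass, NULL, 0);

Keeping the driver_t name "nvme" and registering against the shared nvme_devclass, as nvme_pci.c does, keeps unit numbering and /dev/nvmeX naming common across all front ends.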
Notes:
    svn path=/head/; revision=351355
Diffstat (limited to 'sys/dev/nvme')
 -rw-r--r--  sys/dev/nvme/nvme.c          | 150
 -rw-r--r--  sys/dev/nvme/nvme_ctrlr.c    | 159
 -rw-r--r--  sys/dev/nvme/nvme_pci.c      | 333
 -rw-r--r--  sys/dev/nvme/nvme_private.h  |   7
 4 files changed, 346 insertions(+), 303 deletions(-)
diff --git a/sys/dev/nvme/nvme.c b/sys/dev/nvme/nvme.c
index 022c6643bbe4..475c7df56cea 100644
--- a/sys/dev/nvme/nvme.c
+++ b/sys/dev/nvme/nvme.c
@@ -36,9 +36,6 @@ __FBSDID("$FreeBSD$");
#include <vm/uma.h>
-#include <dev/pci/pcireg.h>
-#include <dev/pci/pcivar.h>
-
#include "nvme_private.h"
struct nvme_consumer {
@@ -58,106 +55,7 @@ int32_t nvme_retry_count;
MALLOC_DEFINE(M_NVME, "nvme", "nvme(4) memory allocations");
-static int nvme_probe(device_t);
-static int nvme_attach(device_t);
-static int nvme_detach(device_t);
-static int nvme_shutdown(device_t);
-
-static devclass_t nvme_devclass;
-
-static device_method_t nvme_pci_methods[] = {
- /* Device interface */
- DEVMETHOD(device_probe, nvme_probe),
- DEVMETHOD(device_attach, nvme_attach),
- DEVMETHOD(device_detach, nvme_detach),
- DEVMETHOD(device_shutdown, nvme_shutdown),
- { 0, 0 }
-};
-
-static driver_t nvme_pci_driver = {
- "nvme",
- nvme_pci_methods,
- sizeof(struct nvme_controller),
-};
-
-DRIVER_MODULE(nvme, pci, nvme_pci_driver, nvme_devclass, NULL, NULL);
-MODULE_VERSION(nvme, 1);
-MODULE_DEPEND(nvme, cam, 1, 1, 1);
-
-static struct _pcsid
-{
- uint32_t devid;
- int match_subdevice;
- uint16_t subdevice;
- const char *desc;
- uint32_t quirks;
-} pci_ids[] = {
- { 0x01118086, 0, 0, "NVMe Controller" },
- { IDT32_PCI_ID, 0, 0, "IDT NVMe Controller (32 channel)" },
- { IDT8_PCI_ID, 0, 0, "IDT NVMe Controller (8 channel)" },
- { 0x09538086, 1, 0x3702, "DC P3700 SSD" },
- { 0x09538086, 1, 0x3703, "DC P3700 SSD [2.5\" SFF]" },
- { 0x09538086, 1, 0x3704, "DC P3500 SSD [Add-in Card]" },
- { 0x09538086, 1, 0x3705, "DC P3500 SSD [2.5\" SFF]" },
- { 0x09538086, 1, 0x3709, "DC P3600 SSD [Add-in Card]" },
- { 0x09538086, 1, 0x370a, "DC P3600 SSD [2.5\" SFF]" },
- { 0x00031c58, 0, 0, "HGST SN100", QUIRK_DELAY_B4_CHK_RDY },
- { 0x00231c58, 0, 0, "WDC SN200", QUIRK_DELAY_B4_CHK_RDY },
- { 0x05401c5f, 0, 0, "Memblaze Pblaze4", QUIRK_DELAY_B4_CHK_RDY },
- { 0xa821144d, 0, 0, "Samsung PM1725", QUIRK_DELAY_B4_CHK_RDY },
- { 0xa822144d, 0, 0, "Samsung PM1725a", QUIRK_DELAY_B4_CHK_RDY },
- { 0x01161179, 0, 0, "Toshiba XG5", QUIRK_DISABLE_TIMEOUT },
- { 0x00000000, 0, 0, NULL }
-};
-
-static int
-nvme_match(uint32_t devid, uint16_t subdevice, struct _pcsid *ep)
-{
- if (devid != ep->devid)
- return 0;
-
- if (!ep->match_subdevice)
- return 1;
-
- if (subdevice == ep->subdevice)
- return 1;
- else
- return 0;
-}
-
-static int
-nvme_probe (device_t device)
-{
- struct _pcsid *ep;
- uint32_t devid;
- uint16_t subdevice;
-
- devid = pci_get_devid(device);
- subdevice = pci_get_subdevice(device);
- ep = pci_ids;
-
- while (ep->devid) {
- if (nvme_match(devid, subdevice, ep))
- break;
- ++ep;
- }
-
- if (ep->desc) {
- device_set_desc(device, ep->desc);
- return (BUS_PROBE_DEFAULT);
- }
-
-#if defined(PCIS_STORAGE_NVM)
- if (pci_get_class(device) == PCIC_STORAGE &&
- pci_get_subclass(device) == PCIS_STORAGE_NVM &&
- pci_get_progif(device) == PCIP_STORAGE_NVM_ENTERPRISE_NVMHCI_1_0) {
- device_set_desc(device, "Generic NVMe Device");
- return (BUS_PROBE_GENERIC);
- }
-#endif
-
- return (ENXIO);
-}
+devclass_t nvme_devclass;
static void
nvme_init(void)
@@ -181,7 +79,7 @@ nvme_uninit(void)
SYSUNINIT(nvme_unregister, SI_SUB_DRIVERS, SI_ORDER_SECOND, nvme_uninit, NULL);
-static int
+int
nvme_shutdown(device_t dev)
{
struct nvme_controller *ctrlr;
@@ -225,24 +123,11 @@ nvme_dump_completion(struct nvme_completion *cpl)
cpl->cid, p, sc, sct, m, dnr);
}
-static int
+int
nvme_attach(device_t dev)
{
struct nvme_controller *ctrlr = DEVICE2SOFTC(dev);
int status;
- struct _pcsid *ep;
- uint32_t devid;
- uint16_t subdevice;
-
- devid = pci_get_devid(dev);
- subdevice = pci_get_subdevice(dev);
- ep = pci_ids;
- while (ep->devid) {
- if (nvme_match(devid, subdevice, ep))
- break;
- ++ep;
- }
- ctrlr->quirks = ep->quirks;
status = nvme_ctrlr_construct(ctrlr, dev);
@@ -252,31 +137,7 @@ nvme_attach(device_t dev)
}
/*
- * Some drives do not implement the completion timeout feature
- * correctly. There's a WAR from the manufacturer to just disable it.
- * The driver wouldn't respond correctly to a timeout anyway.
- */
- if (ep->quirks & QUIRK_DISABLE_TIMEOUT) {
- int ptr;
- uint16_t devctl2;
-
- status = pci_find_cap(dev, PCIY_EXPRESS, &ptr);
- if (status) {
- device_printf(dev, "Can't locate PCIe capability?");
- return (status);
- }
- devctl2 = pci_read_config(dev, ptr + PCIER_DEVICE_CTL2, sizeof(devctl2));
- devctl2 |= PCIEM_CTL2_COMP_TIMO_DISABLE;
- pci_write_config(dev, ptr + PCIER_DEVICE_CTL2, devctl2, sizeof(devctl2));
- }
-
- /*
- * Enable busmastering so the completion status messages can
- * be busmastered back to the host.
- */
- pci_enable_busmaster(dev);
-
- /*
+ * Reset controller twice to ensure we do a transition from cc.en==1
* Reset controller twice to ensure we do a transition from cc.en==1
* to cc.en==0. This is because we don't really know what status
* the controller was left in when boot handed off to OS.
@@ -301,13 +162,12 @@ nvme_attach(device_t dev)
return (0);
}
-static int
+int
nvme_detach (device_t dev)
{
struct nvme_controller *ctrlr = DEVICE2SOFTC(dev);
nvme_ctrlr_destruct(ctrlr, dev);
- pci_disable_busmaster(dev);
return (0);
}
diff --git a/sys/dev/nvme/nvme_ctrlr.c b/sys/dev/nvme/nvme_ctrlr.c
index b6dd64ac6191..aa895724302a 100644
--- a/sys/dev/nvme/nvme_ctrlr.c
+++ b/sys/dev/nvme/nvme_ctrlr.c
@@ -42,48 +42,12 @@ __FBSDID("$FreeBSD$");
#include <sys/uio.h>
#include <sys/endian.h>
-#include <dev/pci/pcireg.h>
-#include <dev/pci/pcivar.h>
-
#include "nvme_private.h"
#define B4_CHK_RDY_DELAY_MS 2300 /* work around controller bug */
static void nvme_ctrlr_construct_and_submit_aer(struct nvme_controller *ctrlr,
struct nvme_async_event_request *aer);
-static void nvme_ctrlr_setup_interrupts(struct nvme_controller *ctrlr);
-
-static int
-nvme_ctrlr_allocate_bar(struct nvme_controller *ctrlr)
-{
-
- ctrlr->resource_id = PCIR_BAR(0);
-
- ctrlr->resource = bus_alloc_resource_any(ctrlr->dev, SYS_RES_MEMORY,
- &ctrlr->resource_id, RF_ACTIVE);
-
- if(ctrlr->resource == NULL) {
- nvme_printf(ctrlr, "unable to allocate pci resource\n");
- return (ENOMEM);
- }
-
- ctrlr->bus_tag = rman_get_bustag(ctrlr->resource);
- ctrlr->bus_handle = rman_get_bushandle(ctrlr->resource);
- ctrlr->regs = (struct nvme_registers *)ctrlr->bus_handle;
-
- /*
- * The NVMe spec allows for the MSI-X table to be placed behind
- * BAR 4/5, separate from the control/doorbell registers. Always
- * try to map this bar, because it must be mapped prior to calling
- * pci_alloc_msix(). If the table isn't behind BAR 4/5,
- * bus_alloc_resource() will just return NULL which is OK.
- */
- ctrlr->bar4_resource_id = PCIR_BAR(4);
- ctrlr->bar4_resource = bus_alloc_resource_any(ctrlr->dev, SYS_RES_MEMORY,
- &ctrlr->bar4_resource_id, RF_ACTIVE);
-
- return (0);
-}
static int
nvme_ctrlr_construct_admin_qpair(struct nvme_controller *ctrlr)
@@ -876,9 +840,8 @@ nvme_ctrlr_start(void *ctrlr_arg)
* the number of I/O queues supported, so cannot reset
* the adminq again here.
*/
- if (ctrlr->is_resetting) {
+ if (ctrlr->is_resetting)
nvme_qpair_reset(&ctrlr->adminq);
- }
for (i = 0; i < ctrlr->num_io_queues; i++)
nvme_qpair_reset(&ctrlr->ioq[i]);
@@ -1004,34 +967,6 @@ nvme_ctrlr_intx_handler(void *arg)
nvme_mmio_write_4(ctrlr, intmc, 1);
}
-static int
-nvme_ctrlr_configure_intx(struct nvme_controller *ctrlr)
-{
-
- ctrlr->msix_enabled = 0;
- ctrlr->num_io_queues = 1;
- ctrlr->num_cpus_per_ioq = mp_ncpus;
- ctrlr->rid = 0;
- ctrlr->res = bus_alloc_resource_any(ctrlr->dev, SYS_RES_IRQ,
- &ctrlr->rid, RF_SHAREABLE | RF_ACTIVE);
-
- if (ctrlr->res == NULL) {
- nvme_printf(ctrlr, "unable to allocate shared IRQ\n");
- return (ENOMEM);
- }
-
- bus_setup_intr(ctrlr->dev, ctrlr->res,
- INTR_TYPE_MISC | INTR_MPSAFE, NULL, nvme_ctrlr_intx_handler,
- ctrlr, &ctrlr->tag);
-
- if (ctrlr->tag == NULL) {
- nvme_printf(ctrlr, "unable to setup intx handler\n");
- return (ENOMEM);
- }
-
- return (0);
-}
-
static void
nvme_pt_done(void *arg, const struct nvme_completion *cpl)
{
@@ -1177,88 +1112,6 @@ static struct cdevsw nvme_ctrlr_cdevsw = {
.d_ioctl = nvme_ctrlr_ioctl
};
-static void
-nvme_ctrlr_setup_interrupts(struct nvme_controller *ctrlr)
-{
- device_t dev;
- int per_cpu_io_queues;
- int min_cpus_per_ioq;
- int num_vectors_requested, num_vectors_allocated;
- int num_vectors_available;
-
- dev = ctrlr->dev;
- min_cpus_per_ioq = 1;
- TUNABLE_INT_FETCH("hw.nvme.min_cpus_per_ioq", &min_cpus_per_ioq);
-
- if (min_cpus_per_ioq < 1) {
- min_cpus_per_ioq = 1;
- } else if (min_cpus_per_ioq > mp_ncpus) {
- min_cpus_per_ioq = mp_ncpus;
- }
-
- per_cpu_io_queues = 1;
- TUNABLE_INT_FETCH("hw.nvme.per_cpu_io_queues", &per_cpu_io_queues);
-
- if (per_cpu_io_queues == 0) {
- min_cpus_per_ioq = mp_ncpus;
- }
-
- ctrlr->force_intx = 0;
- TUNABLE_INT_FETCH("hw.nvme.force_intx", &ctrlr->force_intx);
-
- /*
- * FreeBSD currently cannot allocate more than about 190 vectors at
- * boot, meaning that systems with high core count and many devices
- * requesting per-CPU interrupt vectors will not get their full
- * allotment. So first, try to allocate as many as we may need to
- * understand what is available, then immediately release them.
- * Then figure out how many of those we will actually use, based on
- * assigning an equal number of cores to each I/O queue.
- */
-
- /* One vector for per core I/O queue, plus one vector for admin queue. */
- num_vectors_available = min(pci_msix_count(dev), mp_ncpus + 1);
- if (pci_alloc_msix(dev, &num_vectors_available) != 0) {
- num_vectors_available = 0;
- }
- pci_release_msi(dev);
-
- if (ctrlr->force_intx || num_vectors_available < 2) {
- nvme_ctrlr_configure_intx(ctrlr);
- return;
- }
-
- /*
- * Do not use all vectors for I/O queues - one must be saved for the
- * admin queue.
- */
- ctrlr->num_cpus_per_ioq = max(min_cpus_per_ioq,
- howmany(mp_ncpus, num_vectors_available - 1));
-
- ctrlr->num_io_queues = howmany(mp_ncpus, ctrlr->num_cpus_per_ioq);
- num_vectors_requested = ctrlr->num_io_queues + 1;
- num_vectors_allocated = num_vectors_requested;
-
- /*
- * Now just allocate the number of vectors we need. This should
- * succeed, since we previously called pci_alloc_msix()
- * successfully returning at least this many vectors, but just to
- * be safe, if something goes wrong just revert to INTx.
- */
- if (pci_alloc_msix(dev, &num_vectors_allocated) != 0) {
- nvme_ctrlr_configure_intx(ctrlr);
- return;
- }
-
- if (num_vectors_allocated < num_vectors_requested) {
- pci_release_msi(dev);
- nvme_ctrlr_configure_intx(ctrlr);
- return;
- }
-
- ctrlr->msix_enabled = 1;
-}
-
int
nvme_ctrlr_construct(struct nvme_controller *ctrlr, device_t dev)
{
@@ -1274,11 +1127,6 @@ nvme_ctrlr_construct(struct nvme_controller *ctrlr, device_t dev)
mtx_init(&ctrlr->lock, "nvme ctrlr lock", NULL, MTX_DEF);
- status = nvme_ctrlr_allocate_bar(ctrlr);
-
- if (status != 0)
- return (status);
-
/*
* Software emulators may set the doorbell stride to something
* other than zero, but this driver is not set up to handle that.
@@ -1308,8 +1156,6 @@ nvme_ctrlr_construct(struct nvme_controller *ctrlr, device_t dev)
ctrlr->enable_aborts = 0;
TUNABLE_INT_FETCH("hw.nvme.enable_aborts", &ctrlr->enable_aborts);
- nvme_ctrlr_setup_interrupts(ctrlr);
-
ctrlr->max_xfer_size = NVME_MAX_XFER_SIZE;
if (nvme_ctrlr_construct_admin_qpair(ctrlr) != 0)
return (ENXIO);
@@ -1395,9 +1241,6 @@ nvme_ctrlr_destruct(struct nvme_controller *ctrlr, device_t dev)
bus_release_resource(ctrlr->dev, SYS_RES_IRQ,
rman_get_rid(ctrlr->res), ctrlr->res);
- if (ctrlr->msix_enabled)
- pci_release_msi(dev);
-
if (ctrlr->bar4_resource != NULL) {
bus_release_resource(dev, SYS_RES_MEMORY,
ctrlr->bar4_resource_id, ctrlr->bar4_resource);
diff --git a/sys/dev/nvme/nvme_pci.c b/sys/dev/nvme/nvme_pci.c
new file mode 100644
index 000000000000..358c840a1f90
--- /dev/null
+++ b/sys/dev/nvme/nvme_pci.c
@@ -0,0 +1,333 @@
+/*-
+ * Copyright (C) 2012-2016 Intel Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/buf.h>
+#include <sys/bus.h>
+#include <sys/conf.h>
+#include <sys/proc.h>
+#include <sys/smp.h>
+
+#include <dev/pci/pcireg.h>
+#include <dev/pci/pcivar.h>
+
+#include "nvme_private.h"
+
+static int nvme_pci_probe(device_t);
+static int nvme_pci_attach(device_t);
+static int nvme_pci_detach(device_t);
+
+static void nvme_ctrlr_setup_interrupts(struct nvme_controller *ctrlr);
+
+static device_method_t nvme_pci_methods[] = {
+ /* Device interface */
+ DEVMETHOD(device_probe, nvme_pci_probe),
+ DEVMETHOD(device_attach, nvme_pci_attach),
+ DEVMETHOD(device_detach, nvme_pci_detach),
+ DEVMETHOD(device_shutdown, nvme_shutdown),
+ { 0, 0 }
+};
+
+static driver_t nvme_pci_driver = {
+ "nvme",
+ nvme_pci_methods,
+ sizeof(struct nvme_controller),
+};
+
+DRIVER_MODULE(nvme, pci, nvme_pci_driver, nvme_devclass, NULL, 0);
+MODULE_VERSION(nvme_pci, 1);
+
+static struct _pcsid
+{
+ uint32_t devid;
+ int match_subdevice;
+ uint16_t subdevice;
+ const char *desc;
+ uint32_t quirks;
+} pci_ids[] = {
+ { 0x01118086, 0, 0, "NVMe Controller" },
+ { IDT32_PCI_ID, 0, 0, "IDT NVMe Controller (32 channel)" },
+ { IDT8_PCI_ID, 0, 0, "IDT NVMe Controller (8 channel)" },
+ { 0x09538086, 1, 0x3702, "DC P3700 SSD" },
+ { 0x09538086, 1, 0x3703, "DC P3700 SSD [2.5\" SFF]" },
+ { 0x09538086, 1, 0x3704, "DC P3500 SSD [Add-in Card]" },
+ { 0x09538086, 1, 0x3705, "DC P3500 SSD [2.5\" SFF]" },
+ { 0x09538086, 1, 0x3709, "DC P3600 SSD [Add-in Card]" },
+ { 0x09538086, 1, 0x370a, "DC P3600 SSD [2.5\" SFF]" },
+ { 0x00031c58, 0, 0, "HGST SN100", QUIRK_DELAY_B4_CHK_RDY },
+ { 0x00231c58, 0, 0, "WDC SN200", QUIRK_DELAY_B4_CHK_RDY },
+ { 0x05401c5f, 0, 0, "Memblaze Pblaze4", QUIRK_DELAY_B4_CHK_RDY },
+ { 0xa821144d, 0, 0, "Samsung PM1725", QUIRK_DELAY_B4_CHK_RDY },
+ { 0xa822144d, 0, 0, "Samsung PM1725a", QUIRK_DELAY_B4_CHK_RDY },
+ { 0x00000000, 0, 0, NULL }
+};
+
+
+static int
+nvme_match(uint32_t devid, uint16_t subdevice, struct _pcsid *ep)
+{
+ if (devid != ep->devid)
+ return 0;
+
+ if (!ep->match_subdevice)
+ return 1;
+
+ if (subdevice == ep->subdevice)
+ return 1;
+ else
+ return 0;
+}
+
+static int
+nvme_pci_probe (device_t device)
+{
+ struct nvme_controller *ctrlr = DEVICE2SOFTC(device);
+ struct _pcsid *ep;
+ uint32_t devid;
+ uint16_t subdevice;
+
+ devid = pci_get_devid(device);
+ subdevice = pci_get_subdevice(device);
+ ep = pci_ids;
+
+ while (ep->devid) {
+ if (nvme_match(devid, subdevice, ep))
+ break;
+ ++ep;
+ }
+ if (ep->devid)
+ ctrlr->quirks = ep->quirks;
+
+ if (ep->desc) {
+ device_set_desc(device, ep->desc);
+ return (BUS_PROBE_DEFAULT);
+ }
+
+#if defined(PCIS_STORAGE_NVM)
+ if (pci_get_class(device) == PCIC_STORAGE &&
+ pci_get_subclass(device) == PCIS_STORAGE_NVM &&
+ pci_get_progif(device) == PCIP_STORAGE_NVM_ENTERPRISE_NVMHCI_1_0) {
+ device_set_desc(device, "Generic NVMe Device");
+ return (BUS_PROBE_GENERIC);
+ }
+#endif
+
+ return (ENXIO);
+}
+
+static int
+nvme_ctrlr_allocate_bar(struct nvme_controller *ctrlr)
+{
+
+ ctrlr->resource_id = PCIR_BAR(0);
+
+ ctrlr->resource = bus_alloc_resource_any(ctrlr->dev, SYS_RES_MEMORY,
+ &ctrlr->resource_id, RF_ACTIVE);
+
+ if(ctrlr->resource == NULL) {
+ nvme_printf(ctrlr, "unable to allocate pci resource\n");
+ return (ENOMEM);
+ }
+
+ ctrlr->bus_tag = rman_get_bustag(ctrlr->resource);
+ ctrlr->bus_handle = rman_get_bushandle(ctrlr->resource);
+ ctrlr->regs = (struct nvme_registers *)ctrlr->bus_handle;
+
+ /*
+ * The NVMe spec allows for the MSI-X table to be placed behind
+ * BAR 4/5, separate from the control/doorbell registers. Always
+ * try to map this bar, because it must be mapped prior to calling
+ * pci_alloc_msix(). If the table isn't behind BAR 4/5,
+ * bus_alloc_resource() will just return NULL which is OK.
+ */
+ ctrlr->bar4_resource_id = PCIR_BAR(4);
+ ctrlr->bar4_resource = bus_alloc_resource_any(ctrlr->dev, SYS_RES_MEMORY,
+ &ctrlr->bar4_resource_id, RF_ACTIVE);
+
+ return (0);
+}
+
+static int
+nvme_pci_attach(device_t dev)
+{
+ struct nvme_controller *ctrlr = DEVICE2SOFTC(dev);
+ int status;
+
+ ctrlr->dev = dev;
+ status = nvme_ctrlr_allocate_bar(ctrlr);
+ if (status != 0)
+ goto bad;
+ pci_enable_busmaster(dev);
+ nvme_ctrlr_setup_interrupts(ctrlr);
+ return nvme_attach(dev);
+bad:
+ if (ctrlr->resource != NULL) {
+ bus_release_resource(dev, SYS_RES_MEMORY,
+ ctrlr->resource_id, ctrlr->resource);
+ }
+
+ if (ctrlr->bar4_resource != NULL) {
+ bus_release_resource(dev, SYS_RES_MEMORY,
+ ctrlr->bar4_resource_id, ctrlr->bar4_resource);
+ }
+
+ if (ctrlr->tag)
+ bus_teardown_intr(dev, ctrlr->res, ctrlr->tag);
+
+ if (ctrlr->res)
+ bus_release_resource(dev, SYS_RES_IRQ,
+ rman_get_rid(ctrlr->res), ctrlr->res);
+
+ if (ctrlr->msix_enabled)
+ pci_release_msi(dev);
+
+ return status;
+}
+
+static int
+nvme_pci_detach(device_t dev)
+{
+ struct nvme_controller *ctrlr = DEVICE2SOFTC(dev);
+
+ if (ctrlr->msix_enabled)
+ pci_release_msi(dev);
+ pci_disable_busmaster(dev);
+ return (nvme_detach(dev));
+}
+
+static int
+nvme_ctrlr_configure_intx(struct nvme_controller *ctrlr)
+{
+
+ ctrlr->msix_enabled = 0;
+ ctrlr->num_io_queues = 1;
+ ctrlr->num_cpus_per_ioq = mp_ncpus;
+ ctrlr->rid = 0;
+ ctrlr->res = bus_alloc_resource_any(ctrlr->dev, SYS_RES_IRQ,
+ &ctrlr->rid, RF_SHAREABLE | RF_ACTIVE);
+
+ if (ctrlr->res == NULL) {
+ nvme_printf(ctrlr, "unable to allocate shared IRQ\n");
+ return (ENOMEM);
+ }
+
+ bus_setup_intr(ctrlr->dev, ctrlr->res,
+ INTR_TYPE_MISC | INTR_MPSAFE, NULL, nvme_ctrlr_intx_handler,
+ ctrlr, &ctrlr->tag);
+
+ if (ctrlr->tag == NULL) {
+ nvme_printf(ctrlr, "unable to setup intx handler\n");
+ return (ENOMEM);
+ }
+
+ return (0);
+}
+
+static void
+nvme_ctrlr_setup_interrupts(struct nvme_controller *ctrlr)
+{
+ device_t dev;
+ int per_cpu_io_queues;
+ int min_cpus_per_ioq;
+ int num_vectors_requested, num_vectors_allocated;
+ int num_vectors_available;
+
+ dev = ctrlr->dev;
+ min_cpus_per_ioq = 1;
+ TUNABLE_INT_FETCH("hw.nvme.min_cpus_per_ioq", &min_cpus_per_ioq);
+
+ if (min_cpus_per_ioq < 1) {
+ min_cpus_per_ioq = 1;
+ } else if (min_cpus_per_ioq > mp_ncpus) {
+ min_cpus_per_ioq = mp_ncpus;
+ }
+
+ per_cpu_io_queues = 1;
+ TUNABLE_INT_FETCH("hw.nvme.per_cpu_io_queues", &per_cpu_io_queues);
+
+ if (per_cpu_io_queues == 0) {
+ min_cpus_per_ioq = mp_ncpus;
+ }
+
+ ctrlr->force_intx = 0;
+ TUNABLE_INT_FETCH("hw.nvme.force_intx", &ctrlr->force_intx);
+
+ /*
+ * FreeBSD currently cannot allocate more than about 190 vectors at
+ * boot, meaning that systems with high core count and many devices
+ * requesting per-CPU interrupt vectors will not get their full
+ * allotment. So first, try to allocate as many as we may need to
+ * understand what is available, then immediately release them.
+ * Then figure out how many of those we will actually use, based on
+ * assigning an equal number of cores to each I/O queue.
+ */
+
+ /* One vector for per core I/O queue, plus one vector for admin queue. */
+ num_vectors_available = min(pci_msix_count(dev), mp_ncpus + 1);
+ if (pci_alloc_msix(dev, &num_vectors_available) != 0) {
+ num_vectors_available = 0;
+ }
+ pci_release_msi(dev);
+
+ if (ctrlr->force_intx || num_vectors_available < 2) {
+ nvme_ctrlr_configure_intx(ctrlr);
+ return;
+ }
+
+ /*
+ * Do not use all vectors for I/O queues - one must be saved for the
+ * admin queue.
+ */
+ ctrlr->num_cpus_per_ioq = max(min_cpus_per_ioq,
+ howmany(mp_ncpus, num_vectors_available - 1));
+
+ ctrlr->num_io_queues = howmany(mp_ncpus, ctrlr->num_cpus_per_ioq);
+ num_vectors_requested = ctrlr->num_io_queues + 1;
+ num_vectors_allocated = num_vectors_requested;
+
+ /*
+ * Now just allocate the number of vectors we need. This should
+ * succeed, since we previously called pci_alloc_msix()
+ * successfully returning at least this many vectors, but just to
+ * be safe, if something goes wrong just revert to INTx.
+ */
+ if (pci_alloc_msix(dev, &num_vectors_allocated) != 0) {
+ nvme_ctrlr_configure_intx(ctrlr);
+ return;
+ }
+
+ if (num_vectors_allocated < num_vectors_requested) {
+ pci_release_msi(dev);
+ nvme_ctrlr_configure_intx(ctrlr);
+ return;
+ }
+
+ ctrlr->msix_enabled = 1;
+}
diff --git a/sys/dev/nvme/nvme_private.h b/sys/dev/nvme/nvme_private.h
index dda13bcce280..910c579a94e2 100644
--- a/sys/dev/nvme/nvme_private.h
+++ b/sys/dev/nvme/nvme_private.h
@@ -37,6 +37,7 @@
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
+#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/rman.h>
#include <sys/systm.h>
@@ -122,6 +123,8 @@ struct nvme_completion_poll_status {
int done;
};
+extern devclass_t nvme_devclass;
+
#define NVME_REQUEST_VADDR 1
#define NVME_REQUEST_NULL 2 /* For requests with no payload. */
#define NVME_REQUEST_UIO 3
@@ -439,6 +442,10 @@ void nvme_sysctl_initialize_ctrlr(struct nvme_controller *ctrlr);
void nvme_dump_command(struct nvme_command *cmd);
void nvme_dump_completion(struct nvme_completion *cpl);
+int nvme_attach(device_t dev);
+int nvme_shutdown(device_t dev);
+int nvme_detach(device_t dev);
+
static __inline void
nvme_single_map(void *arg, bus_dma_segment_t *seg, int nseg, int error)
{