diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2015-01-31 14:31:12 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2015-01-31 14:31:12 +0000 |
commit | 6a0fc39710cd2fd18addb70c5e932d4d443a1859 (patch) | |
tree | d8345fe1afcf306c2b829876af57f92f4611bc28 | |
parent | 174411efe9b6fbca9ae37325985fa14be36774b7 (diff) | |
parent | 620d4f3c0bbe724386cce86ca899c11b71e1a930 (diff) | |
download | src-6a0fc39710cd2fd18addb70c5e932d4d443a1859.tar.gz src-6a0fc39710cd2fd18addb70c5e932d4d443a1859.zip |
Merge ^/head r277956 through r277974.
Notes
Notes:
svn path=/projects/clang360-import/; revision=277975
35 files changed, 2110 insertions, 1461 deletions
diff --git a/bin/sh/jobs.c b/bin/sh/jobs.c index 765e6a2ed3aa..b1bfa0944d71 100644 --- a/bin/sh/jobs.c +++ b/bin/sh/jobs.c @@ -87,8 +87,8 @@ static int ttyfd = -1; /* mode flags for dowait */ #define DOWAIT_BLOCK 0x1 /* wait until a child exits */ -#define DOWAIT_SIG 0x2 /* if DOWAIT_BLOCK, abort on SIGINT/SIGQUIT */ -#define DOWAIT_SIG_ANY 0x4 /* if DOWAIT_SIG, abort on any signal */ +#define DOWAIT_SIG 0x2 /* if DOWAIT_BLOCK, abort on signal */ +#define DOWAIT_SIG_TRAP 0x4 /* if DOWAIT_SIG, abort on trapped signal only */ #if JOBS static void restartjob(struct job *); @@ -1028,7 +1028,7 @@ waitforjob(struct job *jp, int *origstatus) TRACE(("waitforjob(%%%td) called\n", jp - jobtab + 1)); while (jp->state == 0) if (dowait(DOWAIT_BLOCK | (Tflag ? DOWAIT_SIG | - DOWAIT_SIG_ANY : 0), jp) == -1) + DOWAIT_SIG_TRAP : 0), jp) == -1) dotrap(); #if JOBS if (jp->jobctl) { @@ -1120,7 +1120,7 @@ dowait(int mode, struct job *job) TRACE(("wait returns %d, status=%d\n", (int)pid, status)); if (pid == 0 && (mode & DOWAIT_SIG) != 0) { pid = -1; - if (((mode & DOWAIT_SIG_ANY) != 0 ? + if (((mode & DOWAIT_SIG_TRAP) != 0 ? pendingsig : pendingsig_waitcmd) != 0) { errno = EINTR; break; diff --git a/bin/sh/trap.c b/bin/sh/trap.c index c23e6bc3b1ae..4a185b4874f5 100644 --- a/bin/sh/trap.c +++ b/bin/sh/trap.c @@ -74,7 +74,7 @@ __FBSDID("$FreeBSD$"); static char sigmode[NSIG]; /* current value of signal */ volatile sig_atomic_t pendingsig; /* indicates some signal received */ -volatile sig_atomic_t pendingsig_waitcmd; /* indicates SIGINT/SIGQUIT received */ +volatile sig_atomic_t pendingsig_waitcmd; /* indicates wait builtin should be interrupted */ static int in_dotrap; /* do we execute in a trap handler? 
*/ static char *volatile trap[NSIG]; /* trap handler commands */ static volatile sig_atomic_t gotsig[NSIG]; @@ -400,6 +400,7 @@ onsig(int signo) (signo != SIGCHLD || !ignore_sigchld)) { gotsig[signo] = 1; pendingsig = signo; + pendingsig_waitcmd = signo; } } diff --git a/lib/libproc/proc_sym.c b/lib/libproc/proc_sym.c index 90d40a899c33..d4f82629fb7b 100644 --- a/lib/libproc/proc_sym.c +++ b/lib/libproc/proc_sym.c @@ -153,9 +153,12 @@ proc_iter_objs(struct proc_handle *p, proc_map_f *func, void *cd) prmap_t map; char path[MAXPATHLEN]; char last[MAXPATHLEN]; + int error; if (p->nobjs == 0) return (-1); + + error = 0; memset(last, 0, sizeof(last)); for (i = 0; i < p->nobjs; i++) { rdl = &p->rdobjs[i]; @@ -169,11 +172,11 @@ proc_iter_objs(struct proc_handle *p, proc_map_f *func, void *cd) */ if (strcmp(path, last) == 0) continue; - (*func)(cd, &map, path); + if ((error = (*func)(cd, &map, path)) != 0) + break; strlcpy(last, path, sizeof(last)); } - - return (0); + return (error); } prmap_t * @@ -599,7 +602,8 @@ proc_iter_symbyaddr(struct proc_handle *p, const char *object, int which, s = elf_strptr(e, stridx, sym.st_name); if (ehdr.e_type != ET_EXEC) sym.st_value += map->pr_vaddr; - (*func)(cd, &sym, s); + if ((error = (*func)(cd, &sym, s)) != 0) + goto err2; } error = 0; err2: diff --git a/sys/arm/allwinner/a10_gpio.c b/sys/arm/allwinner/a10_gpio.c index 23bf399d3dc3..12d69890a500 100644 --- a/sys/arm/allwinner/a10_gpio.c +++ b/sys/arm/allwinner/a10_gpio.c @@ -427,7 +427,7 @@ a10_gpio_attach(device_t dev) RF_ACTIVE); if (!sc->sc_mem_res) { device_printf(dev, "cannot allocate memory window\n"); - return (ENXIO); + goto fail; } sc->sc_bst = rman_get_bustag(sc->sc_mem_res); @@ -437,9 +437,8 @@ a10_gpio_attach(device_t dev) sc->sc_irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE); if (!sc->sc_irq_res) { - bus_release_resource(dev, SYS_RES_MEMORY, 0, sc->sc_mem_res); device_printf(dev, "cannot allocate interrupt\n"); - return (ENXIO); + goto fail; } /* 
Find our node. */ @@ -472,6 +471,8 @@ fail: bus_release_resource(dev, SYS_RES_IRQ, 0, sc->sc_irq_res); if (sc->sc_mem_res) bus_release_resource(dev, SYS_RES_MEMORY, 0, sc->sc_mem_res); + mtx_destroy(&sc->sc_mtx); + return (ENXIO); } diff --git a/sys/arm/altera/socfpga/socfpga_gpio.c b/sys/arm/altera/socfpga/socfpga_gpio.c index 196c1e94b718..1a50c7c33bfb 100644 --- a/sys/arm/altera/socfpga/socfpga_gpio.c +++ b/sys/arm/altera/socfpga/socfpga_gpio.c @@ -163,6 +163,7 @@ socfpga_gpio_attach(device_t dev) if (bus_alloc_resources(dev, socfpga_gpio_spec, sc->res)) { device_printf(dev, "could not allocate resources\n"); + mtx_destroy(&sc->sc_mtx); return (ENXIO); } diff --git a/sys/arm/freescale/imx/imx_gpio.c b/sys/arm/freescale/imx/imx_gpio.c index c23f75be3f7c..911c9bbbf748 100644 --- a/sys/arm/freescale/imx/imx_gpio.c +++ b/sys/arm/freescale/imx/imx_gpio.c @@ -389,6 +389,8 @@ imx51_gpio_attach(device_t dev) if (bus_alloc_resources(dev, imx_gpio_spec, sc->sc_res)) { device_printf(dev, "could not allocate resources\n"); + bus_release_resources(dev, imx_gpio_spec, sc->sc_res); + mtx_destroy(&sc->sc_mtx); return (ENXIO); } @@ -411,6 +413,7 @@ imx51_gpio_attach(device_t dev) imx51_gpio_intr, NULL, sc, &sc->gpio_ih[irq]))) { device_printf(dev, "WARNING: unable to register interrupt handler\n"); + imx51_gpio_detach(dev); return (ENXIO); } } @@ -434,6 +437,7 @@ imx51_gpio_attach(device_t dev) static int imx51_gpio_detach(device_t dev) { + int irq; struct imx51_gpio_softc *sc; sc = device_get_softc(dev); @@ -441,13 +445,12 @@ imx51_gpio_detach(device_t dev) KASSERT(mtx_initialized(&sc->sc_mtx), ("gpio mutex not initialized")); bus_generic_detach(dev); - - if (sc->sc_res[3]) - bus_release_resources(dev, imx_gpio0irq_spec, &sc->sc_res[3]); - - if (sc->sc_res[0]) - bus_release_resources(dev, imx_gpio_spec, sc->sc_res); - + for (irq = 1; irq <= sc->sc_l_irq; irq ++) { + if (sc->gpio_ih[irq]) + bus_teardown_intr(dev, sc->sc_res[irq], sc->gpio_ih[irq]); + } + 
bus_release_resources(dev, imx_gpio0irq_spec, &sc->sc_res[3]); + bus_release_resources(dev, imx_gpio_spec, sc->sc_res); mtx_destroy(&sc->sc_mtx); return(0); diff --git a/sys/arm/freescale/vybrid/vf_gpio.c b/sys/arm/freescale/vybrid/vf_gpio.c index a31ff782aacf..1fcf32903d09 100644 --- a/sys/arm/freescale/vybrid/vf_gpio.c +++ b/sys/arm/freescale/vybrid/vf_gpio.c @@ -125,6 +125,7 @@ vf_gpio_attach(device_t dev) if (bus_alloc_resources(dev, vf_gpio_spec, sc->res)) { device_printf(dev, "could not allocate resources\n"); + mtx_destroy(&sc->sc_mtx); return (ENXIO); } diff --git a/sys/arm/rockchip/rk30xx_gpio.c b/sys/arm/rockchip/rk30xx_gpio.c index 6eb7db742b99..0728dbf09891 100644 --- a/sys/arm/rockchip/rk30xx_gpio.c +++ b/sys/arm/rockchip/rk30xx_gpio.c @@ -399,13 +399,14 @@ rk30_gpio_attach(device_t dev) if (rk30_gpio_sc) return (ENXIO); sc->sc_dev = dev; + mtx_init(&sc->sc_mtx, "rk30 gpio", "gpio", MTX_DEF); rid = 0; sc->sc_mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (!sc->sc_mem_res) { device_printf(dev, "cannot allocate memory window\n"); - return (ENXIO); + goto fail; } sc->sc_bst = rman_get_bustag(sc->sc_mem_res); sc->sc_bsh = rman_get_bushandle(sc->sc_mem_res); @@ -421,17 +422,15 @@ rk30_gpio_attach(device_t dev) if (sc->sc_bank == -1) { device_printf(dev, "unsupported device unit (only GPIO0..3 are supported)\n"); - bus_release_resource(dev, SYS_RES_MEMORY, 0, sc->sc_mem_res); - return (ENXIO); + goto fail; } rid = 0; sc->sc_irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE); if (!sc->sc_irq_res) { - bus_release_resource(dev, SYS_RES_MEMORY, 0, sc->sc_mem_res); device_printf(dev, "cannot allocate interrupt\n"); - return (ENXIO); + goto fail; } /* Find our node. */ @@ -441,8 +440,6 @@ rk30_gpio_attach(device_t dev) /* Node is not a GPIO controller. */ goto fail; - mtx_init(&sc->sc_mtx, "rk30 gpio", "gpio", MTX_DEF); - /* Initialize the software controlled pins. 
*/ for (i = 0; i < RK30_GPIO_PINS; i++) { snprintf(sc->sc_gpio_pins[i].gp_name, GPIOMAXNAME, @@ -467,6 +464,8 @@ fail: bus_release_resource(dev, SYS_RES_IRQ, 0, sc->sc_irq_res); if (sc->sc_mem_res) bus_release_resource(dev, SYS_RES_MEMORY, 0, sc->sc_mem_res); + mtx_destroy(&sc->sc_mtx); + return (ENXIO); } diff --git a/sys/arm/samsung/exynos/exynos5_pad.c b/sys/arm/samsung/exynos/exynos5_pad.c index c28ebb1c68e3..0e58bf56299e 100644 --- a/sys/arm/samsung/exynos/exynos5_pad.c +++ b/sys/arm/samsung/exynos/exynos5_pad.c @@ -509,12 +509,12 @@ pad_attach(device_t dev) sc->nports = 5; break; default: - return (-1); + goto fail; }; if (bus_alloc_resources(dev, sc->pad_spec, sc->res)) { device_printf(dev, "could not allocate resources\n"); - return (ENXIO); + goto fail; } /* Memory interface */ @@ -534,9 +534,9 @@ pad_attach(device_t dev) NULL, sc, &sc->gpio_ih[i]))) { device_printf(dev, "ERROR: Unable to register interrupt handler\n"); - return (ENXIO); + goto fail; } - }; + } for (i = 0; i < sc->gpio_npins; i++) { sc->gpio_pins[i].gp_pin = i; @@ -563,6 +563,17 @@ pad_attach(device_t dev) device_add_child(dev, "gpiobus", -1); return (bus_generic_attach(dev)); + +fail: + for (i = 0; i < sc->nports; i++) { + if (sc->gpio_ih[i]) + bus_teardown_intr(dev, sc->res[sc->nports + i], + sc->gpio_ih[i]); + } + bus_release_resources(dev, sc->pad_spec, sc->res); + mtx_destroy(&sc->sc_mtx); + + return (ENXIO); } static int diff --git a/sys/arm/ti/ti_gpio.c b/sys/arm/ti/ti_gpio.c index ed24958f63ec..8874ff35046e 100644 --- a/sys/arm/ti/ti_gpio.c +++ b/sys/arm/ti/ti_gpio.c @@ -113,6 +113,7 @@ __FBSDID("$FreeBSD$"); #define TI_GPIO_MASK(p) (1U << ((p) % PINS_PER_BANK)) static struct ti_gpio_softc *ti_gpio_sc = NULL; +static int ti_gpio_detach(device_t); static u_int ti_max_gpio_banks(void) @@ -763,21 +764,21 @@ ti_gpio_attach(device_t dev) */ if (bus_alloc_resources(dev, ti_gpio_mem_spec, sc->sc_mem_res) != 0) { device_printf(dev, "Error: could not allocate mem resources\n"); + 
ti_gpio_detach(dev); return (ENXIO); } /* Request the IRQ resources */ if (bus_alloc_resources(dev, ti_gpio_irq_spec, sc->sc_irq_res) != 0) { - bus_release_resources(dev, ti_gpio_mem_spec, sc->sc_mem_res); device_printf(dev, "Error: could not allocate irq resources\n"); + ti_gpio_detach(dev); return (ENXIO); } /* Setup the IRQ resources */ if (ti_gpio_attach_intr(dev) != 0) { - ti_gpio_detach_intr(dev); - bus_release_resources(dev, ti_gpio_irq_spec, sc->sc_irq_res); - bus_release_resources(dev, ti_gpio_mem_spec, sc->sc_mem_res); + device_printf(dev, "Error: could not setup irq handlers\n"); + ti_gpio_detach(dev); return (ENXIO); } @@ -809,11 +810,7 @@ ti_gpio_attach(device_t dev) /* Initialize the GPIO module. */ err = ti_gpio_bank_init(dev, i); if (err != 0) { - ti_gpio_detach_intr(dev); - bus_release_resources(dev, ti_gpio_irq_spec, - sc->sc_irq_res); - bus_release_resources(dev, ti_gpio_mem_spec, - sc->sc_mem_res); + ti_gpio_detach(dev); return (err); } } @@ -852,18 +849,17 @@ ti_gpio_detach(device_t dev) if (sc->sc_mem_res[i] != NULL) ti_gpio_intr_clr(sc, i, 0xffffffff); } - bus_generic_detach(dev); - - free(sc->sc_events, M_DEVBUF); - free(sc->sc_irq_polarity, M_DEVBUF); - free(sc->sc_irq_trigger, M_DEVBUF); - + if (sc->sc_events) + free(sc->sc_events, M_DEVBUF); + if (sc->sc_irq_polarity) + free(sc->sc_irq_polarity, M_DEVBUF); + if (sc->sc_irq_trigger) + free(sc->sc_irq_trigger, M_DEVBUF); /* Release the memory and IRQ resources. 
*/ ti_gpio_detach_intr(dev); bus_release_resources(dev, ti_gpio_irq_spec, sc->sc_irq_res); bus_release_resources(dev, ti_gpio_mem_spec, sc->sc_mem_res); - TI_GPIO_LOCK_DESTROY(sc); return (0); diff --git a/sys/arm/ti/ti_pruss.c b/sys/arm/ti/ti_pruss.c index 03a76761b2be..9a981d80906e 100644 --- a/sys/arm/ti/ti_pruss.c +++ b/sys/arm/ti/ti_pruss.c @@ -258,6 +258,7 @@ ti_pruss_mmap(struct cdev *cdev, vm_ooffset_t offset, vm_paddr_t *paddr, if (offset > rman_get_size(sc->sc_mem_res)) return (-1); *paddr = rman_get_start(sc->sc_mem_res) + offset; + *memattr = VM_MEMATTR_UNCACHEABLE; return (0); } diff --git a/sys/boot/common/load_elf.c b/sys/boot/common/load_elf.c index 4c801e9e363b..6ab2ba4b135e 100644 --- a/sys/boot/common/load_elf.c +++ b/sys/boot/common/load_elf.c @@ -175,7 +175,33 @@ __elfN(loadfile_raw)(char *filename, u_int64_t dest, * Check to see what sort of module we are. */ kfp = file_findfile(NULL, __elfN(kerneltype)); - if (ehdr->e_type == ET_DYN) { +#ifdef __powerpc__ + /* + * Kernels can be ET_DYN, so just assume the first loaded object is the + * kernel. This assumption will be checked later. 
+ */ + if (kfp == NULL) + ef.kernel = 1; +#endif + if (ef.kernel || ehdr->e_type == ET_EXEC) { + /* Looks like a kernel */ + if (kfp != NULL) { + printf("elf" __XSTRING(__ELF_WORD_SIZE) "_loadfile: kernel already loaded\n"); + err = EPERM; + goto oerr; + } + /* + * Calculate destination address based on kernel entrypoint + */ + dest = (ehdr->e_entry & ~PAGE_MASK); + if (dest == 0) { + printf("elf" __XSTRING(__ELF_WORD_SIZE) "_loadfile: not a kernel (maybe static binary?)\n"); + err = EPERM; + goto oerr; + } + ef.kernel = 1; + + } else if (ehdr->e_type == ET_DYN) { /* Looks like a kld module */ if (multiboot != 0) { printf("elf" __XSTRING(__ELF_WORD_SIZE) "_loadfile: can't load module as multiboot\n"); @@ -195,24 +221,6 @@ __elfN(loadfile_raw)(char *filename, u_int64_t dest, /* Looks OK, got ahead */ ef.kernel = 0; - } else if (ehdr->e_type == ET_EXEC) { - /* Looks like a kernel */ - if (kfp != NULL) { - printf("elf" __XSTRING(__ELF_WORD_SIZE) "_loadfile: kernel already loaded\n"); - err = EPERM; - goto oerr; - } - /* - * Calculate destination address based on kernel entrypoint - */ - dest = (ehdr->e_entry & ~PAGE_MASK); - if (dest == 0) { - printf("elf" __XSTRING(__ELF_WORD_SIZE) "_loadfile: not a kernel (maybe static binary?)\n"); - err = EPERM; - goto oerr; - } - ef.kernel = 1; - } else { err = EFTYPE; goto oerr; diff --git a/sys/boot/i386/libi386/smbios.c b/sys/boot/i386/libi386/smbios.c index 0d5eb7b04ec0..570111ccb79f 100644 --- a/sys/boot/i386/libi386/smbios.c +++ b/sys/boot/i386/libi386/smbios.c @@ -323,6 +323,29 @@ smbios_parse_table(const caddr_t addr) return (cp + 2); } +static caddr_t +smbios_find_struct(int type) +{ + caddr_t dmi; + int i; + + if (smbios.addr == NULL) + return (NULL); + + for (dmi = smbios.addr, i = 0; + dmi < smbios.addr + smbios.length && i < smbios.count; i++) { + if (SMBIOS_GET8(dmi, 0) == type) + return dmi; + /* Find structure terminator. 
*/ + dmi = SMBIOS_GETSTR(dmi); + while (SMBIOS_GET16(dmi, 0) != 0) + dmi++; + dmi += 2; + } + + return (NULL); +} + static void smbios_probe(void) { @@ -368,29 +391,6 @@ smbios_probe(void) } } -static caddr_t -smbios_find_struct(int type) -{ - caddr_t dmi; - int i; - - if (smbios.addr == NULL) - return (NULL); - - for (dmi = smbios.addr, i = 0; - dmi < smbios.addr + smbios.length && i < smbios.count; i++) { - if (SMBIOS_GET8(dmi, 0) == type) - return dmi; - /* Find structure terminator. */ - dmi = SMBIOS_GETSTR(dmi); - while (SMBIOS_GET16(dmi, 0) != 0) - dmi++; - dmi += 2; - } - - return (NULL); -} - void smbios_detect(void) { diff --git a/sys/cam/ctl/ctl_frontend_iscsi.c b/sys/cam/ctl/ctl_frontend_iscsi.c index 0114b57b8247..99d698b245a3 100644 --- a/sys/cam/ctl/ctl_frontend_iscsi.c +++ b/sys/cam/ctl/ctl_frontend_iscsi.c @@ -68,6 +68,7 @@ __FBSDID("$FreeBSD$"); #include <cam/ctl/ctl_private.h> #include <dev/iscsi/icl.h> +#include <dev/iscsi/icl_wrappers.h> #include <dev/iscsi/iscsi_proto.h> #include <cam/ctl/ctl_frontend_iscsi.h> @@ -1241,7 +1242,7 @@ cfiscsi_session_new(struct cfiscsi_softc *softc) cv_init(&cs->cs_login_cv, "cfiscsi_login"); #endif - cs->cs_conn = icl_conn_new("cfiscsi", &cs->cs_lock); + cs->cs_conn = icl_new_conn(NULL, "cfiscsi", &cs->cs_lock); cs->cs_conn->ic_receive = cfiscsi_receive_callback; cs->cs_conn->ic_error = cfiscsi_error_callback; cs->cs_conn->ic_prv0 = cs; @@ -2013,6 +2014,7 @@ cfiscsi_ioctl_port_create(struct ctl_req *req) return; } port = &ct->ct_port; + // WAT if (ct->ct_state == CFISCSI_TARGET_STATE_DYING) goto done; diff --git a/sys/conf/files b/sys/conf/files index ee8a5381be9c..38f96a29e4b4 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -1521,6 +1521,7 @@ ipw_monitor.fw optional ipwmonitorfw | ipwfw \ clean "ipw_monitor.fw" dev/iscsi/icl.c optional iscsi | ctl dev/iscsi/icl_proxy.c optional iscsi | ctl +dev/iscsi/icl_soft.c optional iscsi | ctl dev/iscsi/iscsi.c optional iscsi scbus dev/iscsi_initiator/iscsi.c optional 
iscsi_initiator scbus dev/iscsi_initiator/iscsi_subr.c optional iscsi_initiator scbus diff --git a/sys/dev/drm2/i915/intel_display.c b/sys/dev/drm2/i915/intel_display.c index 86c3a4de54cf..a844c7f55f49 100644 --- a/sys/dev/drm2/i915/intel_display.c +++ b/sys/dev/drm2/i915/intel_display.c @@ -6995,7 +6995,7 @@ static void ivb_pch_pwm_override(struct drm_device *dev) */ I915_WRITE(BLC_PWM_CPU_CTL2, PWM_ENABLE); I915_WRITE(BLC_PWM_CPU_CTL, 0); - I915_WRITE(BLC_PWM_PCH_CTL1, PWM_ENABLE | (1<<30)); + I915_WRITE(BLC_PWM_PCH_CTL1, PWM_ENABLE); } void intel_modeset_init_hw(struct drm_device *dev) diff --git a/sys/dev/iscsi/icl.c b/sys/dev/iscsi/icl.c index 7f5d01f5332e..a60313cf857b 100644 --- a/sys/dev/iscsi/icl.c +++ b/sys/dev/iscsi/icl.c @@ -37,1414 +37,199 @@ __FBSDID("$FreeBSD$"); #include <sys/param.h> -#include <sys/capsicum.h> #include <sys/condvar.h> #include <sys/conf.h> -#include <sys/file.h> -#include <sys/kernel.h> -#include <sys/kthread.h> #include <sys/lock.h> -#include <sys/mbuf.h> +#include <sys/kernel.h> +#include <sys/malloc.h> #include <sys/mutex.h> #include <sys/module.h> -#include <sys/protosw.h> -#include <sys/socket.h> -#include <sys/socketvar.h> +#include <sys/queue.h> #include <sys/sysctl.h> #include <sys/systm.h> #include <sys/sx.h> -#include <sys/uio.h> -#include <vm/uma.h> -#include <netinet/in.h> -#include <netinet/tcp.h> #include <dev/iscsi/icl.h> -#include <dev/iscsi/iscsi_proto.h> +#include <icl_conn_if.h> + +struct icl_module { + TAILQ_ENTRY(icl_module) im_next; + char *im_name; + int im_priority; + int (*im_limits)(size_t *limitp); + struct icl_conn *(*im_new_conn)(const char *name, + struct mtx *lock); +}; + +struct icl_softc { + struct sx sc_lock; + TAILQ_HEAD(, icl_module) sc_modules; +}; SYSCTL_NODE(_kern, OID_AUTO, icl, CTLFLAG_RD, 0, "iSCSI Common Layer"); -static int debug = 1; +int icl_debug = 1; SYSCTL_INT(_kern_icl, OID_AUTO, debug, CTLFLAG_RWTUN, - &debug, 0, "Enable debug messages"); -static int coalesce = 1; 
-SYSCTL_INT(_kern_icl, OID_AUTO, coalesce, CTLFLAG_RWTUN, - &coalesce, 0, "Try to coalesce PDUs before sending"); -static int partial_receive_len = 128 * 1024; -SYSCTL_INT(_kern_icl, OID_AUTO, partial_receive_len, CTLFLAG_RWTUN, - &partial_receive_len, 0, "Minimum read size for partially received " - "data segment"); -static int sendspace = 1048576; -SYSCTL_INT(_kern_icl, OID_AUTO, sendspace, CTLFLAG_RWTUN, - &sendspace, 0, "Default send socket buffer size"); -static int recvspace = 1048576; -SYSCTL_INT(_kern_icl, OID_AUTO, recvspace, CTLFLAG_RWTUN, - &recvspace, 0, "Default receive socket buffer size"); - -static uma_zone_t icl_conn_zone; -static uma_zone_t icl_pdu_zone; - -static volatile u_int icl_ncons; - -#define ICL_DEBUG(X, ...) \ - do { \ - if (debug > 1) \ - printf("%s: " X "\n", __func__, ## __VA_ARGS__);\ - } while (0) - -#define ICL_WARN(X, ...) \ - do { \ - if (debug > 0) { \ - printf("WARNING: %s: " X "\n", \ - __func__, ## __VA_ARGS__); \ - } \ - } while (0) - -#define ICL_CONN_LOCK(X) mtx_lock(X->ic_lock) -#define ICL_CONN_UNLOCK(X) mtx_unlock(X->ic_lock) -#define ICL_CONN_LOCK_ASSERT(X) mtx_assert(X->ic_lock, MA_OWNED) -#define ICL_CONN_LOCK_ASSERT_NOT(X) mtx_assert(X->ic_lock, MA_NOTOWNED) - -STAILQ_HEAD(icl_pdu_stailq, icl_pdu); - -static void -icl_conn_fail(struct icl_conn *ic) -{ - if (ic->ic_socket == NULL) - return; - - /* - * XXX - */ - ic->ic_socket->so_error = EDOOFUS; - (ic->ic_error)(ic); -} - -static struct mbuf * -icl_conn_receive(struct icl_conn *ic, size_t len) -{ - struct uio uio; - struct socket *so; - struct mbuf *m; - int error, flags; - - so = ic->ic_socket; - - memset(&uio, 0, sizeof(uio)); - uio.uio_resid = len; - - flags = MSG_DONTWAIT; - error = soreceive(so, NULL, &uio, &m, NULL, &flags); - if (error != 0) { - ICL_DEBUG("soreceive error %d", error); - return (NULL); - } - if (uio.uio_resid != 0) { - m_freem(m); - ICL_DEBUG("short read"); - return (NULL); - } - - return (m); -} - -static struct icl_pdu * 
-icl_pdu_new_empty(struct icl_conn *ic, int flags) -{ - struct icl_pdu *ip; - -#ifdef DIAGNOSTIC - refcount_acquire(&ic->ic_outstanding_pdus); -#endif - ip = uma_zalloc(icl_pdu_zone, flags | M_ZERO); - if (ip == NULL) { - ICL_WARN("failed to allocate %zd bytes", sizeof(*ip)); -#ifdef DIAGNOSTIC - refcount_release(&ic->ic_outstanding_pdus); -#endif - return (NULL); - } - - ip->ip_conn = ic; - - return (ip); -} - -void -icl_pdu_free(struct icl_pdu *ip) -{ - struct icl_conn *ic; - - ic = ip->ip_conn; - - m_freem(ip->ip_bhs_mbuf); - m_freem(ip->ip_ahs_mbuf); - m_freem(ip->ip_data_mbuf); - uma_zfree(icl_pdu_zone, ip); -#ifdef DIAGNOSTIC - refcount_release(&ic->ic_outstanding_pdus); -#endif -} - -/* - * Allocate icl_pdu with empty BHS to fill up by the caller. - */ -struct icl_pdu * -icl_pdu_new(struct icl_conn *ic, int flags) -{ - struct icl_pdu *ip; - - ip = icl_pdu_new_empty(ic, flags); - if (ip == NULL) - return (NULL); - - ip->ip_bhs_mbuf = m_getm2(NULL, sizeof(struct iscsi_bhs), - flags, MT_DATA, M_PKTHDR); - if (ip->ip_bhs_mbuf == NULL) { - ICL_WARN("failed to allocate %zd bytes", sizeof(*ip)); - icl_pdu_free(ip); - return (NULL); - } - ip->ip_bhs = mtod(ip->ip_bhs_mbuf, struct iscsi_bhs *); - memset(ip->ip_bhs, 0, sizeof(struct iscsi_bhs)); - ip->ip_bhs_mbuf->m_len = sizeof(struct iscsi_bhs); - - return (ip); -} - -static int -icl_pdu_ahs_length(const struct icl_pdu *request) -{ - - return (request->ip_bhs->bhs_total_ahs_len * 4); -} - -size_t -icl_pdu_data_segment_length(const struct icl_pdu *request) -{ - uint32_t len = 0; - - len += request->ip_bhs->bhs_data_segment_len[0]; - len <<= 8; - len += request->ip_bhs->bhs_data_segment_len[1]; - len <<= 8; - len += request->ip_bhs->bhs_data_segment_len[2]; - - return (len); -} - -static void -icl_pdu_set_data_segment_length(struct icl_pdu *response, uint32_t len) -{ - - response->ip_bhs->bhs_data_segment_len[2] = len; - response->ip_bhs->bhs_data_segment_len[1] = len >> 8; - response->ip_bhs->bhs_data_segment_len[0] 
= len >> 16; -} - -static size_t -icl_pdu_padding(const struct icl_pdu *ip) -{ - - if ((ip->ip_data_len % 4) != 0) - return (4 - (ip->ip_data_len % 4)); - - return (0); -} - -static size_t -icl_pdu_size(const struct icl_pdu *response) -{ - size_t len; - - KASSERT(response->ip_ahs_len == 0, ("responding with AHS")); - - len = sizeof(struct iscsi_bhs) + response->ip_data_len + - icl_pdu_padding(response); - if (response->ip_conn->ic_header_crc32c) - len += ISCSI_HEADER_DIGEST_SIZE; - if (response->ip_data_len != 0 && response->ip_conn->ic_data_crc32c) - len += ISCSI_DATA_DIGEST_SIZE; - - return (len); -} - -static int -icl_pdu_receive_bhs(struct icl_pdu *request, size_t *availablep) -{ - struct mbuf *m; - - m = icl_conn_receive(request->ip_conn, sizeof(struct iscsi_bhs)); - if (m == NULL) { - ICL_DEBUG("failed to receive BHS"); - return (-1); - } - - request->ip_bhs_mbuf = m_pullup(m, sizeof(struct iscsi_bhs)); - if (request->ip_bhs_mbuf == NULL) { - ICL_WARN("m_pullup failed"); - return (-1); - } - request->ip_bhs = mtod(request->ip_bhs_mbuf, struct iscsi_bhs *); - - /* - * XXX: For architectures with strict alignment requirements - * we may need to allocate ip_bhs and copy the data into it. - * For some reason, though, not doing this doesn't seem - * to cause problems; tested on sparc64. 
- */ - - *availablep -= sizeof(struct iscsi_bhs); - return (0); -} - -static int -icl_pdu_receive_ahs(struct icl_pdu *request, size_t *availablep) -{ - - request->ip_ahs_len = icl_pdu_ahs_length(request); - if (request->ip_ahs_len == 0) - return (0); - - request->ip_ahs_mbuf = icl_conn_receive(request->ip_conn, - request->ip_ahs_len); - if (request->ip_ahs_mbuf == NULL) { - ICL_DEBUG("failed to receive AHS"); - return (-1); - } - - *availablep -= request->ip_ahs_len; - return (0); -} - -static uint32_t -icl_mbuf_to_crc32c(const struct mbuf *m0) -{ - uint32_t digest = 0xffffffff; - const struct mbuf *m; - - for (m = m0; m != NULL; m = m->m_next) - digest = calculate_crc32c(digest, - mtod(m, const void *), m->m_len); - - digest = digest ^ 0xffffffff; + &icl_debug, 0, "Enable debug messages"); - return (digest); -} +static MALLOC_DEFINE(M_ICL, "icl", "iSCSI Common Layer"); +static struct icl_softc *sc; -static int -icl_pdu_check_header_digest(struct icl_pdu *request, size_t *availablep) +static struct icl_module * +icl_find(const char *name) { - struct mbuf *m; - uint32_t received_digest, valid_digest; + struct icl_module *im, *im_max; - if (request->ip_conn->ic_header_crc32c == false) - return (0); - - m = icl_conn_receive(request->ip_conn, ISCSI_HEADER_DIGEST_SIZE); - if (m == NULL) { - ICL_DEBUG("failed to receive header digest"); - return (-1); - } - - CTASSERT(sizeof(received_digest) == ISCSI_HEADER_DIGEST_SIZE); - m_copydata(m, 0, ISCSI_HEADER_DIGEST_SIZE, (void *)&received_digest); - m_freem(m); - - *availablep -= ISCSI_HEADER_DIGEST_SIZE; + sx_assert(&sc->sc_lock, SA_LOCKED); /* - * XXX: Handle AHS. + * If the name was not specified, pick a module with highest + * priority. 
*/ - valid_digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf); - if (received_digest != valid_digest) { - ICL_WARN("header digest check failed; got 0x%x, " - "should be 0x%x", received_digest, valid_digest); - return (-1); - } - - return (0); -} - -/* - * Return the number of bytes that should be waiting in the receive socket - * before icl_pdu_receive_data_segment() gets called. - */ -static size_t -icl_pdu_data_segment_receive_len(const struct icl_pdu *request) -{ - size_t len; - - len = icl_pdu_data_segment_length(request); - if (len == 0) - return (0); - - /* - * Account for the parts of data segment already read from - * the socket buffer. - */ - KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len")); - len -= request->ip_data_len; - - /* - * Don't always wait for the full data segment to be delivered - * to the socket; this might badly affect performance due to - * TCP window scaling. - */ - if (len > partial_receive_len) { -#if 0 - ICL_DEBUG("need %zd bytes of data, limiting to %zd", - len, partial_receive_len)); -#endif - len = partial_receive_len; - - return (len); - } - - /* - * Account for padding. Note that due to the way code is written, - * the icl_pdu_receive_data_segment() must always receive padding - * along with the last part of data segment, because it would be - * impossible to tell whether we've already received the full data - * segment including padding, or without it. 
- */ - if ((len % 4) != 0) - len += 4 - (len % 4); - -#if 0 - ICL_DEBUG("need %zd bytes of data", len)); -#endif - - return (len); -} - -static int -icl_pdu_receive_data_segment(struct icl_pdu *request, - size_t *availablep, bool *more_neededp) -{ - struct icl_conn *ic; - size_t len, padding = 0; - struct mbuf *m; - - ic = request->ip_conn; - - *more_neededp = false; - ic->ic_receive_len = 0; - - len = icl_pdu_data_segment_length(request); - if (len == 0) - return (0); - - if ((len % 4) != 0) - padding = 4 - (len % 4); - - /* - * Account for already received parts of data segment. - */ - KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len")); - len -= request->ip_data_len; - - if (len + padding > *availablep) { - /* - * Not enough data in the socket buffer. Receive as much - * as we can. Don't receive padding, since, obviously, it's - * not the end of data segment yet. - */ -#if 0 - ICL_DEBUG("limited from %zd to %zd", - len + padding, *availablep - padding)); -#endif - len = *availablep - padding; - *more_neededp = true; - padding = 0; - } - - /* - * Must not try to receive padding without at least one byte - * of actual data segment. 
- */ - if (len > 0) { - m = icl_conn_receive(request->ip_conn, len + padding); - if (m == NULL) { - ICL_DEBUG("failed to receive data segment"); - return (-1); + if (name == NULL || name[0] == '\0') { + im_max = TAILQ_FIRST(&sc->sc_modules); + TAILQ_FOREACH(im, &sc->sc_modules, im_next) { + if (im->im_priority > im_max->im_priority) + im_max = im; } - if (request->ip_data_mbuf == NULL) - request->ip_data_mbuf = m; - else - m_cat(request->ip_data_mbuf, m); - - request->ip_data_len += len; - *availablep -= len + padding; - } else - ICL_DEBUG("len 0"); - - if (*more_neededp) - ic->ic_receive_len = - icl_pdu_data_segment_receive_len(request); - - return (0); -} - -static int -icl_pdu_check_data_digest(struct icl_pdu *request, size_t *availablep) -{ - struct mbuf *m; - uint32_t received_digest, valid_digest; - - if (request->ip_conn->ic_data_crc32c == false) - return (0); - - if (request->ip_data_len == 0) - return (0); - - m = icl_conn_receive(request->ip_conn, ISCSI_DATA_DIGEST_SIZE); - if (m == NULL) { - ICL_DEBUG("failed to receive data digest"); - return (-1); - } - - CTASSERT(sizeof(received_digest) == ISCSI_DATA_DIGEST_SIZE); - m_copydata(m, 0, ISCSI_DATA_DIGEST_SIZE, (void *)&received_digest); - m_freem(m); - - *availablep -= ISCSI_DATA_DIGEST_SIZE; - - /* - * Note that ip_data_mbuf also contains padding; since digest - * calculation is supposed to include that, we iterate over - * the entire ip_data_mbuf chain, not just ip_data_len bytes of it. - */ - valid_digest = icl_mbuf_to_crc32c(request->ip_data_mbuf); - if (received_digest != valid_digest) { - ICL_WARN("data digest check failed; got 0x%x, " - "should be 0x%x", received_digest, valid_digest); - return (-1); - } - - return (0); -} - -/* - * Somewhat contrary to the name, this attempts to receive only one - * "part" of PDU at a time; call it repeatedly until it returns non-NULL. 
- */ -static struct icl_pdu * -icl_conn_receive_pdu(struct icl_conn *ic, size_t *availablep) -{ - struct icl_pdu *request; - struct socket *so; - size_t len; - int error; - bool more_needed; - - so = ic->ic_socket; - - if (ic->ic_receive_state == ICL_CONN_STATE_BHS) { - KASSERT(ic->ic_receive_pdu == NULL, - ("ic->ic_receive_pdu != NULL")); - request = icl_pdu_new_empty(ic, M_NOWAIT); - if (request == NULL) { - ICL_DEBUG("failed to allocate PDU; " - "dropping connection"); - icl_conn_fail(ic); - return (NULL); - } - ic->ic_receive_pdu = request; - } else { - KASSERT(ic->ic_receive_pdu != NULL, - ("ic->ic_receive_pdu == NULL")); - request = ic->ic_receive_pdu; - } - - if (*availablep < ic->ic_receive_len) { -#if 0 - ICL_DEBUG("not enough data; need %zd, " - "have %zd", ic->ic_receive_len, *availablep); -#endif - return (NULL); - } - - switch (ic->ic_receive_state) { - case ICL_CONN_STATE_BHS: - //ICL_DEBUG("receiving BHS"); - error = icl_pdu_receive_bhs(request, availablep); - if (error != 0) { - ICL_DEBUG("failed to receive BHS; " - "dropping connection"); - break; - } - - /* - * We don't enforce any limit for AHS length; - * its length is stored in 8 bit field. 
- */ - - len = icl_pdu_data_segment_length(request); - if (len > ic->ic_max_data_segment_length) { - ICL_WARN("received data segment " - "length %zd is larger than negotiated " - "MaxDataSegmentLength %zd; " - "dropping connection", - len, ic->ic_max_data_segment_length); - error = EINVAL; - break; - } - - ic->ic_receive_state = ICL_CONN_STATE_AHS; - ic->ic_receive_len = icl_pdu_ahs_length(request); - break; - - case ICL_CONN_STATE_AHS: - //ICL_DEBUG("receiving AHS"); - error = icl_pdu_receive_ahs(request, availablep); - if (error != 0) { - ICL_DEBUG("failed to receive AHS; " - "dropping connection"); - break; - } - ic->ic_receive_state = ICL_CONN_STATE_HEADER_DIGEST; - if (ic->ic_header_crc32c == false) - ic->ic_receive_len = 0; - else - ic->ic_receive_len = ISCSI_HEADER_DIGEST_SIZE; - break; - - case ICL_CONN_STATE_HEADER_DIGEST: - //ICL_DEBUG("receiving header digest"); - error = icl_pdu_check_header_digest(request, availablep); - if (error != 0) { - ICL_DEBUG("header digest failed; " - "dropping connection"); - break; - } - - ic->ic_receive_state = ICL_CONN_STATE_DATA; - ic->ic_receive_len = - icl_pdu_data_segment_receive_len(request); - break; - - case ICL_CONN_STATE_DATA: - //ICL_DEBUG("receiving data segment"); - error = icl_pdu_receive_data_segment(request, availablep, - &more_needed); - if (error != 0) { - ICL_DEBUG("failed to receive data segment;" - "dropping connection"); - break; - } - - if (more_needed) - break; - - ic->ic_receive_state = ICL_CONN_STATE_DATA_DIGEST; - if (request->ip_data_len == 0 || ic->ic_data_crc32c == false) - ic->ic_receive_len = 0; - else - ic->ic_receive_len = ISCSI_DATA_DIGEST_SIZE; - break; - - case ICL_CONN_STATE_DATA_DIGEST: - //ICL_DEBUG("receiving data digest"); - error = icl_pdu_check_data_digest(request, availablep); - if (error != 0) { - ICL_DEBUG("data digest failed; " - "dropping connection"); - break; - } - - /* - * We've received complete PDU; reset the receive state machine - * and return the PDU. 
- */ - ic->ic_receive_state = ICL_CONN_STATE_BHS; - ic->ic_receive_len = sizeof(struct iscsi_bhs); - ic->ic_receive_pdu = NULL; - return (request); - - default: - panic("invalid ic_receive_state %d\n", ic->ic_receive_state); + return (im_max); } - if (error != 0) { - /* - * Don't free the PDU; it's pointed to by ic->ic_receive_pdu - * and will get freed in icl_conn_close(). - */ - icl_conn_fail(ic); + TAILQ_FOREACH(im, &sc->sc_modules, im_next) { + if (strcmp(im->im_name, name) == 0) + return (im); } return (NULL); } -static void -icl_conn_receive_pdus(struct icl_conn *ic, size_t available) -{ - struct icl_pdu *response; - struct socket *so; - - so = ic->ic_socket; - - /* - * This can never happen; we're careful to only mess with ic->ic_socket - * pointer when the send/receive threads are not running. - */ - KASSERT(so != NULL, ("NULL socket")); - - for (;;) { - if (ic->ic_disconnecting) - return; - - if (so->so_error != 0) { - ICL_DEBUG("connection error %d; " - "dropping connection", so->so_error); - icl_conn_fail(ic); - return; - } - - /* - * Loop until we have a complete PDU or there is not enough - * data in the socket buffer. 
- */ - if (available < ic->ic_receive_len) { -#if 0 - ICL_DEBUG("not enough data; have %zd, " - "need %zd", available, - ic->ic_receive_len); -#endif - return; - } - - response = icl_conn_receive_pdu(ic, &available); - if (response == NULL) - continue; - - if (response->ip_ahs_len > 0) { - ICL_WARN("received PDU with unsupported " - "AHS; opcode 0x%x; dropping connection", - response->ip_bhs->bhs_opcode); - icl_pdu_free(response); - icl_conn_fail(ic); - return; - } - - (ic->ic_receive)(response); - } -} - -static void -icl_receive_thread(void *arg) -{ - struct icl_conn *ic; - size_t available; - struct socket *so; - - ic = arg; - so = ic->ic_socket; - - ICL_CONN_LOCK(ic); - ic->ic_receive_running = true; - ICL_CONN_UNLOCK(ic); - - for (;;) { - if (ic->ic_disconnecting) { - //ICL_DEBUG("terminating"); - break; - } - - /* - * Set the low watermark, to be checked by - * soreadable() in icl_soupcall_receive() - * to avoid unneccessary wakeups until there - * is enough data received to read the PDU. 
- */ - SOCKBUF_LOCK(&so->so_rcv); - available = sbavail(&so->so_rcv); - if (available < ic->ic_receive_len) { - so->so_rcv.sb_lowat = ic->ic_receive_len; - cv_wait(&ic->ic_receive_cv, &so->so_rcv.sb_mtx); - } else - so->so_rcv.sb_lowat = so->so_rcv.sb_hiwat + 1; - SOCKBUF_UNLOCK(&so->so_rcv); - - icl_conn_receive_pdus(ic, available); - } - - ICL_CONN_LOCK(ic); - ic->ic_receive_running = false; - cv_signal(&ic->ic_send_cv); - ICL_CONN_UNLOCK(ic); - kthread_exit(); -} - -static int -icl_soupcall_receive(struct socket *so, void *arg, int waitflag) -{ - struct icl_conn *ic; - - if (!soreadable(so)) - return (SU_OK); - - ic = arg; - cv_signal(&ic->ic_receive_cv); - return (SU_OK); -} - -static int -icl_pdu_finalize(struct icl_pdu *request) +struct icl_conn * +icl_new_conn(const char *offload, const char *name, struct mtx *lock) { - size_t padding, pdu_len; - uint32_t digest, zero = 0; - int ok; + struct icl_module *im; struct icl_conn *ic; - ic = request->ip_conn; - - icl_pdu_set_data_segment_length(request, request->ip_data_len); - - pdu_len = icl_pdu_size(request); - - if (ic->ic_header_crc32c) { - digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf); - ok = m_append(request->ip_bhs_mbuf, sizeof(digest), - (void *)&digest); - if (ok != 1) { - ICL_WARN("failed to append header digest"); - return (1); - } - } - - if (request->ip_data_len != 0) { - padding = icl_pdu_padding(request); - if (padding > 0) { - ok = m_append(request->ip_data_mbuf, padding, - (void *)&zero); - if (ok != 1) { - ICL_WARN("failed to append padding"); - return (1); - } - } - - if (ic->ic_data_crc32c) { - digest = icl_mbuf_to_crc32c(request->ip_data_mbuf); - - ok = m_append(request->ip_data_mbuf, sizeof(digest), - (void *)&digest); - if (ok != 1) { - ICL_WARN("failed to append data digest"); - return (1); - } - } + sx_slock(&sc->sc_lock); + im = icl_find(offload); - m_cat(request->ip_bhs_mbuf, request->ip_data_mbuf); - request->ip_data_mbuf = NULL; + if (im == NULL) { + ICL_WARN("offload \"%s\" not 
found", offload); + sx_sunlock(&sc->sc_lock); + return (NULL); } - request->ip_bhs_mbuf->m_pkthdr.len = pdu_len; - - return (0); -} - -static void -icl_conn_send_pdus(struct icl_conn *ic, struct icl_pdu_stailq *queue) -{ - struct icl_pdu *request, *request2; - struct socket *so; - size_t available, size, size2; - int coalesced, error; + ic = im->im_new_conn(name, lock); + sx_sunlock(&sc->sc_lock); - ICL_CONN_LOCK_ASSERT_NOT(ic); - - so = ic->ic_socket; - - SOCKBUF_LOCK(&so->so_snd); - /* - * Check how much space do we have for transmit. We can't just - * call sosend() and retry when we get EWOULDBLOCK or EMSGSIZE, - * as it always frees the mbuf chain passed to it, even in case - * of error. - */ - available = sbspace(&so->so_snd); - - /* - * Notify the socket upcall that we don't need wakeups - * for the time being. - */ - so->so_snd.sb_lowat = so->so_snd.sb_hiwat + 1; - SOCKBUF_UNLOCK(&so->so_snd); - - while (!STAILQ_EMPTY(queue)) { - request = STAILQ_FIRST(queue); - size = icl_pdu_size(request); - if (available < size) { - - /* - * Set the low watermark, to be checked by - * sowriteable() in icl_soupcall_send() - * to avoid unneccessary wakeups until there - * is enough space for the PDU to fit. 
- */ - SOCKBUF_LOCK(&so->so_snd); - available = sbspace(&so->so_snd); - if (available < size) { -#if 1 - ICL_DEBUG("no space to send; " - "have %zd, need %zd", - available, size); -#endif - so->so_snd.sb_lowat = size; - SOCKBUF_UNLOCK(&so->so_snd); - return; - } - SOCKBUF_UNLOCK(&so->so_snd); - } - STAILQ_REMOVE_HEAD(queue, ip_next); - error = icl_pdu_finalize(request); - if (error != 0) { - ICL_DEBUG("failed to finalize PDU; " - "dropping connection"); - icl_conn_fail(ic); - icl_pdu_free(request); - return; - } - if (coalesce) { - coalesced = 1; - for (;;) { - request2 = STAILQ_FIRST(queue); - if (request2 == NULL) - break; - size2 = icl_pdu_size(request2); - if (available < size + size2) - break; - STAILQ_REMOVE_HEAD(queue, ip_next); - error = icl_pdu_finalize(request2); - if (error != 0) { - ICL_DEBUG("failed to finalize PDU; " - "dropping connection"); - icl_conn_fail(ic); - icl_pdu_free(request); - icl_pdu_free(request2); - return; - } - m_cat(request->ip_bhs_mbuf, request2->ip_bhs_mbuf); - request2->ip_bhs_mbuf = NULL; - request->ip_bhs_mbuf->m_pkthdr.len += size2; - size += size2; - STAILQ_REMOVE_AFTER(queue, request, ip_next); - icl_pdu_free(request2); - coalesced++; - } -#if 0 - if (coalesced > 1) { - ICL_DEBUG("coalesced %d PDUs into %zd bytes", - coalesced, size); - } -#endif - } - available -= size; - error = sosend(so, NULL, NULL, request->ip_bhs_mbuf, - NULL, MSG_DONTWAIT, curthread); - request->ip_bhs_mbuf = NULL; /* Sosend consumes the mbuf. 
*/ - if (error != 0) { - ICL_DEBUG("failed to send PDU, error %d; " - "dropping connection", error); - icl_conn_fail(ic); - icl_pdu_free(request); - return; - } - icl_pdu_free(request); - } + return (ic); } -static void -icl_send_thread(void *arg) +int +icl_limits(const char *offload, size_t *limitp) { - struct icl_conn *ic; - struct icl_pdu_stailq queue; - - ic = arg; - - STAILQ_INIT(&queue); - - ICL_CONN_LOCK(ic); - ic->ic_send_running = true; - - for (;;) { - for (;;) { - /* - * If the local queue is empty, populate it from - * the main one. This way the icl_conn_send_pdus() - * can go through all the queued PDUs without holding - * any locks. - */ - if (STAILQ_EMPTY(&queue)) - STAILQ_SWAP(&ic->ic_to_send, &queue, icl_pdu); - - ic->ic_check_send_space = false; - ICL_CONN_UNLOCK(ic); - icl_conn_send_pdus(ic, &queue); - ICL_CONN_LOCK(ic); - - /* - * The icl_soupcall_send() was called since the last - * call to sbspace(); go around; - */ - if (ic->ic_check_send_space) - continue; - - /* - * Local queue is empty, but we still have PDUs - * in the main one; go around. - */ - if (STAILQ_EMPTY(&queue) && - !STAILQ_EMPTY(&ic->ic_to_send)) - continue; - - /* - * There might be some stuff in the local queue, - * which didn't get sent due to not having enough send - * space. Wait for socket upcall. - */ - break; - } + struct icl_module *im; + int error; - if (ic->ic_disconnecting) { - //ICL_DEBUG("terminating"); - break; - } + sx_slock(&sc->sc_lock); + im = icl_find(offload); - cv_wait(&ic->ic_send_cv, ic->ic_lock); + if (im == NULL) { + ICL_WARN("offload \"%s\" not found", offload); + sx_sunlock(&sc->sc_lock); + return (ENXIO); } - /* - * We're exiting; move PDUs back to the main queue, so they can - * get freed properly. At this point ordering doesn't matter. 
- */ - STAILQ_CONCAT(&ic->ic_to_send, &queue); + error = im->im_limits(limitp); + sx_sunlock(&sc->sc_lock); - ic->ic_send_running = false; - cv_signal(&ic->ic_send_cv); - ICL_CONN_UNLOCK(ic); - kthread_exit(); + return (error); } -static int -icl_soupcall_send(struct socket *so, void *arg, int waitflag) -{ - struct icl_conn *ic; - - if (!sowriteable(so)) - return (SU_OK); - - ic = arg; - - ICL_CONN_LOCK(ic); - ic->ic_check_send_space = true; - ICL_CONN_UNLOCK(ic); - - cv_signal(&ic->ic_send_cv); - - return (SU_OK); -} int -icl_pdu_append_data(struct icl_pdu *request, const void *addr, size_t len, - int flags) +icl_register(const char *offload, int priority, int (*limits)(size_t *), + struct icl_conn *(*new_conn)(const char *, struct mtx *)) { - struct mbuf *mb, *newmb; - size_t copylen, off = 0; + struct icl_module *im; - KASSERT(len > 0, ("len == 0")); + sx_xlock(&sc->sc_lock); + im = icl_find(offload); - newmb = m_getm2(NULL, len, flags, MT_DATA, M_PKTHDR); - if (newmb == NULL) { - ICL_WARN("failed to allocate mbuf for %zd bytes", len); - return (ENOMEM); + if (im != NULL) { + ICL_WARN("offload \"%s\" already registered", offload); + sx_xunlock(&sc->sc_lock); + return (EBUSY); } - for (mb = newmb; mb != NULL; mb = mb->m_next) { - copylen = min(M_TRAILINGSPACE(mb), len - off); - memcpy(mtod(mb, char *), (const char *)addr + off, copylen); - mb->m_len = copylen; - off += copylen; - } - KASSERT(off == len, ("%s: off != len", __func__)); + im = malloc(sizeof(*im), M_ICL, M_ZERO | M_WAITOK); + im->im_name = strdup(offload, M_ICL); + im->im_priority = priority; + im->im_limits = limits; + im->im_new_conn = new_conn; - if (request->ip_data_mbuf == NULL) { - request->ip_data_mbuf = newmb; - request->ip_data_len = len; - } else { - m_cat(request->ip_data_mbuf, newmb); - request->ip_data_len += len; - } + TAILQ_INSERT_HEAD(&sc->sc_modules, im, im_next); + sx_xunlock(&sc->sc_lock); + ICL_DEBUG("offload \"%s\" registered", offload); return (0); } -void 
-icl_pdu_get_data(struct icl_pdu *ip, size_t off, void *addr, size_t len) -{ - - m_copydata(ip->ip_data_mbuf, off, len, addr); -} - -void -icl_pdu_queue(struct icl_pdu *ip) +int +icl_unregister(const char *offload) { - struct icl_conn *ic; + struct icl_module *im; - ic = ip->ip_conn; - - ICL_CONN_LOCK_ASSERT(ic); - - if (ic->ic_disconnecting || ic->ic_socket == NULL) { - ICL_DEBUG("icl_pdu_queue on closed connection"); - icl_pdu_free(ip); - return; - } + sx_xlock(&sc->sc_lock); + im = icl_find(offload); - if (!STAILQ_EMPTY(&ic->ic_to_send)) { - STAILQ_INSERT_TAIL(&ic->ic_to_send, ip, ip_next); - /* - * If the queue is not empty, someone else had already - * signaled the send thread; no need to do that again, - * just return. - */ - return; + if (im == NULL) { + ICL_WARN("offload \"%s\" not registered", offload); + sx_xunlock(&sc->sc_lock); + return (ENXIO); } - STAILQ_INSERT_TAIL(&ic->ic_to_send, ip, ip_next); - cv_signal(&ic->ic_send_cv); -} - -struct icl_conn * -icl_conn_new(const char *name, struct mtx *lock) -{ - struct icl_conn *ic; - - refcount_acquire(&icl_ncons); + TAILQ_REMOVE(&sc->sc_modules, im, im_next); + sx_xunlock(&sc->sc_lock); - ic = uma_zalloc(icl_conn_zone, M_WAITOK | M_ZERO); + free(im->im_name, M_ICL); + free(im, M_ICL); - STAILQ_INIT(&ic->ic_to_send); - ic->ic_lock = lock; - cv_init(&ic->ic_send_cv, "icl_tx"); - cv_init(&ic->ic_receive_cv, "icl_rx"); -#ifdef DIAGNOSTIC - refcount_init(&ic->ic_outstanding_pdus, 0); -#endif - ic->ic_max_data_segment_length = ICL_MAX_DATA_SEGMENT_LENGTH; - ic->ic_name = name; - - return (ic); -} - -void -icl_conn_free(struct icl_conn *ic) -{ - - cv_destroy(&ic->ic_send_cv); - cv_destroy(&ic->ic_receive_cv); - uma_zfree(icl_conn_zone, ic); - refcount_release(&icl_ncons); + ICL_DEBUG("offload \"%s\" unregistered", offload); + return (0); } static int -icl_conn_start(struct icl_conn *ic) +icl_load(void) { - size_t minspace; - struct sockopt opt; - int error, one = 1; - - ICL_CONN_LOCK(ic); - - /* - * XXX: Ugly hack. 
- */ - if (ic->ic_socket == NULL) { - ICL_CONN_UNLOCK(ic); - return (EINVAL); - } - ic->ic_receive_state = ICL_CONN_STATE_BHS; - ic->ic_receive_len = sizeof(struct iscsi_bhs); - ic->ic_disconnecting = false; - - ICL_CONN_UNLOCK(ic); - - /* - * For sendspace, this is required because the current code cannot - * send a PDU in pieces; thus, the minimum buffer size is equal - * to the maximum PDU size. "+4" is to account for possible padding. - * - * What we should actually do here is to use autoscaling, but set - * some minimal buffer size to "minspace". I don't know a way to do - * that, though. - */ - minspace = sizeof(struct iscsi_bhs) + ic->ic_max_data_segment_length + - ISCSI_HEADER_DIGEST_SIZE + ISCSI_DATA_DIGEST_SIZE + 4; - if (sendspace < minspace) { - ICL_WARN("kern.icl.sendspace too low; must be at least %zd", - minspace); - sendspace = minspace; - } - if (recvspace < minspace) { - ICL_WARN("kern.icl.recvspace too low; must be at least %zd", - minspace); - recvspace = minspace; - } - - error = soreserve(ic->ic_socket, sendspace, recvspace); - if (error != 0) { - ICL_WARN("soreserve failed with error %d", error); - icl_conn_close(ic); - return (error); - } - ic->ic_socket->so_snd.sb_flags |= SB_AUTOSIZE; - ic->ic_socket->so_rcv.sb_flags |= SB_AUTOSIZE; - - /* - * Disable Nagle. - */ - bzero(&opt, sizeof(opt)); - opt.sopt_dir = SOPT_SET; - opt.sopt_level = IPPROTO_TCP; - opt.sopt_name = TCP_NODELAY; - opt.sopt_val = &one; - opt.sopt_valsize = sizeof(one); - error = sosetopt(ic->ic_socket, &opt); - if (error != 0) { - ICL_WARN("disabling TCP_NODELAY failed with error %d", error); - icl_conn_close(ic); - return (error); - } - - /* - * Start threads. 
- */ - error = kthread_add(icl_send_thread, ic, NULL, NULL, 0, 0, "%stx", - ic->ic_name); - if (error != 0) { - ICL_WARN("kthread_add(9) failed with error %d", error); - icl_conn_close(ic); - return (error); - } - - error = kthread_add(icl_receive_thread, ic, NULL, NULL, 0, 0, "%srx", - ic->ic_name); - if (error != 0) { - ICL_WARN("kthread_add(9) failed with error %d", error); - icl_conn_close(ic); - return (error); - } - - /* - * Register socket upcall, to get notified about incoming PDUs - * and free space to send outgoing ones. - */ - SOCKBUF_LOCK(&ic->ic_socket->so_snd); - soupcall_set(ic->ic_socket, SO_SND, icl_soupcall_send, ic); - SOCKBUF_UNLOCK(&ic->ic_socket->so_snd); - SOCKBUF_LOCK(&ic->ic_socket->so_rcv); - soupcall_set(ic->ic_socket, SO_RCV, icl_soupcall_receive, ic); - SOCKBUF_UNLOCK(&ic->ic_socket->so_rcv); + sc = malloc(sizeof(*sc), M_ICL, M_ZERO | M_WAITOK); + sx_init(&sc->sc_lock, "icl"); + TAILQ_INIT(&sc->sc_modules); return (0); } -int -icl_conn_handoff(struct icl_conn *ic, int fd) -{ - struct file *fp; - struct socket *so; - cap_rights_t rights; - int error; - - ICL_CONN_LOCK_ASSERT_NOT(ic); - - /* - * Steal the socket from userland. 
- */ - error = fget(curthread, fd, - cap_rights_init(&rights, CAP_SOCK_CLIENT), &fp); - if (error != 0) - return (error); - if (fp->f_type != DTYPE_SOCKET) { - fdrop(fp, curthread); - return (EINVAL); - } - so = fp->f_data; - if (so->so_type != SOCK_STREAM) { - fdrop(fp, curthread); - return (EINVAL); - } - - ICL_CONN_LOCK(ic); - - if (ic->ic_socket != NULL) { - ICL_CONN_UNLOCK(ic); - fdrop(fp, curthread); - return (EBUSY); - } - - ic->ic_socket = fp->f_data; - fp->f_ops = &badfileops; - fp->f_data = NULL; - fdrop(fp, curthread); - ICL_CONN_UNLOCK(ic); - - error = icl_conn_start(ic); - - return (error); -} - -void -icl_conn_close(struct icl_conn *ic) -{ - struct icl_pdu *pdu; - - ICL_CONN_LOCK_ASSERT_NOT(ic); - - ICL_CONN_LOCK(ic); - if (ic->ic_socket == NULL) { - ICL_CONN_UNLOCK(ic); - return; - } - - /* - * Deregister socket upcalls. - */ - ICL_CONN_UNLOCK(ic); - SOCKBUF_LOCK(&ic->ic_socket->so_snd); - if (ic->ic_socket->so_snd.sb_upcall != NULL) - soupcall_clear(ic->ic_socket, SO_SND); - SOCKBUF_UNLOCK(&ic->ic_socket->so_snd); - SOCKBUF_LOCK(&ic->ic_socket->so_rcv); - if (ic->ic_socket->so_rcv.sb_upcall != NULL) - soupcall_clear(ic->ic_socket, SO_RCV); - SOCKBUF_UNLOCK(&ic->ic_socket->so_rcv); - ICL_CONN_LOCK(ic); - - ic->ic_disconnecting = true; - - /* - * Wake up the threads, so they can properly terminate. - */ - while (ic->ic_receive_running || ic->ic_send_running) { - //ICL_DEBUG("waiting for send/receive threads to terminate"); - cv_signal(&ic->ic_receive_cv); - cv_signal(&ic->ic_send_cv); - cv_wait(&ic->ic_send_cv, ic->ic_lock); - } - //ICL_DEBUG("send/receive threads terminated"); - - ICL_CONN_UNLOCK(ic); - soclose(ic->ic_socket); - ICL_CONN_LOCK(ic); - ic->ic_socket = NULL; - - if (ic->ic_receive_pdu != NULL) { - //ICL_DEBUG("freeing partially received PDU"); - icl_pdu_free(ic->ic_receive_pdu); - ic->ic_receive_pdu = NULL; - } - - /* - * Remove any outstanding PDUs from the send queue. 
- */ - while (!STAILQ_EMPTY(&ic->ic_to_send)) { - pdu = STAILQ_FIRST(&ic->ic_to_send); - STAILQ_REMOVE_HEAD(&ic->ic_to_send, ip_next); - icl_pdu_free(pdu); - } - - KASSERT(STAILQ_EMPTY(&ic->ic_to_send), - ("destroying session with non-empty send queue")); -#ifdef DIAGNOSTIC - KASSERT(ic->ic_outstanding_pdus == 0, - ("destroying session with %d outstanding PDUs", - ic->ic_outstanding_pdus)); -#endif - ICL_CONN_UNLOCK(ic); -} - -bool -icl_conn_connected(struct icl_conn *ic) -{ - ICL_CONN_LOCK_ASSERT_NOT(ic); - - ICL_CONN_LOCK(ic); - if (ic->ic_socket == NULL) { - ICL_CONN_UNLOCK(ic); - return (false); - } - if (ic->ic_socket->so_error != 0) { - ICL_CONN_UNLOCK(ic); - return (false); - } - ICL_CONN_UNLOCK(ic); - return (true); -} - -#ifdef ICL_KERNEL_PROXY -int -icl_conn_handoff_sock(struct icl_conn *ic, struct socket *so) -{ - int error; - - ICL_CONN_LOCK_ASSERT_NOT(ic); - - if (so->so_type != SOCK_STREAM) - return (EINVAL); - - ICL_CONN_LOCK(ic); - if (ic->ic_socket != NULL) { - ICL_CONN_UNLOCK(ic); - return (EBUSY); - } - ic->ic_socket = so; - ICL_CONN_UNLOCK(ic); - - error = icl_conn_start(ic); - - return (error); -} -#endif /* ICL_KERNEL_PROXY */ - static int icl_unload(void) { - if (icl_ncons != 0) - return (EBUSY); + sx_slock(&sc->sc_lock); + KASSERT(TAILQ_EMPTY(&sc->sc_modules), ("still have modules")); + sx_sunlock(&sc->sc_lock); - uma_zdestroy(icl_conn_zone); - uma_zdestroy(icl_pdu_zone); + sx_destroy(&sc->sc_lock); + free(sc, M_ICL); return (0); } -static void -icl_load(void) -{ - - icl_conn_zone = uma_zcreate("icl_conn", - sizeof(struct icl_conn), NULL, NULL, NULL, NULL, - UMA_ALIGN_PTR, 0); - icl_pdu_zone = uma_zcreate("icl_pdu", - sizeof(struct icl_pdu), NULL, NULL, NULL, NULL, - UMA_ALIGN_PTR, 0); - - refcount_init(&icl_ncons, 0); -} - static int icl_modevent(module_t mod, int what, void *arg) { switch (what) { case MOD_LOAD: - icl_load(); - return (0); + return (icl_load()); case MOD_UNLOAD: return (icl_unload()); default: diff --git 
a/sys/dev/iscsi/icl.h b/sys/dev/iscsi/icl.h index 50b231ef6996..f667ad66a899 100644 --- a/sys/dev/iscsi/icl.h +++ b/sys/dev/iscsi/icl.h @@ -37,7 +37,32 @@ * and receive iSCSI PDUs. */ +#include <sys/types.h> +#include <sys/kobj.h> +#include <sys/condvar.h> +#include <sys/sysctl.h> + +SYSCTL_DECL(_kern_icl); + +extern int icl_debug; + +#define ICL_DEBUG(X, ...) \ + do { \ + if (icl_debug > 1) \ + printf("%s: " X "\n", __func__, ## __VA_ARGS__);\ + } while (0) + +#define ICL_WARN(X, ...) \ + do { \ + if (icl_debug > 0) { \ + printf("WARNING: %s: " X "\n", \ + __func__, ## __VA_ARGS__); \ + } \ + } while (0) + struct icl_conn; +struct ccb_scsiio; +union ctl_io; struct icl_pdu { STAILQ_ENTRY(icl_pdu) ip_next; @@ -57,13 +82,6 @@ struct icl_pdu { uint32_t ip_prv2; }; -struct icl_pdu *icl_pdu_new(struct icl_conn *ic, int flags); -size_t icl_pdu_data_segment_length(const struct icl_pdu *ip); -int icl_pdu_append_data(struct icl_pdu *ip, const void *addr, size_t len, int flags); -void icl_pdu_get_data(struct icl_pdu *ip, size_t off, void *addr, size_t len); -void icl_pdu_queue(struct icl_pdu *ip); -void icl_pdu_free(struct icl_pdu *ip); - #define ICL_CONN_STATE_INVALID 0 #define ICL_CONN_STATE_BHS 1 #define ICL_CONN_STATE_AHS 2 @@ -74,6 +92,7 @@ void icl_pdu_free(struct icl_pdu *ip); #define ICL_MAX_DATA_SEGMENT_LENGTH (128 * 1024) struct icl_conn { + KOBJ_FIELDS; struct mtx *ic_lock; struct socket *ic_socket; #ifdef DIAGNOSTIC @@ -104,11 +123,14 @@ struct icl_conn { void *ic_prv0; }; -struct icl_conn *icl_conn_new(const char *name, struct mtx *lock); -void icl_conn_free(struct icl_conn *ic); -int icl_conn_handoff(struct icl_conn *ic, int fd); -void icl_conn_close(struct icl_conn *ic); -bool icl_conn_connected(struct icl_conn *ic); +struct icl_conn *icl_new_conn(const char *offload, const char *name, + struct mtx *lock); +int icl_limits(const char *offload, size_t *limitp); + +int icl_register(const char *offload, int priority, + int (*limits)(size_t *), + struct icl_conn 
*(*new_conn)(const char *, struct mtx *)); +int icl_unregister(const char *offload); #ifdef ICL_KERNEL_PROXY diff --git a/sys/dev/iscsi/icl_conn_if.m b/sys/dev/iscsi/icl_conn_if.m new file mode 100644 index 000000000000..d3ac57f0420d --- /dev/null +++ b/sys/dev/iscsi/icl_conn_if.m @@ -0,0 +1,87 @@ +#- +# Copyright (c) 2014 The FreeBSD Foundation +# All rights reserved. +# +# This software was developed by Edward Tomasz Napierala under sponsorship +# from the FreeBSD Foundation. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. 
+# +# $FreeBSD$ +# + +#include <dev/iscsi/icl.h> + +INTERFACE icl_conn; + +METHOD size_t pdu_data_segment_length { + struct icl_conn *_ic; + const struct icl_pdu *_ip; +}; + +METHOD int pdu_append_data { + struct icl_conn *_ic; + struct icl_pdu *_ip; + const void *_addr; + size_t _len; + int _flags; +}; + +METHOD void pdu_get_data { + struct icl_conn *_ic; + struct icl_pdu *_ip; + size_t _off; + void *_addr; + size_t _len; +}; + +METHOD void pdu_queue { + struct icl_conn *_ic; + struct icl_pdu *_ip; +}; + +METHOD void pdu_free { + struct icl_conn *_ic; + struct icl_pdu *_ip; +}; + +METHOD struct icl_pdu * new_pdu { + struct icl_conn *_ic; + int _flags; +}; + +METHOD void free { + struct icl_conn *_ic; +}; + +METHOD int handoff { + struct icl_conn *_ic; + int _fd; +}; + +METHOD void close { + struct icl_conn *_ic; +}; + +METHOD bool connected { + struct icl_conn *_ic; +}; diff --git a/sys/dev/iscsi/icl_soft.c b/sys/dev/iscsi/icl_soft.c new file mode 100644 index 000000000000..9b6c695eb7c3 --- /dev/null +++ b/sys/dev/iscsi/icl_soft.c @@ -0,0 +1,1537 @@ +/*- + * Copyright (c) 2012 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Edward Tomasz Napierala under sponsorship + * from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + +/* + * iSCSI Common Layer. It's used by both the initiator and target to send + * and receive iSCSI PDUs. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/capsicum.h> +#include <sys/condvar.h> +#include <sys/conf.h> +#include <sys/file.h> +#include <sys/kernel.h> +#include <sys/kthread.h> +#include <sys/lock.h> +#include <sys/mbuf.h> +#include <sys/mutex.h> +#include <sys/module.h> +#include <sys/protosw.h> +#include <sys/socket.h> +#include <sys/socketvar.h> +#include <sys/sysctl.h> +#include <sys/systm.h> +#include <sys/sx.h> +#include <sys/uio.h> +#include <vm/uma.h> +#include <netinet/in.h> +#include <netinet/tcp.h> + +#include <dev/iscsi/icl.h> +#include <dev/iscsi/iscsi_proto.h> +#include <icl_conn_if.h> + +static int coalesce = 1; +SYSCTL_INT(_kern_icl, OID_AUTO, coalesce, CTLFLAG_RWTUN, + &coalesce, 0, "Try to coalesce PDUs before sending"); +static int partial_receive_len = 128 * 1024; +SYSCTL_INT(_kern_icl, OID_AUTO, partial_receive_len, CTLFLAG_RWTUN, + &partial_receive_len, 0, "Minimum read size for partially received " + "data segment"); +static int sendspace = 1048576; +SYSCTL_INT(_kern_icl, OID_AUTO, sendspace, 
CTLFLAG_RWTUN, + &sendspace, 0, "Default send socket buffer size"); +static int recvspace = 1048576; +SYSCTL_INT(_kern_icl, OID_AUTO, recvspace, CTLFLAG_RWTUN, + &recvspace, 0, "Default receive socket buffer size"); + +static MALLOC_DEFINE(M_ICL_SOFT, "icl_soft", "iSCSI software backend"); +static uma_zone_t icl_pdu_zone; + +static volatile u_int icl_ncons; + +#define ICL_CONN_LOCK(X) mtx_lock(X->ic_lock) +#define ICL_CONN_UNLOCK(X) mtx_unlock(X->ic_lock) +#define ICL_CONN_LOCK_ASSERT(X) mtx_assert(X->ic_lock, MA_OWNED) +#define ICL_CONN_LOCK_ASSERT_NOT(X) mtx_assert(X->ic_lock, MA_NOTOWNED) + +STAILQ_HEAD(icl_pdu_stailq, icl_pdu); + +static icl_conn_new_pdu_t icl_soft_conn_new_pdu; +static icl_conn_pdu_free_t icl_soft_conn_pdu_free; +static icl_conn_pdu_data_segment_length_t + icl_soft_conn_pdu_data_segment_length; +static icl_conn_pdu_append_data_t icl_soft_conn_pdu_append_data; +static icl_conn_pdu_get_data_t icl_soft_conn_pdu_get_data; +static icl_conn_pdu_queue_t icl_soft_conn_pdu_queue; +static icl_conn_handoff_t icl_soft_conn_handoff; +static icl_conn_free_t icl_soft_conn_free; +static icl_conn_close_t icl_soft_conn_close; +static icl_conn_connected_t icl_soft_conn_connected; + +static kobj_method_t icl_soft_methods[] = { + KOBJMETHOD(icl_conn_new_pdu, icl_soft_conn_new_pdu), + KOBJMETHOD(icl_conn_pdu_free, icl_soft_conn_pdu_free), + KOBJMETHOD(icl_conn_pdu_data_segment_length, + icl_soft_conn_pdu_data_segment_length), + KOBJMETHOD(icl_conn_pdu_append_data, icl_soft_conn_pdu_append_data), + KOBJMETHOD(icl_conn_pdu_get_data, icl_soft_conn_pdu_get_data), + KOBJMETHOD(icl_conn_pdu_queue, icl_soft_conn_pdu_queue), + KOBJMETHOD(icl_conn_handoff, icl_soft_conn_handoff), + KOBJMETHOD(icl_conn_free, icl_soft_conn_free), + KOBJMETHOD(icl_conn_close, icl_soft_conn_close), + KOBJMETHOD(icl_conn_connected, icl_soft_conn_connected), + { 0, 0 } +}; + +DEFINE_CLASS(icl_soft, icl_soft_methods, sizeof(struct icl_conn)); + +static void icl_conn_close(struct icl_conn *ic); + 
+static void +icl_conn_fail(struct icl_conn *ic) +{ + if (ic->ic_socket == NULL) + return; + + /* + * XXX + */ + ic->ic_socket->so_error = EDOOFUS; + (ic->ic_error)(ic); +} + +static struct mbuf * +icl_conn_receive(struct icl_conn *ic, size_t len) +{ + struct uio uio; + struct socket *so; + struct mbuf *m; + int error, flags; + + so = ic->ic_socket; + + memset(&uio, 0, sizeof(uio)); + uio.uio_resid = len; + + flags = MSG_DONTWAIT; + error = soreceive(so, NULL, &uio, &m, NULL, &flags); + if (error != 0) { + ICL_DEBUG("soreceive error %d", error); + return (NULL); + } + if (uio.uio_resid != 0) { + m_freem(m); + ICL_DEBUG("short read"); + return (NULL); + } + + return (m); +} + +static struct icl_pdu * +icl_pdu_new_empty(struct icl_conn *ic, int flags) +{ + struct icl_pdu *ip; + +#ifdef DIAGNOSTIC + refcount_acquire(&ic->ic_outstanding_pdus); +#endif + ip = uma_zalloc(icl_pdu_zone, flags | M_ZERO); + if (ip == NULL) { + ICL_WARN("failed to allocate %zd bytes", sizeof(*ip)); +#ifdef DIAGNOSTIC + refcount_release(&ic->ic_outstanding_pdus); +#endif + return (NULL); + } + + ip->ip_conn = ic; + + return (ip); +} + +static void +icl_pdu_free(struct icl_pdu *ip) +{ + struct icl_conn *ic; + + ic = ip->ip_conn; + + m_freem(ip->ip_bhs_mbuf); + m_freem(ip->ip_ahs_mbuf); + m_freem(ip->ip_data_mbuf); + uma_zfree(icl_pdu_zone, ip); +#ifdef DIAGNOSTIC + refcount_release(&ic->ic_outstanding_pdus); +#endif +} + +void +icl_soft_conn_pdu_free(struct icl_conn *ic, struct icl_pdu *ip) +{ + icl_pdu_free(ip); +} + +/* + * Allocate icl_pdu with empty BHS to fill up by the caller. 
+ */ +struct icl_pdu * +icl_soft_conn_new_pdu(struct icl_conn *ic, int flags) +{ + struct icl_pdu *ip; + + ip = icl_pdu_new_empty(ic, flags); + if (ip == NULL) + return (NULL); + + ip->ip_bhs_mbuf = m_getm2(NULL, sizeof(struct iscsi_bhs), + flags, MT_DATA, M_PKTHDR); + if (ip->ip_bhs_mbuf == NULL) { + ICL_WARN("failed to allocate %zd bytes", sizeof(*ip)); + icl_pdu_free(ip); + return (NULL); + } + ip->ip_bhs = mtod(ip->ip_bhs_mbuf, struct iscsi_bhs *); + memset(ip->ip_bhs, 0, sizeof(struct iscsi_bhs)); + ip->ip_bhs_mbuf->m_len = sizeof(struct iscsi_bhs); + + return (ip); +} + +static int +icl_pdu_ahs_length(const struct icl_pdu *request) +{ + + return (request->ip_bhs->bhs_total_ahs_len * 4); +} + +static size_t +icl_pdu_data_segment_length(const struct icl_pdu *request) +{ + uint32_t len = 0; + + len += request->ip_bhs->bhs_data_segment_len[0]; + len <<= 8; + len += request->ip_bhs->bhs_data_segment_len[1]; + len <<= 8; + len += request->ip_bhs->bhs_data_segment_len[2]; + + return (len); +} + +size_t +icl_soft_conn_pdu_data_segment_length(struct icl_conn *ic, + const struct icl_pdu *request) +{ + + return (icl_pdu_data_segment_length(request)); +} + +static void +icl_pdu_set_data_segment_length(struct icl_pdu *response, uint32_t len) +{ + + response->ip_bhs->bhs_data_segment_len[2] = len; + response->ip_bhs->bhs_data_segment_len[1] = len >> 8; + response->ip_bhs->bhs_data_segment_len[0] = len >> 16; +} + +static size_t +icl_pdu_padding(const struct icl_pdu *ip) +{ + + if ((ip->ip_data_len % 4) != 0) + return (4 - (ip->ip_data_len % 4)); + + return (0); +} + +static size_t +icl_pdu_size(const struct icl_pdu *response) +{ + size_t len; + + KASSERT(response->ip_ahs_len == 0, ("responding with AHS")); + + len = sizeof(struct iscsi_bhs) + response->ip_data_len + + icl_pdu_padding(response); + if (response->ip_conn->ic_header_crc32c) + len += ISCSI_HEADER_DIGEST_SIZE; + if (response->ip_data_len != 0 && response->ip_conn->ic_data_crc32c) + len += ISCSI_DATA_DIGEST_SIZE; 
+ + return (len); +} + +static int +icl_pdu_receive_bhs(struct icl_pdu *request, size_t *availablep) +{ + struct mbuf *m; + + m = icl_conn_receive(request->ip_conn, sizeof(struct iscsi_bhs)); + if (m == NULL) { + ICL_DEBUG("failed to receive BHS"); + return (-1); + } + + request->ip_bhs_mbuf = m_pullup(m, sizeof(struct iscsi_bhs)); + if (request->ip_bhs_mbuf == NULL) { + ICL_WARN("m_pullup failed"); + return (-1); + } + request->ip_bhs = mtod(request->ip_bhs_mbuf, struct iscsi_bhs *); + + /* + * XXX: For architectures with strict alignment requirements + * we may need to allocate ip_bhs and copy the data into it. + * For some reason, though, not doing this doesn't seem + * to cause problems; tested on sparc64. + */ + + *availablep -= sizeof(struct iscsi_bhs); + return (0); +} + +static int +icl_pdu_receive_ahs(struct icl_pdu *request, size_t *availablep) +{ + + request->ip_ahs_len = icl_pdu_ahs_length(request); + if (request->ip_ahs_len == 0) + return (0); + + request->ip_ahs_mbuf = icl_conn_receive(request->ip_conn, + request->ip_ahs_len); + if (request->ip_ahs_mbuf == NULL) { + ICL_DEBUG("failed to receive AHS"); + return (-1); + } + + *availablep -= request->ip_ahs_len; + return (0); +} + +static uint32_t +icl_mbuf_to_crc32c(const struct mbuf *m0) +{ + uint32_t digest = 0xffffffff; + const struct mbuf *m; + + for (m = m0; m != NULL; m = m->m_next) + digest = calculate_crc32c(digest, + mtod(m, const void *), m->m_len); + + digest = digest ^ 0xffffffff; + + return (digest); +} + +static int +icl_pdu_check_header_digest(struct icl_pdu *request, size_t *availablep) +{ + struct mbuf *m; + uint32_t received_digest, valid_digest; + + if (request->ip_conn->ic_header_crc32c == false) + return (0); + + m = icl_conn_receive(request->ip_conn, ISCSI_HEADER_DIGEST_SIZE); + if (m == NULL) { + ICL_DEBUG("failed to receive header digest"); + return (-1); + } + + CTASSERT(sizeof(received_digest) == ISCSI_HEADER_DIGEST_SIZE); + m_copydata(m, 0, ISCSI_HEADER_DIGEST_SIZE, (void 
*)&received_digest); + m_freem(m); + + *availablep -= ISCSI_HEADER_DIGEST_SIZE; + + /* + * XXX: Handle AHS. + */ + valid_digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf); + if (received_digest != valid_digest) { + ICL_WARN("header digest check failed; got 0x%x, " + "should be 0x%x", received_digest, valid_digest); + return (-1); + } + + return (0); +} + +/* + * Return the number of bytes that should be waiting in the receive socket + * before icl_pdu_receive_data_segment() gets called. + */ +static size_t +icl_pdu_data_segment_receive_len(const struct icl_pdu *request) +{ + size_t len; + + len = icl_pdu_data_segment_length(request); + if (len == 0) + return (0); + + /* + * Account for the parts of data segment already read from + * the socket buffer. + */ + KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len")); + len -= request->ip_data_len; + + /* + * Don't always wait for the full data segment to be delivered + * to the socket; this might badly affect performance due to + * TCP window scaling. + */ + if (len > partial_receive_len) { +#if 0 + ICL_DEBUG("need %zd bytes of data, limiting to %zd", + len, partial_receive_len)); +#endif + len = partial_receive_len; + + return (len); + } + + /* + * Account for padding. Note that due to the way code is written, + * the icl_pdu_receive_data_segment() must always receive padding + * along with the last part of data segment, because it would be + * impossible to tell whether we've already received the full data + * segment including padding, or without it. 
+ */ + if ((len % 4) != 0) + len += 4 - (len % 4); + +#if 0 + ICL_DEBUG("need %zd bytes of data", len)); +#endif + + return (len); +} + +static int +icl_pdu_receive_data_segment(struct icl_pdu *request, + size_t *availablep, bool *more_neededp) +{ + struct icl_conn *ic; + size_t len, padding = 0; + struct mbuf *m; + + ic = request->ip_conn; + + *more_neededp = false; + ic->ic_receive_len = 0; + + len = icl_pdu_data_segment_length(request); + if (len == 0) + return (0); + + if ((len % 4) != 0) + padding = 4 - (len % 4); + + /* + * Account for already received parts of data segment. + */ + KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len")); + len -= request->ip_data_len; + + if (len + padding > *availablep) { + /* + * Not enough data in the socket buffer. Receive as much + * as we can. Don't receive padding, since, obviously, it's + * not the end of data segment yet. + */ +#if 0 + ICL_DEBUG("limited from %zd to %zd", + len + padding, *availablep - padding)); +#endif + len = *availablep - padding; + *more_neededp = true; + padding = 0; + } + + /* + * Must not try to receive padding without at least one byte + * of actual data segment. 
+ */ + if (len > 0) { + m = icl_conn_receive(request->ip_conn, len + padding); + if (m == NULL) { + ICL_DEBUG("failed to receive data segment"); + return (-1); + } + + if (request->ip_data_mbuf == NULL) + request->ip_data_mbuf = m; + else + m_cat(request->ip_data_mbuf, m); + + request->ip_data_len += len; + *availablep -= len + padding; + } else + ICL_DEBUG("len 0"); + + if (*more_neededp) + ic->ic_receive_len = + icl_pdu_data_segment_receive_len(request); + + return (0); +} + +static int +icl_pdu_check_data_digest(struct icl_pdu *request, size_t *availablep) +{ + struct mbuf *m; + uint32_t received_digest, valid_digest; + + if (request->ip_conn->ic_data_crc32c == false) + return (0); + + if (request->ip_data_len == 0) + return (0); + + m = icl_conn_receive(request->ip_conn, ISCSI_DATA_DIGEST_SIZE); + if (m == NULL) { + ICL_DEBUG("failed to receive data digest"); + return (-1); + } + + CTASSERT(sizeof(received_digest) == ISCSI_DATA_DIGEST_SIZE); + m_copydata(m, 0, ISCSI_DATA_DIGEST_SIZE, (void *)&received_digest); + m_freem(m); + + *availablep -= ISCSI_DATA_DIGEST_SIZE; + + /* + * Note that ip_data_mbuf also contains padding; since digest + * calculation is supposed to include that, we iterate over + * the entire ip_data_mbuf chain, not just ip_data_len bytes of it. + */ + valid_digest = icl_mbuf_to_crc32c(request->ip_data_mbuf); + if (received_digest != valid_digest) { + ICL_WARN("data digest check failed; got 0x%x, " + "should be 0x%x", received_digest, valid_digest); + return (-1); + } + + return (0); +} + +/* + * Somewhat contrary to the name, this attempts to receive only one + * "part" of PDU at a time; call it repeatedly until it returns non-NULL. 
+ */ +static struct icl_pdu * +icl_conn_receive_pdu(struct icl_conn *ic, size_t *availablep) +{ + struct icl_pdu *request; + struct socket *so; + size_t len; + int error; + bool more_needed; + + so = ic->ic_socket; + + if (ic->ic_receive_state == ICL_CONN_STATE_BHS) { + KASSERT(ic->ic_receive_pdu == NULL, + ("ic->ic_receive_pdu != NULL")); + request = icl_pdu_new_empty(ic, M_NOWAIT); + if (request == NULL) { + ICL_DEBUG("failed to allocate PDU; " + "dropping connection"); + icl_conn_fail(ic); + return (NULL); + } + ic->ic_receive_pdu = request; + } else { + KASSERT(ic->ic_receive_pdu != NULL, + ("ic->ic_receive_pdu == NULL")); + request = ic->ic_receive_pdu; + } + + if (*availablep < ic->ic_receive_len) { +#if 0 + ICL_DEBUG("not enough data; need %zd, " + "have %zd", ic->ic_receive_len, *availablep); +#endif + return (NULL); + } + + switch (ic->ic_receive_state) { + case ICL_CONN_STATE_BHS: + //ICL_DEBUG("receiving BHS"); + error = icl_pdu_receive_bhs(request, availablep); + if (error != 0) { + ICL_DEBUG("failed to receive BHS; " + "dropping connection"); + break; + } + + /* + * We don't enforce any limit for AHS length; + * its length is stored in 8 bit field. 
+ */ + + len = icl_pdu_data_segment_length(request); + if (len > ic->ic_max_data_segment_length) { + ICL_WARN("received data segment " + "length %zd is larger than negotiated " + "MaxDataSegmentLength %zd; " + "dropping connection", + len, ic->ic_max_data_segment_length); + error = EINVAL; + break; + } + + ic->ic_receive_state = ICL_CONN_STATE_AHS; + ic->ic_receive_len = icl_pdu_ahs_length(request); + break; + + case ICL_CONN_STATE_AHS: + //ICL_DEBUG("receiving AHS"); + error = icl_pdu_receive_ahs(request, availablep); + if (error != 0) { + ICL_DEBUG("failed to receive AHS; " + "dropping connection"); + break; + } + ic->ic_receive_state = ICL_CONN_STATE_HEADER_DIGEST; + if (ic->ic_header_crc32c == false) + ic->ic_receive_len = 0; + else + ic->ic_receive_len = ISCSI_HEADER_DIGEST_SIZE; + break; + + case ICL_CONN_STATE_HEADER_DIGEST: + //ICL_DEBUG("receiving header digest"); + error = icl_pdu_check_header_digest(request, availablep); + if (error != 0) { + ICL_DEBUG("header digest failed; " + "dropping connection"); + break; + } + + ic->ic_receive_state = ICL_CONN_STATE_DATA; + ic->ic_receive_len = + icl_pdu_data_segment_receive_len(request); + break; + + case ICL_CONN_STATE_DATA: + //ICL_DEBUG("receiving data segment"); + error = icl_pdu_receive_data_segment(request, availablep, + &more_needed); + if (error != 0) { + ICL_DEBUG("failed to receive data segment;" + "dropping connection"); + break; + } + + if (more_needed) + break; + + ic->ic_receive_state = ICL_CONN_STATE_DATA_DIGEST; + if (request->ip_data_len == 0 || ic->ic_data_crc32c == false) + ic->ic_receive_len = 0; + else + ic->ic_receive_len = ISCSI_DATA_DIGEST_SIZE; + break; + + case ICL_CONN_STATE_DATA_DIGEST: + //ICL_DEBUG("receiving data digest"); + error = icl_pdu_check_data_digest(request, availablep); + if (error != 0) { + ICL_DEBUG("data digest failed; " + "dropping connection"); + break; + } + + /* + * We've received complete PDU; reset the receive state machine + * and return the PDU. 
+ */ + ic->ic_receive_state = ICL_CONN_STATE_BHS; + ic->ic_receive_len = sizeof(struct iscsi_bhs); + ic->ic_receive_pdu = NULL; + return (request); + + default: + panic("invalid ic_receive_state %d\n", ic->ic_receive_state); + } + + if (error != 0) { + /* + * Don't free the PDU; it's pointed to by ic->ic_receive_pdu + * and will get freed in icl_conn_close(). + */ + icl_conn_fail(ic); + } + + return (NULL); +} + +static void +icl_conn_receive_pdus(struct icl_conn *ic, size_t available) +{ + struct icl_pdu *response; + struct socket *so; + + so = ic->ic_socket; + + /* + * This can never happen; we're careful to only mess with ic->ic_socket + * pointer when the send/receive threads are not running. + */ + KASSERT(so != NULL, ("NULL socket")); + + for (;;) { + if (ic->ic_disconnecting) + return; + + if (so->so_error != 0) { + ICL_DEBUG("connection error %d; " + "dropping connection", so->so_error); + icl_conn_fail(ic); + return; + } + + /* + * Loop until we have a complete PDU or there is not enough + * data in the socket buffer. 
+ */
+		if (available < ic->ic_receive_len) {
+#if 0
+			ICL_DEBUG("not enough data; have %zd, "
+			    "need %zd", available,
+			    ic->ic_receive_len);
+#endif
+			return;
+		}
+
+		response = icl_conn_receive_pdu(ic, &available);
+		if (response == NULL)
+			continue;
+
+		if (response->ip_ahs_len > 0) {
+			ICL_WARN("received PDU with unsupported "
+			    "AHS; opcode 0x%x; dropping connection",
+			    response->ip_bhs->bhs_opcode);
+			icl_pdu_free(response);
+			icl_conn_fail(ic);
+			return;
+		}
+
+		(ic->ic_receive)(response);
+	}
+}
+
+static void
+icl_receive_thread(void *arg)
+{
+	struct icl_conn *ic;
+	size_t available;
+	struct socket *so;
+
+	ic = arg;
+	so = ic->ic_socket;
+
+	ICL_CONN_LOCK(ic);
+	ic->ic_receive_running = true;
+	ICL_CONN_UNLOCK(ic);
+
+	for (;;) {
+		if (ic->ic_disconnecting) {
+			//ICL_DEBUG("terminating");
+			break;
+		}
+
+		/*
+		 * Set the low watermark, to be checked by
+		 * soreadable() in icl_soupcall_receive()
+		 * to avoid unnecessary wakeups until there
+		 * is enough data received to read the PDU.
+ */ + SOCKBUF_LOCK(&so->so_rcv); + available = sbavail(&so->so_rcv); + if (available < ic->ic_receive_len) { + so->so_rcv.sb_lowat = ic->ic_receive_len; + cv_wait(&ic->ic_receive_cv, &so->so_rcv.sb_mtx); + } else + so->so_rcv.sb_lowat = so->so_rcv.sb_hiwat + 1; + SOCKBUF_UNLOCK(&so->so_rcv); + + icl_conn_receive_pdus(ic, available); + } + + ICL_CONN_LOCK(ic); + ic->ic_receive_running = false; + cv_signal(&ic->ic_send_cv); + ICL_CONN_UNLOCK(ic); + kthread_exit(); +} + +static int +icl_soupcall_receive(struct socket *so, void *arg, int waitflag) +{ + struct icl_conn *ic; + + if (!soreadable(so)) + return (SU_OK); + + ic = arg; + cv_signal(&ic->ic_receive_cv); + return (SU_OK); +} + +static int +icl_pdu_finalize(struct icl_pdu *request) +{ + size_t padding, pdu_len; + uint32_t digest, zero = 0; + int ok; + struct icl_conn *ic; + + ic = request->ip_conn; + + icl_pdu_set_data_segment_length(request, request->ip_data_len); + + pdu_len = icl_pdu_size(request); + + if (ic->ic_header_crc32c) { + digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf); + ok = m_append(request->ip_bhs_mbuf, sizeof(digest), + (void *)&digest); + if (ok != 1) { + ICL_WARN("failed to append header digest"); + return (1); + } + } + + if (request->ip_data_len != 0) { + padding = icl_pdu_padding(request); + if (padding > 0) { + ok = m_append(request->ip_data_mbuf, padding, + (void *)&zero); + if (ok != 1) { + ICL_WARN("failed to append padding"); + return (1); + } + } + + if (ic->ic_data_crc32c) { + digest = icl_mbuf_to_crc32c(request->ip_data_mbuf); + + ok = m_append(request->ip_data_mbuf, sizeof(digest), + (void *)&digest); + if (ok != 1) { + ICL_WARN("failed to append data digest"); + return (1); + } + } + + m_cat(request->ip_bhs_mbuf, request->ip_data_mbuf); + request->ip_data_mbuf = NULL; + } + + request->ip_bhs_mbuf->m_pkthdr.len = pdu_len; + + return (0); +} + +static void +icl_conn_send_pdus(struct icl_conn *ic, struct icl_pdu_stailq *queue) +{ + struct icl_pdu *request, *request2; + struct 
socket *so;
+	size_t available, size, size2;
+	int coalesced, error;
+
+	ICL_CONN_LOCK_ASSERT_NOT(ic);
+
+	so = ic->ic_socket;
+
+	SOCKBUF_LOCK(&so->so_snd);
+	/*
+	 * Check how much space do we have for transmit. We can't just
+	 * call sosend() and retry when we get EWOULDBLOCK or EMSGSIZE,
+	 * as it always frees the mbuf chain passed to it, even in case
+	 * of error.
+	 */
+	available = sbspace(&so->so_snd);
+
+	/*
+	 * Notify the socket upcall that we don't need wakeups
+	 * for the time being.
+	 */
+	so->so_snd.sb_lowat = so->so_snd.sb_hiwat + 1;
+	SOCKBUF_UNLOCK(&so->so_snd);
+
+	while (!STAILQ_EMPTY(queue)) {
+		request = STAILQ_FIRST(queue);
+		size = icl_pdu_size(request);
+		if (available < size) {
+
+			/*
+			 * Set the low watermark, to be checked by
+			 * sowriteable() in icl_soupcall_send()
+			 * to avoid unnecessary wakeups until there
+			 * is enough space for the PDU to fit.
+			 */
+			SOCKBUF_LOCK(&so->so_snd);
+			available = sbspace(&so->so_snd);
+			if (available < size) {
+#if 1
+				ICL_DEBUG("no space to send; "
+				    "have %zd, need %zd",
+				    available, size);
+#endif
+				so->so_snd.sb_lowat = size;
+				SOCKBUF_UNLOCK(&so->so_snd);
+				return;
+			}
+			SOCKBUF_UNLOCK(&so->so_snd);
+		}
+		STAILQ_REMOVE_HEAD(queue, ip_next);
+		error = icl_pdu_finalize(request);
+		if (error != 0) {
+			ICL_DEBUG("failed to finalize PDU; "
+			    "dropping connection");
+			icl_conn_fail(ic);
+			icl_pdu_free(request);
+			return;
+		}
+		if (coalesce) {
+			coalesced = 1;
+			for (;;) {
+				request2 = STAILQ_FIRST(queue);
+				if (request2 == NULL)
+					break;
+				size2 = icl_pdu_size(request2);
+				if (available < size + size2)
+					break;
+				STAILQ_REMOVE_HEAD(queue, ip_next);
+				error = icl_pdu_finalize(request2);
+				if (error != 0) {
+					ICL_DEBUG("failed to finalize PDU; "
+					    "dropping connection");
+					icl_conn_fail(ic);
+					icl_pdu_free(request);
+					icl_pdu_free(request2);
+					return;
+				}
+				m_cat(request->ip_bhs_mbuf, request2->ip_bhs_mbuf);
+				request2->ip_bhs_mbuf = NULL;
+				request->ip_bhs_mbuf->m_pkthdr.len += size2;
+				size += size2;
+				
STAILQ_REMOVE_AFTER(queue, request, ip_next); + icl_pdu_free(request2); + coalesced++; + } +#if 0 + if (coalesced > 1) { + ICL_DEBUG("coalesced %d PDUs into %zd bytes", + coalesced, size); + } +#endif + } + available -= size; + error = sosend(so, NULL, NULL, request->ip_bhs_mbuf, + NULL, MSG_DONTWAIT, curthread); + request->ip_bhs_mbuf = NULL; /* Sosend consumes the mbuf. */ + if (error != 0) { + ICL_DEBUG("failed to send PDU, error %d; " + "dropping connection", error); + icl_conn_fail(ic); + icl_pdu_free(request); + return; + } + icl_pdu_free(request); + } +} + +static void +icl_send_thread(void *arg) +{ + struct icl_conn *ic; + struct icl_pdu_stailq queue; + + ic = arg; + + STAILQ_INIT(&queue); + + ICL_CONN_LOCK(ic); + ic->ic_send_running = true; + + for (;;) { + for (;;) { + /* + * If the local queue is empty, populate it from + * the main one. This way the icl_conn_send_pdus() + * can go through all the queued PDUs without holding + * any locks. + */ + if (STAILQ_EMPTY(&queue)) + STAILQ_SWAP(&ic->ic_to_send, &queue, icl_pdu); + + ic->ic_check_send_space = false; + ICL_CONN_UNLOCK(ic); + icl_conn_send_pdus(ic, &queue); + ICL_CONN_LOCK(ic); + + /* + * The icl_soupcall_send() was called since the last + * call to sbspace(); go around; + */ + if (ic->ic_check_send_space) + continue; + + /* + * Local queue is empty, but we still have PDUs + * in the main one; go around. + */ + if (STAILQ_EMPTY(&queue) && + !STAILQ_EMPTY(&ic->ic_to_send)) + continue; + + /* + * There might be some stuff in the local queue, + * which didn't get sent due to not having enough send + * space. Wait for socket upcall. + */ + break; + } + + if (ic->ic_disconnecting) { + //ICL_DEBUG("terminating"); + break; + } + + cv_wait(&ic->ic_send_cv, ic->ic_lock); + } + + /* + * We're exiting; move PDUs back to the main queue, so they can + * get freed properly. At this point ordering doesn't matter. 
+ */ + STAILQ_CONCAT(&ic->ic_to_send, &queue); + + ic->ic_send_running = false; + cv_signal(&ic->ic_send_cv); + ICL_CONN_UNLOCK(ic); + kthread_exit(); +} + +static int +icl_soupcall_send(struct socket *so, void *arg, int waitflag) +{ + struct icl_conn *ic; + + if (!sowriteable(so)) + return (SU_OK); + + ic = arg; + + ICL_CONN_LOCK(ic); + ic->ic_check_send_space = true; + ICL_CONN_UNLOCK(ic); + + cv_signal(&ic->ic_send_cv); + + return (SU_OK); +} + +static int +icl_pdu_append_data(struct icl_pdu *request, const void *addr, size_t len, + int flags) +{ + struct mbuf *mb, *newmb; + size_t copylen, off = 0; + + KASSERT(len > 0, ("len == 0")); + + newmb = m_getm2(NULL, len, flags, MT_DATA, M_PKTHDR); + if (newmb == NULL) { + ICL_WARN("failed to allocate mbuf for %zd bytes", len); + return (ENOMEM); + } + + for (mb = newmb; mb != NULL; mb = mb->m_next) { + copylen = min(M_TRAILINGSPACE(mb), len - off); + memcpy(mtod(mb, char *), (const char *)addr + off, copylen); + mb->m_len = copylen; + off += copylen; + } + KASSERT(off == len, ("%s: off != len", __func__)); + + if (request->ip_data_mbuf == NULL) { + request->ip_data_mbuf = newmb; + request->ip_data_len = len; + } else { + m_cat(request->ip_data_mbuf, newmb); + request->ip_data_len += len; + } + + return (0); +} + +int +icl_soft_conn_pdu_append_data(struct icl_conn *ic, struct icl_pdu *request, + const void *addr, size_t len, int flags) +{ + + return (icl_pdu_append_data(request, addr, len, flags)); +} + +static void +icl_pdu_get_data(struct icl_pdu *ip, size_t off, void *addr, size_t len) +{ + + m_copydata(ip->ip_data_mbuf, off, len, addr); +} + +void +icl_soft_conn_pdu_get_data(struct icl_conn *ic, struct icl_pdu *ip, + size_t off, void *addr, size_t len) +{ + + return (icl_pdu_get_data(ip, off, addr, len)); +} + +static void +icl_pdu_queue(struct icl_pdu *ip) +{ + struct icl_conn *ic; + + ic = ip->ip_conn; + + ICL_CONN_LOCK_ASSERT(ic); + + if (ic->ic_disconnecting || ic->ic_socket == NULL) { + 
ICL_DEBUG("icl_pdu_queue on closed connection"); + icl_pdu_free(ip); + return; + } + + if (!STAILQ_EMPTY(&ic->ic_to_send)) { + STAILQ_INSERT_TAIL(&ic->ic_to_send, ip, ip_next); + /* + * If the queue is not empty, someone else had already + * signaled the send thread; no need to do that again, + * just return. + */ + return; + } + + STAILQ_INSERT_TAIL(&ic->ic_to_send, ip, ip_next); + cv_signal(&ic->ic_send_cv); +} + +void +icl_soft_conn_pdu_queue(struct icl_conn *ic, struct icl_pdu *ip) +{ + + icl_pdu_queue(ip); +} + +static struct icl_conn * +icl_soft_new_conn(const char *name, struct mtx *lock) +{ + struct icl_conn *ic; + + refcount_acquire(&icl_ncons); + + ic = (struct icl_conn *)kobj_create(&icl_soft_class, M_ICL_SOFT, M_WAITOK | M_ZERO); + + STAILQ_INIT(&ic->ic_to_send); + ic->ic_lock = lock; + cv_init(&ic->ic_send_cv, "icl_tx"); + cv_init(&ic->ic_receive_cv, "icl_rx"); +#ifdef DIAGNOSTIC + refcount_init(&ic->ic_outstanding_pdus, 0); +#endif + ic->ic_max_data_segment_length = ICL_MAX_DATA_SEGMENT_LENGTH; + ic->ic_name = name; + + return (ic); +} + +void +icl_soft_conn_free(struct icl_conn *ic) +{ + + cv_destroy(&ic->ic_send_cv); + cv_destroy(&ic->ic_receive_cv); + kobj_delete((struct kobj *)ic, M_ICL_SOFT); + refcount_release(&icl_ncons); +} + +static int +icl_conn_start(struct icl_conn *ic) +{ + size_t minspace; + struct sockopt opt; + int error, one = 1; + + ICL_CONN_LOCK(ic); + + /* + * XXX: Ugly hack. + */ + if (ic->ic_socket == NULL) { + ICL_CONN_UNLOCK(ic); + return (EINVAL); + } + + ic->ic_receive_state = ICL_CONN_STATE_BHS; + ic->ic_receive_len = sizeof(struct iscsi_bhs); + ic->ic_disconnecting = false; + + ICL_CONN_UNLOCK(ic); + + /* + * For sendspace, this is required because the current code cannot + * send a PDU in pieces; thus, the minimum buffer size is equal + * to the maximum PDU size. "+4" is to account for possible padding. + * + * What we should actually do here is to use autoscaling, but set + * some minimal buffer size to "minspace". 
I don't know a way to do + * that, though. + */ + minspace = sizeof(struct iscsi_bhs) + ic->ic_max_data_segment_length + + ISCSI_HEADER_DIGEST_SIZE + ISCSI_DATA_DIGEST_SIZE + 4; + if (sendspace < minspace) { + ICL_WARN("kern.icl.sendspace too low; must be at least %zd", + minspace); + sendspace = minspace; + } + if (recvspace < minspace) { + ICL_WARN("kern.icl.recvspace too low; must be at least %zd", + minspace); + recvspace = minspace; + } + + error = soreserve(ic->ic_socket, sendspace, recvspace); + if (error != 0) { + ICL_WARN("soreserve failed with error %d", error); + icl_conn_close(ic); + return (error); + } + ic->ic_socket->so_snd.sb_flags |= SB_AUTOSIZE; + ic->ic_socket->so_rcv.sb_flags |= SB_AUTOSIZE; + + /* + * Disable Nagle. + */ + bzero(&opt, sizeof(opt)); + opt.sopt_dir = SOPT_SET; + opt.sopt_level = IPPROTO_TCP; + opt.sopt_name = TCP_NODELAY; + opt.sopt_val = &one; + opt.sopt_valsize = sizeof(one); + error = sosetopt(ic->ic_socket, &opt); + if (error != 0) { + ICL_WARN("disabling TCP_NODELAY failed with error %d", error); + icl_conn_close(ic); + return (error); + } + + /* + * Start threads. + */ + error = kthread_add(icl_send_thread, ic, NULL, NULL, 0, 0, "%stx", + ic->ic_name); + if (error != 0) { + ICL_WARN("kthread_add(9) failed with error %d", error); + icl_conn_close(ic); + return (error); + } + + error = kthread_add(icl_receive_thread, ic, NULL, NULL, 0, 0, "%srx", + ic->ic_name); + if (error != 0) { + ICL_WARN("kthread_add(9) failed with error %d", error); + icl_conn_close(ic); + return (error); + } + + /* + * Register socket upcall, to get notified about incoming PDUs + * and free space to send outgoing ones. 
+ */ + SOCKBUF_LOCK(&ic->ic_socket->so_snd); + soupcall_set(ic->ic_socket, SO_SND, icl_soupcall_send, ic); + SOCKBUF_UNLOCK(&ic->ic_socket->so_snd); + SOCKBUF_LOCK(&ic->ic_socket->so_rcv); + soupcall_set(ic->ic_socket, SO_RCV, icl_soupcall_receive, ic); + SOCKBUF_UNLOCK(&ic->ic_socket->so_rcv); + + return (0); +} + +int +icl_soft_conn_handoff(struct icl_conn *ic, int fd) +{ + struct file *fp; + struct socket *so; + cap_rights_t rights; + int error; + + ICL_CONN_LOCK_ASSERT_NOT(ic); + + /* + * Steal the socket from userland. + */ + error = fget(curthread, fd, + cap_rights_init(&rights, CAP_SOCK_CLIENT), &fp); + if (error != 0) + return (error); + if (fp->f_type != DTYPE_SOCKET) { + fdrop(fp, curthread); + return (EINVAL); + } + so = fp->f_data; + if (so->so_type != SOCK_STREAM) { + fdrop(fp, curthread); + return (EINVAL); + } + + ICL_CONN_LOCK(ic); + + if (ic->ic_socket != NULL) { + ICL_CONN_UNLOCK(ic); + fdrop(fp, curthread); + return (EBUSY); + } + + ic->ic_socket = fp->f_data; + fp->f_ops = &badfileops; + fp->f_data = NULL; + fdrop(fp, curthread); + ICL_CONN_UNLOCK(ic); + + error = icl_conn_start(ic); + + return (error); +} + +void +icl_conn_close(struct icl_conn *ic) +{ + struct icl_pdu *pdu; + + ICL_CONN_LOCK_ASSERT_NOT(ic); + + ICL_CONN_LOCK(ic); + if (ic->ic_socket == NULL) { + ICL_CONN_UNLOCK(ic); + return; + } + + /* + * Deregister socket upcalls. + */ + ICL_CONN_UNLOCK(ic); + SOCKBUF_LOCK(&ic->ic_socket->so_snd); + if (ic->ic_socket->so_snd.sb_upcall != NULL) + soupcall_clear(ic->ic_socket, SO_SND); + SOCKBUF_UNLOCK(&ic->ic_socket->so_snd); + SOCKBUF_LOCK(&ic->ic_socket->so_rcv); + if (ic->ic_socket->so_rcv.sb_upcall != NULL) + soupcall_clear(ic->ic_socket, SO_RCV); + SOCKBUF_UNLOCK(&ic->ic_socket->so_rcv); + ICL_CONN_LOCK(ic); + + ic->ic_disconnecting = true; + + /* + * Wake up the threads, so they can properly terminate. 
+ */ + while (ic->ic_receive_running || ic->ic_send_running) { + //ICL_DEBUG("waiting for send/receive threads to terminate"); + cv_signal(&ic->ic_receive_cv); + cv_signal(&ic->ic_send_cv); + cv_wait(&ic->ic_send_cv, ic->ic_lock); + } + //ICL_DEBUG("send/receive threads terminated"); + + ICL_CONN_UNLOCK(ic); + soclose(ic->ic_socket); + ICL_CONN_LOCK(ic); + ic->ic_socket = NULL; + + if (ic->ic_receive_pdu != NULL) { + //ICL_DEBUG("freeing partially received PDU"); + icl_pdu_free(ic->ic_receive_pdu); + ic->ic_receive_pdu = NULL; + } + + /* + * Remove any outstanding PDUs from the send queue. + */ + while (!STAILQ_EMPTY(&ic->ic_to_send)) { + pdu = STAILQ_FIRST(&ic->ic_to_send); + STAILQ_REMOVE_HEAD(&ic->ic_to_send, ip_next); + icl_pdu_free(pdu); + } + + KASSERT(STAILQ_EMPTY(&ic->ic_to_send), + ("destroying session with non-empty send queue")); +#ifdef DIAGNOSTIC + KASSERT(ic->ic_outstanding_pdus == 0, + ("destroying session with %d outstanding PDUs", + ic->ic_outstanding_pdus)); +#endif + ICL_CONN_UNLOCK(ic); +} + +void +icl_soft_conn_close(struct icl_conn *ic) +{ + + icl_conn_close(ic); +} + +bool +icl_soft_conn_connected(struct icl_conn *ic) +{ + ICL_CONN_LOCK_ASSERT_NOT(ic); + + ICL_CONN_LOCK(ic); + if (ic->ic_socket == NULL) { + ICL_CONN_UNLOCK(ic); + return (false); + } + if (ic->ic_socket->so_error != 0) { + ICL_CONN_UNLOCK(ic); + return (false); + } + ICL_CONN_UNLOCK(ic); + return (true); +} + +static int +icl_soft_limits(size_t *limitp) +{ + + *limitp = 128 * 1024; + + return (0); +} + +#ifdef ICL_KERNEL_PROXY +int +icl_conn_handoff_sock(struct icl_conn *ic, struct socket *so) +{ + int error; + + ICL_CONN_LOCK_ASSERT_NOT(ic); + + if (so->so_type != SOCK_STREAM) + return (EINVAL); + + ICL_CONN_LOCK(ic); + if (ic->ic_socket != NULL) { + ICL_CONN_UNLOCK(ic); + return (EBUSY); + } + ic->ic_socket = so; + ICL_CONN_UNLOCK(ic); + + error = icl_conn_start(ic); + + return (error); +} +#endif /* ICL_KERNEL_PROXY */ + +static int +icl_soft_load(void) +{ + int error; + + 
icl_pdu_zone = uma_zcreate("icl_pdu", + sizeof(struct icl_pdu), NULL, NULL, NULL, NULL, + UMA_ALIGN_PTR, 0); + refcount_init(&icl_ncons, 0); + + /* + * The reason we call this "none" is that to the user, + * it's known as "offload driver"; "offload driver: soft" + * doesn't make much sense. + */ + error = icl_register("none", 0, icl_soft_limits, icl_soft_new_conn); + KASSERT(error == 0, ("failed to register")); + + return (error); +} + +static int +icl_soft_unload(void) +{ + + if (icl_ncons != 0) + return (EBUSY); + + icl_unregister("none"); + + uma_zdestroy(icl_pdu_zone); + + return (0); +} + +static int +icl_soft_modevent(module_t mod, int what, void *arg) +{ + + switch (what) { + case MOD_LOAD: + return (icl_soft_load()); + case MOD_UNLOAD: + return (icl_soft_unload()); + default: + return (EINVAL); + } +} + +moduledata_t icl_soft_data = { + "icl_soft", + icl_soft_modevent, + 0 +}; + +DECLARE_MODULE(icl_soft, icl_soft_data, SI_SUB_DRIVERS, SI_ORDER_MIDDLE); +MODULE_DEPEND(icl_soft, icl, 1, 1, 1); +MODULE_VERSION(icl, 1); diff --git a/sys/dev/iscsi/icl_wrappers.h b/sys/dev/iscsi/icl_wrappers.h new file mode 100644 index 000000000000..374213970462 --- /dev/null +++ b/sys/dev/iscsi/icl_wrappers.h @@ -0,0 +1,116 @@ +/*- + * Copyright (c) 2014 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Edward Tomasz Napierala under sponsorship + * from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +/* + * This file is used to provide the initiator and target with a prettier + * interface. It must not be included by ICL modules, such as icl_soft.c. + */ + +#ifndef ICL_WRAPPERS_H +#define ICL_WRAPPERS_H + +#include <sys/kobj.h> +#include <sys/condvar.h> + +#include <dev/iscsi/icl.h> +#include <icl_conn_if.h> + +static inline struct icl_pdu * +icl_pdu_new(struct icl_conn *ic, int flags) +{ + + return (ICL_CONN_NEW_PDU(ic, flags)); +} + +static inline size_t +icl_pdu_data_segment_length(const struct icl_pdu *ip) +{ + + return (ICL_CONN_PDU_DATA_SEGMENT_LENGTH(ip->ip_conn, ip)); +} + +static inline int +icl_pdu_append_data(struct icl_pdu *ip, const void *addr, size_t len, int flags) +{ + + return (ICL_CONN_PDU_APPEND_DATA(ip->ip_conn, ip, addr, len, flags)); +} + +static inline void +icl_pdu_get_data(struct icl_pdu *ip, size_t off, void *addr, size_t len) +{ + + ICL_CONN_PDU_GET_DATA(ip->ip_conn, ip, off, addr, len); +} + +static inline void +icl_pdu_queue(struct icl_pdu *ip) +{ + + ICL_CONN_PDU_QUEUE(ip->ip_conn, ip); +} + +static inline void +icl_pdu_free(struct icl_pdu *ip) +{ + + ICL_CONN_PDU_FREE(ip->ip_conn, ip); +} + +static inline void +icl_conn_free(struct 
icl_conn *ic) +{ + + ICL_CONN_FREE(ic); +} + +static inline int +icl_conn_handoff(struct icl_conn *ic, int fd) +{ + + return (ICL_CONN_HANDOFF(ic, fd)); +} + +static inline void +icl_conn_close(struct icl_conn *ic) +{ + + ICL_CONN_CLOSE(ic); +} + +static inline bool +icl_conn_connected(struct icl_conn *ic) +{ + + return (ICL_CONN_CONNECTED(ic)); +} + +#endif /* !ICL_WRAPPERS_H */ diff --git a/sys/dev/iscsi/iscsi.c b/sys/dev/iscsi/iscsi.c index 0d762aadb837..8f07620a697d 100644 --- a/sys/dev/iscsi/iscsi.c +++ b/sys/dev/iscsi/iscsi.c @@ -60,6 +60,7 @@ __FBSDID("$FreeBSD$"); #include <cam/scsi/scsi_message.h> #include <dev/iscsi/icl.h> +#include <dev/iscsi/icl_wrappers.h> #include <dev/iscsi/iscsi_ioctl.h> #include <dev/iscsi/iscsi_proto.h> #include <dev/iscsi/iscsi.h> @@ -1730,7 +1731,7 @@ iscsi_ioctl_session_add(struct iscsi_softc *sc, struct iscsi_session_add *isa) return (EBUSY); } - is->is_conn = icl_conn_new("iscsi", &is->is_lock); + is->is_conn = icl_new_conn(NULL, "iscsi", &is->is_lock); is->is_conn->ic_receive = iscsi_receive_callback; is->is_conn->ic_error = iscsi_error_callback; is->is_conn->ic_prv0 = is; diff --git a/sys/dev/usb/input/uhid.c b/sys/dev/usb/input/uhid.c index 02642a00b8b9..ece5e9573309 100644 --- a/sys/dev/usb/input/uhid.c +++ b/sys/dev/usb/input/uhid.c @@ -734,7 +734,7 @@ uhid_attach(device_t dev) if (uaa->info.idProduct == USB_PRODUCT_WACOM_GRAPHIRE) { sc->sc_repdesc_size = sizeof(uhid_graphire_report_descr); - sc->sc_repdesc_ptr = (void *)&uhid_graphire_report_descr; + sc->sc_repdesc_ptr = __DECONST(void *, &uhid_graphire_report_descr); sc->sc_flags |= UHID_FLAG_STATIC_DESC; } else if (uaa->info.idProduct == USB_PRODUCT_WACOM_GRAPHIRE3_4X5) { @@ -755,7 +755,7 @@ uhid_attach(device_t dev) usbd_errstr(error)); } sc->sc_repdesc_size = sizeof(uhid_graphire3_4x5_report_descr); - sc->sc_repdesc_ptr = (void *)&uhid_graphire3_4x5_report_descr; + sc->sc_repdesc_ptr = __DECONST(void *, &uhid_graphire3_4x5_report_descr); sc->sc_flags |= 
UHID_FLAG_STATIC_DESC; } } else if ((uaa->info.bInterfaceClass == UICLASS_VENDOR) && @@ -775,7 +775,7 @@ uhid_attach(device_t dev) } /* the Xbox 360 gamepad has no report descriptor */ sc->sc_repdesc_size = sizeof(uhid_xb360gp_report_descr); - sc->sc_repdesc_ptr = (void *)&uhid_xb360gp_report_descr; + sc->sc_repdesc_ptr = __DECONST(void *, &uhid_xb360gp_report_descr); sc->sc_flags |= UHID_FLAG_STATIC_DESC; } if (sc->sc_repdesc_ptr == NULL) { diff --git a/sys/fs/tmpfs/tmpfs_subr.c b/sys/fs/tmpfs/tmpfs_subr.c index c1930f159ca4..af0a8af29050 100644 --- a/sys/fs/tmpfs/tmpfs_subr.c +++ b/sys/fs/tmpfs/tmpfs_subr.c @@ -1434,7 +1434,8 @@ tmpfs_check_mtime(struct vnode *vp) if ((obj->flags & OBJ_TMPFS_DIRTY) != 0) { obj->flags &= ~OBJ_TMPFS_DIRTY; node = VP_TO_TMPFS_NODE(vp); - node->tn_status |= TMPFS_NODE_MODIFIED; + node->tn_status |= TMPFS_NODE_MODIFIED | + TMPFS_NODE_CHANGED; } VM_OBJECT_WUNLOCK(obj); } diff --git a/sys/fs/tmpfs/tmpfs_vnops.c b/sys/fs/tmpfs/tmpfs_vnops.c index 65c5f82d5de5..885f84cdf2b5 100644 --- a/sys/fs/tmpfs/tmpfs_vnops.c +++ b/sys/fs/tmpfs/tmpfs_vnops.c @@ -453,7 +453,6 @@ tmpfs_write(struct vop_write_args *v) struct tmpfs_node *node; off_t oldsize; int error, ioflag; - boolean_t extended; vp = v->a_vp; uio = v->a_uio; @@ -473,8 +472,7 @@ tmpfs_write(struct vop_write_args *v) return (EFBIG); if (vn_rlimit_fsize(vp, uio, uio->uio_td)) return (EFBIG); - extended = uio->uio_offset + uio->uio_resid > node->tn_size; - if (extended) { + if (uio->uio_offset + uio->uio_resid > node->tn_size) { error = tmpfs_reg_resize(vp, uio->uio_offset + uio->uio_resid, FALSE); if (error != 0) @@ -483,7 +481,7 @@ tmpfs_write(struct vop_write_args *v) error = uiomove_object(node->tn_reg.tn_aobj, node->tn_size, uio); node->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED | - (extended ? 
TMPFS_NODE_CHANGED : 0); + TMPFS_NODE_CHANGED; if (node->tn_mode & (S_ISUID | S_ISGID)) { if (priv_check_cred(v->a_cred, PRIV_VFS_RETAINSUGID, 0)) node->tn_mode &= ~(S_ISUID | S_ISGID); diff --git a/sys/kern/kern_umtx.c b/sys/kern/kern_umtx.c index 5b42c6fd118a..317e05bfb57a 100644 --- a/sys/kern/kern_umtx.c +++ b/sys/kern/kern_umtx.c @@ -1302,6 +1302,47 @@ umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td) return (1); } +static struct umtx_pi * +umtx_pi_next(struct umtx_pi *pi) +{ + struct umtx_q *uq_owner; + + if (pi->pi_owner == NULL) + return (NULL); + uq_owner = pi->pi_owner->td_umtxq; + if (uq_owner == NULL) + return (NULL); + return (uq_owner->uq_pi_blocked); +} + +/* + * Floyd's Cycle-Finding Algorithm. + */ +static bool +umtx_pi_check_loop(struct umtx_pi *pi) +{ + struct umtx_pi *pi1; /* fast iterator */ + + mtx_assert(&umtx_lock, MA_OWNED); + if (pi == NULL) + return (false); + pi1 = pi; + for (;;) { + pi = umtx_pi_next(pi); + if (pi == NULL) + break; + pi1 = umtx_pi_next(pi1); + if (pi1 == NULL) + break; + pi1 = umtx_pi_next(pi1); + if (pi1 == NULL) + break; + if (pi == pi1) + return (true); + } + return (false); +} + /* * Propagate priority when a thread is blocked on POSIX * PI mutex. 
@@ -1319,6 +1360,8 @@ umtx_propagate_priority(struct thread *td) pi = uq->uq_pi_blocked; if (pi == NULL) return; + if (umtx_pi_check_loop(pi)) + return; for (;;) { td = pi->pi_owner; @@ -1362,6 +1405,8 @@ umtx_repropagate_priority(struct umtx_pi *pi) mtx_assert(&umtx_lock, MA_OWNED); + if (umtx_pi_check_loop(pi)) + return; while (pi != NULL && pi->pi_owner != NULL) { pri = PRI_MAX; uq_owner = pi->pi_owner->td_umtxq; @@ -1694,6 +1739,11 @@ do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, continue; } + if ((owner & ~UMUTEX_CONTESTED) == id) { + error = EDEADLK; + break; + } + if (try != 0) { error = EBUSY; break; diff --git a/sys/mips/atheros/ar71xx_gpio.c b/sys/mips/atheros/ar71xx_gpio.c index 45ac8ffda2ed..b48e4a6de21b 100644 --- a/sys/mips/atheros/ar71xx_gpio.c +++ b/sys/mips/atheros/ar71xx_gpio.c @@ -341,7 +341,6 @@ static int ar71xx_gpio_attach(device_t dev) { struct ar71xx_gpio_softc *sc = device_get_softc(dev); - int error = 0; int i, j, maxpin; int mask, pinon; uint32_t oe; @@ -358,14 +357,14 @@ ar71xx_gpio_attach(device_t dev) if (sc->gpio_mem_res == NULL) { device_printf(dev, "couldn't map memory\n"); - error = ENXIO; ar71xx_gpio_detach(dev); - return(error); + return (ENXIO); } if ((sc->gpio_irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &sc->gpio_irq_rid, RF_SHAREABLE | RF_ACTIVE)) == NULL) { device_printf(dev, "unable to allocate IRQ resource\n"); + ar71xx_gpio_detach(dev); return (ENXIO); } @@ -373,6 +372,7 @@ ar71xx_gpio_attach(device_t dev) ar71xx_gpio_filter, ar71xx_gpio_intr, sc, &sc->gpio_ih))) { device_printf(dev, "WARNING: unable to register interrupt handler\n"); + ar71xx_gpio_detach(dev); return (ENXIO); } @@ -447,12 +447,16 @@ ar71xx_gpio_detach(device_t dev) KASSERT(mtx_initialized(&sc->gpio_mtx), ("gpio mutex not initialized")); bus_generic_detach(dev); - + if (sc->gpio_ih) + bus_teardown_intr(dev, sc->gpio_irq_res, sc->gpio_ih); + if (sc->gpio_irq_res) + bus_release_resource(dev, SYS_RES_IRQ, sc->gpio_irq_rid, + 
sc->gpio_irq_res); if (sc->gpio_mem_res) bus_release_resource(dev, SYS_RES_MEMORY, sc->gpio_mem_rid, sc->gpio_mem_res); - - free(sc->gpio_pins, M_DEVBUF); + if (sc->gpio_pins) + free(sc->gpio_pins, M_DEVBUF); mtx_destroy(&sc->gpio_mtx); return(0); diff --git a/sys/mips/atheros/ar71xx_gpiovar.h b/sys/mips/atheros/ar71xx_gpiovar.h index a1c6e2f1c5a6..32337ccea87a 100644 --- a/sys/mips/atheros/ar71xx_gpiovar.h +++ b/sys/mips/atheros/ar71xx_gpiovar.h @@ -57,12 +57,12 @@ struct ar71xx_gpio_softc { device_t dev; - struct mtx gpio_mtx; - struct resource *gpio_mem_res; - int gpio_mem_rid; - struct resource *gpio_irq_res; - int gpio_irq_rid; - void *gpio_ih; + struct mtx gpio_mtx; + struct resource *gpio_mem_res; + int gpio_mem_rid; + struct resource *gpio_irq_res; + int gpio_irq_rid; + void *gpio_ih; int gpio_npins; struct gpio_pin *gpio_pins; }; diff --git a/sys/mips/cavium/octeon_gpio.c b/sys/mips/cavium/octeon_gpio.c index 36868481a996..1053976c508f 100644 --- a/sys/mips/cavium/octeon_gpio.c +++ b/sys/mips/cavium/octeon_gpio.c @@ -383,6 +383,7 @@ octeon_gpio_attach(device_t dev) OCTEON_IRQ_GPIO0 + i, OCTEON_IRQ_GPIO0 + i, 1, RF_SHAREABLE | RF_ACTIVE)) == NULL) { device_printf(dev, "unable to allocate IRQ resource\n"); + octeon_gpio_detach(dev); return (ENXIO); } @@ -392,6 +393,7 @@ octeon_gpio_attach(device_t dev) &(sc->gpio_intr_cookies[i]), &sc->gpio_ih[i]))) { device_printf(dev, "WARNING: unable to register interrupt handler\n"); + octeon_gpio_detach(dev); return (ENXIO); } } @@ -448,11 +450,14 @@ octeon_gpio_detach(device_t dev) KASSERT(mtx_initialized(&sc->gpio_mtx), ("gpio mutex not initialized")); for ( i = 0; i < OCTEON_GPIO_IRQS; i++) { - bus_release_resource(dev, SYS_RES_IRQ, - sc->gpio_irq_rid[i], sc->gpio_irq_res[i]); + if (sc->gpio_ih[i]) + bus_teardown_intr(dev, sc->gpio_irq_res[i], + sc->gpio_ih[i]); + if (sc->gpio_irq_res[i]) + bus_release_resource(dev, SYS_RES_IRQ, + sc->gpio_irq_rid[i], sc->gpio_irq_res[i]); } bus_generic_detach(dev); - 
mtx_destroy(&sc->gpio_mtx); return(0); diff --git a/sys/mips/cavium/octeon_gpiovar.h b/sys/mips/cavium/octeon_gpiovar.h index 5717b7cc8636..a9b814fefb8d 100644 --- a/sys/mips/cavium/octeon_gpiovar.h +++ b/sys/mips/cavium/octeon_gpiovar.h @@ -43,11 +43,11 @@ struct octeon_gpio_softc { device_t dev; - struct mtx gpio_mtx; - struct resource *gpio_irq_res[OCTEON_GPIO_IRQS]; - int gpio_irq_rid[OCTEON_GPIO_IRQS]; - void *gpio_ih[OCTEON_GPIO_IRQS]; - void *gpio_intr_cookies[OCTEON_GPIO_IRQS]; + struct mtx gpio_mtx; + struct resource *gpio_irq_res[OCTEON_GPIO_IRQS]; + int gpio_irq_rid[OCTEON_GPIO_IRQS]; + void *gpio_ih[OCTEON_GPIO_IRQS]; + void *gpio_intr_cookies[OCTEON_GPIO_IRQS]; int gpio_npins; struct gpio_pin gpio_pins[OCTEON_GPIO_PINS]; }; diff --git a/sys/mips/rt305x/rt305x_gpio.c b/sys/mips/rt305x/rt305x_gpio.c index a5d72549bd03..bf03bd149fcd 100644 --- a/sys/mips/rt305x/rt305x_gpio.c +++ b/sys/mips/rt305x/rt305x_gpio.c @@ -430,7 +430,7 @@ static int rt305x_gpio_attach(device_t dev) { struct rt305x_gpio_softc *sc = device_get_softc(dev); - int error = 0, i; + int i; uint64_t avlpins = 0; sc->reset_gpio = DAP1350_RESET_GPIO; @@ -446,14 +446,14 @@ rt305x_gpio_attach(device_t dev) if (sc->gpio_mem_res == NULL) { device_printf(dev, "couldn't map memory\n"); - error = ENXIO; rt305x_gpio_detach(dev); - return(error); + return (ENXIO); } if ((sc->gpio_irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &sc->gpio_irq_rid, RF_SHAREABLE | RF_ACTIVE)) == NULL) { device_printf(dev, "unable to allocate IRQ resource\n"); + rt305x_gpio_detach(dev); return (ENXIO); } @@ -462,6 +462,7 @@ rt305x_gpio_attach(device_t dev) rt305x_gpio_intr, NULL, sc, &sc->gpio_ih))) { device_printf(dev, "WARNING: unable to register interrupt handler\n"); + rt305x_gpio_detach(dev); return (ENXIO); } @@ -515,11 +516,14 @@ rt305x_gpio_detach(device_t dev) KASSERT(mtx_initialized(&sc->gpio_mtx), ("gpio mutex not initialized")); bus_generic_detach(dev); - + if (sc->gpio_ih) + bus_teardown_intr(dev, 
sc->gpio_irq_res, sc->gpio_ih); + if (sc->gpio_irq_res) + bus_release_resource(dev, SYS_RES_IRQ, sc->gpio_irq_rid, + sc->gpio_irq_res); if (sc->gpio_mem_res) bus_release_resource(dev, SYS_RES_MEMORY, sc->gpio_mem_rid, sc->gpio_mem_res); - mtx_destroy(&sc->gpio_mtx); return(0); diff --git a/sys/mips/rt305x/rt305x_gpiovar.h b/sys/mips/rt305x/rt305x_gpiovar.h index ed79e2db90be..85cd6f6429da 100644 --- a/sys/mips/rt305x/rt305x_gpiovar.h +++ b/sys/mips/rt305x/rt305x_gpiovar.h @@ -30,12 +30,12 @@ struct rt305x_gpio_softc { device_t dev; - struct mtx gpio_mtx; - struct resource *gpio_mem_res; - int gpio_mem_rid; - struct resource *gpio_irq_res; - int gpio_irq_rid; - void *gpio_ih; + struct mtx gpio_mtx; + struct resource *gpio_mem_res; + int gpio_mem_rid; + struct resource *gpio_irq_res; + int gpio_irq_rid; + void *gpio_ih; int gpio_npins; struct gpio_pin gpio_pins[NGPIO]; int reset_gpio; diff --git a/sys/modules/ctl/Makefile b/sys/modules/ctl/Makefile index 77dc4e12b382..e97ec38d2b55 100644 --- a/sys/modules/ctl/Makefile +++ b/sys/modules/ctl/Makefile @@ -22,8 +22,11 @@ SRCS+= scsi_ctl.c SRCS+= bus_if.h SRCS+= device_if.h SRCS+= vnode_if.h +SRCS+= icl_conn_if.h SRCS+= opt_cam.h #CFLAGS+=-DICL_KERNEL_PROXY +MFILES= kern/bus_if.m kern/device_if.m dev/iscsi/icl_conn_if.m + .include <bsd.kmod.mk> diff --git a/sys/modules/iscsi/Makefile b/sys/modules/iscsi/Makefile index 2bff545ef2ff..74a971c92aae 100644 --- a/sys/modules/iscsi/Makefile +++ b/sys/modules/iscsi/Makefile @@ -6,10 +6,15 @@ KMOD= iscsi SRCS= iscsi.c SRCS+= icl.c SRCS+= icl_proxy.c +SRCS+= icl_soft.c SRCS+= opt_cam.h SRCS+= bus_if.h SRCS+= device_if.h +SRCS+= icl_conn_if.c +SRCS+= icl_conn_if.h #CFLAGS+=-DICL_KERNEL_PROXY +MFILES= kern/bus_if.m kern/device_if.m dev/iscsi/icl_conn_if.m + .include <bsd.kmod.mk> diff --git a/sys/sys/proc.h b/sys/sys/proc.h index 9b6c695d37fb..64b99fce0918 100644 --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -752,6 +752,8 @@ extern pid_t pid_max; #define STOPEVENT(p, e, v) do { \ + 
WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, \ + "checking stopevent %d", (e)); \ if ((p)->p_stops & (e)) { \ PROC_LOCK(p); \ stopevent((p), (e), (v)); \ |