aboutsummaryrefslogtreecommitdiff
path: root/usr.sbin
diff options
context:
space:
mode:
authorAlexander Motin <mav@FreeBSD.org>2015-04-18 20:10:19 +0000
committerAlexander Motin <mav@FreeBSD.org>2015-04-18 20:10:19 +0000
commitbb1524af0cb91ed8d64d4c86ac073be461ff55eb (patch)
tree8ca6046216bdf0cb81ec044a06c118576214e084 /usr.sbin
parent2ef26470c5364436fe67f4c4df9a7ae373aa76f8 (diff)
downloadsrc-bb1524af0cb91ed8d64d4c86ac073be461ff55eb.tar.gz
src-bb1524af0cb91ed8d64d4c86ac073be461ff55eb.zip
Work around problems with bhyve virtual disk operation on top of GEOM providers.
GEOM does not support scatter/gather lists in its I/O requests. Such requests are cut into pieces by physio(), which may be problematic if those pieces are not multiples of the provider's sector size. If such a case is detected, move the data through a temporary sequential buffer.
MFC after: 2 weeks
Notes
Notes: svn path=/head/; revision=281700
Diffstat (limited to 'usr.sbin')
-rw-r--r--usr.sbin/bhyve/block_if.c108
-rw-r--r--usr.sbin/bhyve/block_if.h1
-rw-r--r--usr.sbin/bhyve/pci_ahci.c4
-rw-r--r--usr.sbin/bhyve/pci_virtio_block.c5
4 files changed, 100 insertions, 18 deletions
diff --git a/usr.sbin/bhyve/block_if.c b/usr.sbin/bhyve/block_if.c
index b8b27a659df8..bcb1617d4c09 100644
--- a/usr.sbin/bhyve/block_if.c
+++ b/usr.sbin/bhyve/block_if.c
@@ -85,6 +85,7 @@ struct blockif_ctxt {
int bc_magic;
int bc_fd;
int bc_ischr;
+ int bc_isgeom;
int bc_candelete;
int bc_rdonly;
off_t bc_size;
@@ -198,27 +199,93 @@ blockif_complete(struct blockif_ctxt *bc, struct blockif_elem *be)
}
static void
-blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be)
+blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be, uint8_t *buf)
{
struct blockif_req *br;
off_t arg[2];
- int err;
+ ssize_t clen, len, off, boff, voff;
+ int i, err;
br = be->be_req;
+ if (br->br_iovcnt <= 1)
+ buf = NULL;
err = 0;
-
switch (be->be_op) {
case BOP_READ:
- if (preadv(bc->bc_fd, br->br_iov, br->br_iovcnt,
- br->br_offset) < 0)
- err = errno;
+ if (buf == NULL) {
+ if ((len = preadv(bc->bc_fd, br->br_iov, br->br_iovcnt,
+ br->br_offset)) < 0)
+ err = errno;
+ else
+ br->br_resid -= len;
+ break;
+ }
+ i = 0;
+ off = voff = 0;
+ while (br->br_resid > 0) {
+ len = MIN(br->br_resid, MAXPHYS);
+ if (pread(bc->bc_fd, buf, len, br->br_offset +
+ off) < 0) {
+ err = errno;
+ break;
+ }
+ boff = 0;
+ do {
+ clen = MIN(len - boff, br->br_iov[i].iov_len -
+ voff);
+ memcpy(br->br_iov[i].iov_base + voff,
+ buf + boff, clen);
+ if (clen < br->br_iov[i].iov_len - voff)
+ voff += clen;
+ else {
+ i++;
+ voff = 0;
+ }
+ boff += clen;
+ } while (boff < len);
+ off += len;
+ br->br_resid -= len;
+ }
break;
case BOP_WRITE:
- if (bc->bc_rdonly)
+ if (bc->bc_rdonly) {
err = EROFS;
- else if (pwritev(bc->bc_fd, br->br_iov, br->br_iovcnt,
- br->br_offset) < 0)
- err = errno;
+ break;
+ }
+ if (buf == NULL) {
+ if ((len = pwritev(bc->bc_fd, br->br_iov, br->br_iovcnt,
+ br->br_offset)) < 0)
+ err = errno;
+ else
+ br->br_resid -= len;
+ break;
+ }
+ i = 0;
+ off = voff = 0;
+ while (br->br_resid > 0) {
+ len = MIN(br->br_resid, MAXPHYS);
+ boff = 0;
+ do {
+ clen = MIN(len - boff, br->br_iov[i].iov_len -
+ voff);
+ memcpy(buf + boff,
+ br->br_iov[i].iov_base + voff, clen);
+ if (clen < br->br_iov[i].iov_len - voff)
+ voff += clen;
+ else {
+ i++;
+ voff = 0;
+ }
+ boff += clen;
+ } while (boff < len);
+ if (pwrite(bc->bc_fd, buf, len, br->br_offset +
+ off) < 0) {
+ err = errno;
+ break;
+ }
+ off += len;
+ br->br_resid -= len;
+ }
break;
case BOP_FLUSH:
if (bc->bc_ischr) {
@@ -234,9 +301,11 @@ blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be)
err = EROFS;
else if (bc->bc_ischr) {
arg[0] = br->br_offset;
- arg[1] = br->br_iov[0].iov_len;
+ arg[1] = br->br_resid;
if (ioctl(bc->bc_fd, DIOCGDELETE, arg))
err = errno;
+ else
+ br->br_resid = 0;
} else
err = EOPNOTSUPP;
break;
@@ -256,15 +325,20 @@ blockif_thr(void *arg)
struct blockif_ctxt *bc;
struct blockif_elem *be;
pthread_t t;
+ uint8_t *buf;
bc = arg;
+ if (bc->bc_isgeom)
+ buf = malloc(MAXPHYS);
+ else
+ buf = NULL;
t = pthread_self();
pthread_mutex_lock(&bc->bc_mtx);
for (;;) {
while (blockif_dequeue(bc, t, &be)) {
pthread_mutex_unlock(&bc->bc_mtx);
- blockif_proc(bc, be);
+ blockif_proc(bc, be, buf);
pthread_mutex_lock(&bc->bc_mtx);
blockif_complete(bc, be);
}
@@ -275,6 +349,8 @@ blockif_thr(void *arg)
}
pthread_mutex_unlock(&bc->bc_mtx);
+ if (buf)
+ free(buf);
pthread_exit(NULL);
return (NULL);
}
@@ -315,13 +391,14 @@ struct blockif_ctxt *
blockif_open(const char *optstr, const char *ident)
{
char tname[MAXCOMLEN + 1];
+ char name[MAXPATHLEN];
char *nopt, *xopts;
struct blockif_ctxt *bc;
struct stat sbuf;
struct diocgattr_arg arg;
off_t size, psectsz, psectoff;
int extra, fd, i, sectsz;
- int nocache, sync, ro, candelete;
+ int nocache, sync, ro, candelete, geom;
pthread_once(&blockif_once, blockif_init);
@@ -375,7 +452,7 @@ blockif_open(const char *optstr, const char *ident)
size = sbuf.st_size;
sectsz = DEV_BSIZE;
psectsz = psectoff = 0;
- candelete = 0;
+ candelete = geom = 0;
if (S_ISCHR(sbuf.st_mode)) {
if (ioctl(fd, DIOCGMEDIASIZE, &size) < 0 ||
ioctl(fd, DIOCGSECTORSIZE, &sectsz)) {
@@ -391,6 +468,8 @@ blockif_open(const char *optstr, const char *ident)
arg.len = sizeof(arg.value.i);
if (ioctl(fd, DIOCGATTR, &arg) == 0)
candelete = arg.value.i;
+ if (ioctl(fd, DIOCGPROVIDERNAME, name) == 0)
+ geom = 1;
} else
psectsz = sbuf.st_blksize;
@@ -403,6 +482,7 @@ blockif_open(const char *optstr, const char *ident)
bc->bc_magic = BLOCKIF_SIG;
bc->bc_fd = fd;
bc->bc_ischr = S_ISCHR(sbuf.st_mode);
+ bc->bc_isgeom = geom;
bc->bc_candelete = candelete;
bc->bc_rdonly = ro;
bc->bc_size = size;
diff --git a/usr.sbin/bhyve/block_if.h b/usr.sbin/bhyve/block_if.h
index 1cb7c8048688..8e63407e36b8 100644
--- a/usr.sbin/bhyve/block_if.h
+++ b/usr.sbin/bhyve/block_if.h
@@ -45,6 +45,7 @@ struct blockif_req {
struct iovec br_iov[BLOCKIF_IOV_MAX];
int br_iovcnt;
off_t br_offset;
+ ssize_t br_resid;
void (*br_callback)(struct blockif_req *req, int err);
void *br_param;
};
diff --git a/usr.sbin/bhyve/pci_ahci.c b/usr.sbin/bhyve/pci_ahci.c
index 87a1cead20da..95cfdd9d2ba3 100644
--- a/usr.sbin/bhyve/pci_ahci.c
+++ b/usr.sbin/bhyve/pci_ahci.c
@@ -571,6 +571,7 @@ ahci_build_iov(struct ahci_port *p, struct ahci_ioreq *aior,
}
breq->br_iovcnt = j;
+ breq->br_resid = todo;
aior->done += todo;
aior->more = (aior->done < aior->len && i < prdtl);
}
@@ -776,8 +777,7 @@ next:
breq = &aior->io_req;
breq->br_offset = elba * blockif_sectsz(p->bctx);
- breq->br_iovcnt = 1;
- breq->br_iov[0].iov_len = elen * blockif_sectsz(p->bctx);
+ breq->br_resid = elen * blockif_sectsz(p->bctx);
/*
* Mark this command in-flight.
diff --git a/usr.sbin/bhyve/pci_virtio_block.c b/usr.sbin/bhyve/pci_virtio_block.c
index 967b52855b4c..35daa1f3921e 100644
--- a/usr.sbin/bhyve/pci_virtio_block.c
+++ b/usr.sbin/bhyve/pci_virtio_block.c
@@ -198,7 +198,7 @@ pci_vtblk_proc(struct pci_vtblk_softc *sc, struct vqueue_info *vq)
struct pci_vtblk_ioreq *io;
int i, n;
int err;
- int iolen;
+ ssize_t iolen;
int writeop, type;
off_t offset;
struct iovec iov[BLOCKIF_IOV_MAX + 2];
@@ -246,8 +246,9 @@ pci_vtblk_proc(struct pci_vtblk_softc *sc, struct vqueue_info *vq)
assert(((flags[i] & VRING_DESC_F_WRITE) == 0) == writeop);
iolen += iov[i].iov_len;
}
+ io->io_req.br_resid = iolen;
- DPRINTF(("virtio-block: %s op, %d bytes, %d segs, offset %ld\n\r",
+ DPRINTF(("virtio-block: %s op, %zd bytes, %d segs, offset %ld\n\r",
writeop ? "write" : "read/ident", iolen, i - 1, offset));
switch (type) {