author	Pawel Jakub Dawidek <pjd@FreeBSD.org>	2008-11-17 20:49:29 +0000
committer	Pawel Jakub Dawidek <pjd@FreeBSD.org>	2008-11-17 20:49:29 +0000
commit	1ba4a712dde6e6c613fc411a96958b4ade67de4c
tree	81b89fa4ac6467771d5aa291a97f4665981a6108 /sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c
parent	8fc061164d74a4c9775f39da3c0b5d02112866c8
Update ZFS from version 6 to 13 and bring in some FreeBSD-specific changes.
This brings a huge number of changes; I'll enumerate only the user-visible ones:
- Delegated Administration
Allows regular users to perform ZFS operations, like file system
creation, snapshot creation, etc.
- L2ARC
Level 2 cache for ZFS - allows additional disks to be used as cache.
Huge performance improvement, mostly for random reads of mostly
static content.
- slog
Allows additional disks to be used for the ZFS Intent Log to speed up
operations like fsync(2).
- vfs.zfs.super_owner
Allows a regular user to perform privileged operations on files stored
on ZFS file systems they own. Be very careful with this one (a small
userland sketch for inspecting this knob follows the commit message).
- chflags(2)
Not all the flags are supported. This still needs work.
- ZFSBoot
Support for booting off a ZFS pool. Not finished, AFAIK.
Submitted by: dfr
- Snapshot properties
- New failure modes
Previously, if a write request failed, the system panicked. Now one
can select one of three failure modes:
- panic - panic on write error
- wait - wait for disk to reappear
- continue - serve read requests if possible, block write requests
- Refquota, refreservation properties
Just like the quota and reservation properties, but they don't count
space consumed by child file systems, clones, and snapshots.
- Sparse volumes
ZVOLs that don't reserve space in the pool.
- Extended attributes
Compatible with extattr(2).
- NFSv4-ACLs
Not sure about the status, might not be complete yet.
Submitted by: trasz
- Creation-time properties
- Regression tests for zpool(8) command.
Obtained from: OpenSolaris
Notes:
svn path=/head/; revision=185029
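
The vfs.zfs.super_owner knob mentioned above is a plain integer sysctl, so it can be inspected and toggled from userland. Below is a minimal sketch using sysctlbyname(3); it assumes the knob is an int and writable at runtime (CTLFLAG_RW) as in the FreeBSD port. Enabling it requires root and weakens the usual privilege checks, so treat this as illustration only.

/*
 * Sketch: read and (as root) enable vfs.zfs.super_owner via
 * sysctlbyname(3).  Assumes the knob is an int and CTLFLAG_RW;
 * enabling it is risky on multi-user systems.
 */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	int cur, on = 1;
	size_t len = sizeof(cur);

	/* Read the current value. */
	if (sysctlbyname("vfs.zfs.super_owner", &cur, &len, NULL, 0) == -1) {
		perror("read vfs.zfs.super_owner");
		return (1);
	}
	printf("vfs.zfs.super_owner = %d\n", cur);

	/* Flip it on; fails with EPERM unless running as root. */
	if (sysctlbyname("vfs.zfs.super_owner", NULL, NULL, &on,
	    sizeof(on)) == -1) {
		perror("set vfs.zfs.super_owner");
		return (1);
	}
	return (0);
}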
Diffstat (limited to 'sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c')
-rw-r--r--	sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c	84
1 file changed, 44 insertions(+), 40 deletions(-)
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c
index 8ef524f71931..cd4d5aef241f 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c
@@ -19,12 +19,10 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
-#pragma ident	"%Z%%M%	%I%	%E% SMI"
-
 #include <sys/zfs_context.h>
 #include <sys/spa.h>
 #include <sys/vdev_impl.h>
@@ -55,6 +53,25 @@ int zfs_vdev_ramp_rate = 2;
  */
 int zfs_vdev_aggregation_limit = SPA_MAXBLOCKSIZE;
 
+SYSCTL_DECL(_vfs_zfs_vdev);
+TUNABLE_INT("vfs.zfs.vdev.max_pending", &zfs_vdev_max_pending);
+SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, max_pending, CTLFLAG_RDTUN,
+    &zfs_vdev_max_pending, 0, "Maximum I/O requests pending on each device");
+TUNABLE_INT("vfs.zfs.vdev.min_pending", &zfs_vdev_min_pending);
+SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, min_pending, CTLFLAG_RDTUN,
+    &zfs_vdev_min_pending, 0,
+    "Initial number of I/O requests pending to each device");
+TUNABLE_INT("vfs.zfs.vdev.time_shift", &zfs_vdev_time_shift);
+SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, time_shift, CTLFLAG_RDTUN,
+    &zfs_vdev_time_shift, 0, "Used for calculating I/O request deadline");
+TUNABLE_INT("vfs.zfs.vdev.ramp_rate", &zfs_vdev_ramp_rate);
+SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, ramp_rate, CTLFLAG_RDTUN,
+    &zfs_vdev_ramp_rate, 0, "Exponential I/O issue ramp-up rate");
+TUNABLE_INT("vfs.zfs.vdev.aggregation_limit", &zfs_vdev_aggregation_limit);
+SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, aggregation_limit, CTLFLAG_RDTUN,
+    &zfs_vdev_aggregation_limit, 0,
+    "I/O requests are aggregated up to this size");
+
 /*
  * Virtual device vector for disk I/O scheduling.
  */
@@ -162,7 +179,7 @@ vdev_queue_agg_io_done(zio_t *aio)
 		aio->io_delegate_list = dio->io_delegate_next;
 		dio->io_delegate_next = NULL;
 		dio->io_error = aio->io_error;
-		zio_next_stage(dio);
+		zio_execute(dio);
 	}
 	ASSERT3U(offset, ==, aio->io_size);
 
@@ -172,11 +189,8 @@ vdev_queue_agg_io_done(zio_t *aio)
 #define	IS_ADJACENT(io, nio) \
 	((io)->io_offset + (io)->io_size == (nio)->io_offset)
 
-typedef void zio_issue_func_t(zio_t *);
-
 static zio_t *
-vdev_queue_io_to_issue(vdev_queue_t *vq, uint64_t pending_limit,
-	zio_issue_func_t **funcp)
+vdev_queue_io_to_issue(vdev_queue_t *vq, uint64_t pending_limit)
 {
 	zio_t *fio, *lio, *aio, *dio;
 	avl_tree_t *tree;
@@ -184,8 +198,6 @@ vdev_queue_io_to_issue(vdev_queue_t *vq, uint64_t pending_limit,
 
 	ASSERT(MUTEX_HELD(&vq->vq_lock));
 
-	*funcp = NULL;
-
 	if (avl_numnodes(&vq->vq_pending_tree) >= pending_limit ||
 	    avl_numnodes(&vq->vq_deadline_tree) == 0)
 		return (NULL);
@@ -196,6 +208,7 @@ vdev_queue_io_to_issue(vdev_queue_t *vq, uint64_t pending_limit,
 	size = fio->io_size;
 
 	while ((dio = AVL_PREV(tree, fio)) != NULL && IS_ADJACENT(dio, fio) &&
+	    !((dio->io_flags | fio->io_flags) & ZIO_FLAG_DONT_AGGREGATE) &&
 	    size + dio->io_size <= zfs_vdev_aggregation_limit) {
 		dio->io_delegate_next = fio;
 		fio = dio;
@@ -203,6 +216,7 @@ vdev_queue_io_to_issue(vdev_queue_t *vq, uint64_t pending_limit,
 	}
 
 	while ((dio = AVL_NEXT(tree, lio)) != NULL && IS_ADJACENT(lio, dio) &&
+	    !((lio->io_flags | dio->io_flags) & ZIO_FLAG_DONT_AGGREGATE) &&
 	    size + dio->io_size <= zfs_vdev_aggregation_limit) {
 		lio->io_delegate_next = dio;
 		lio = dio;
@@ -212,15 +226,12 @@ vdev_queue_io_to_issue(vdev_queue_t *vq, uint64_t pending_limit,
 	if (fio != lio) {
 		char *buf = zio_buf_alloc(size);
 		uint64_t offset = 0;
-		int nagg = 0;
 
 		ASSERT(size <= zfs_vdev_aggregation_limit);
 
-		aio = zio_vdev_child_io(fio, NULL, fio->io_vd,
-		    fio->io_offset, buf, size, fio->io_type,
-		    ZIO_PRIORITY_NOW, ZIO_FLAG_DONT_QUEUE |
-		    ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_PROPAGATE |
-		    ZIO_FLAG_NOBOOKMARK,
+		aio = zio_vdev_delegated_io(fio->io_vd, fio->io_offset,
+		    buf, size, fio->io_type, ZIO_PRIORITY_NOW,
+		    ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE,
 		    vdev_queue_agg_io_done, NULL);
 
 		aio->io_delegate_list = fio;
@@ -233,19 +244,12 @@ vdev_queue_io_to_issue(vdev_queue_t *vq, uint64_t pending_limit,
 			offset += dio->io_size;
 			vdev_queue_io_remove(vq, dio);
 			zio_vdev_io_bypass(dio);
-			nagg++;
 		}
 
 		ASSERT(offset == size);
 
-		dprintf("%5s  T=%llu off=%8llx agg=%3d "
-		    "old=%5llx new=%5llx\n",
-		    zio_type_name[fio->io_type],
-		    fio->io_deadline, fio->io_offset, nagg, fio->io_size, size);
-
 		avl_add(&vq->vq_pending_tree, aio);
-		*funcp = zio_nowait;
 
 		return (aio);
 	}
@@ -254,8 +258,6 @@ vdev_queue_io_to_issue(vdev_queue_t *vq, uint64_t pending_limit,
 
 	avl_add(&vq->vq_pending_tree, fio);
 
-	*funcp = zio_next_stage;
-
 	return (fio);
 }
 
@@ -264,7 +266,6 @@ vdev_queue_io(zio_t *zio)
 {
 	vdev_queue_t *vq = &zio->io_vd->vdev_queue;
 	zio_t *nio;
-	zio_issue_func_t *func;
 
 	ASSERT(zio->io_type == ZIO_TYPE_READ || zio->io_type == ZIO_TYPE_WRITE);
 
@@ -280,42 +281,45 @@ vdev_queue_io(zio_t *zio)
 
 	mutex_enter(&vq->vq_lock);
 
-	zio->io_deadline = (zio->io_timestamp >> zfs_vdev_time_shift) +
-	    zio->io_priority;
+	zio->io_deadline = (lbolt64 >> zfs_vdev_time_shift) + zio->io_priority;
 
 	vdev_queue_io_add(vq, zio);
 
-	nio = vdev_queue_io_to_issue(vq, zfs_vdev_min_pending, &func);
+	nio = vdev_queue_io_to_issue(vq, zfs_vdev_min_pending);
 
 	mutex_exit(&vq->vq_lock);
 
-	if (nio == NULL || func != zio_nowait)
-		return (nio);
+	if (nio == NULL)
+		return (NULL);
+
+	if (nio->io_done == vdev_queue_agg_io_done) {
+		zio_nowait(nio);
+		return (NULL);
+	}
 
-	func(nio);
-	return (NULL);
+	return (nio);
 }
 
 void
 vdev_queue_io_done(zio_t *zio)
 {
 	vdev_queue_t *vq = &zio->io_vd->vdev_queue;
-	zio_t *nio;
-	zio_issue_func_t *func;
-	int i;
 
 	mutex_enter(&vq->vq_lock);
 
 	avl_remove(&vq->vq_pending_tree, zio);
 
-	for (i = 0; i < zfs_vdev_ramp_rate; i++) {
-		nio = vdev_queue_io_to_issue(vq, zfs_vdev_max_pending, &func);
+	for (int i = 0; i < zfs_vdev_ramp_rate; i++) {
+		zio_t *nio = vdev_queue_io_to_issue(vq, zfs_vdev_max_pending);
 		if (nio == NULL)
 			break;
 		mutex_exit(&vq->vq_lock);
-		if (func == zio_next_stage)
+		if (nio->io_done == vdev_queue_agg_io_done) {
+			zio_nowait(nio);
+		} else {
 			zio_vdev_io_reissue(nio);
-		func(nio);
+			zio_execute(nio);
+		}
 		mutex_enter(&vq->vq_lock);
 	}
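
The SYSCTL_INT entries added in the first hunk are all CTLFLAG_RDTUN, i.e., read-only at runtime and settable only as loader tunables (vfs.zfs.vdev.* lines in /boot/loader.conf). A minimal userland sketch, assuming a kernel with this change applied, that dumps the new knobs via sysctlbyname(3):

/*
 * Sketch: dump the vdev queue tunables exported by this change.
 * On a kernel without it, the lookups fail with ENOENT, which is
 * reported and skipped.
 */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	static const char *knobs[] = {
		"vfs.zfs.vdev.max_pending",
		"vfs.zfs.vdev.min_pending",
		"vfs.zfs.vdev.time_shift",
		"vfs.zfs.vdev.ramp_rate",
		"vfs.zfs.vdev.aggregation_limit",
	};
	int val;
	size_t i, len;

	for (i = 0; i < sizeof(knobs) / sizeof(knobs[0]); i++) {
		len = sizeof(val);
		if (sysctlbyname(knobs[i], &val, &len, NULL, 0) == -1) {
			perror(knobs[i]);
			continue;
		}
		printf("%s = %d\n", knobs[i], val);
	}
	return (0);
}

To change one of these, set it in /boot/loader.conf and reboot; because of CTLFLAG_RDTUN, sysctl(8) will refuse to write it on a running system.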