aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThomas Munro <tmunro@FreeBSD.org>2020-06-21 08:51:24 +0000
committerThomas Munro <tmunro@FreeBSD.org>2020-06-21 08:51:24 +0000
commitf2706588730a5d3b9a687ba8d4269e386650cc4f (patch)
tree6af7a3efa2502ab3e819c7aa522f1fd908696bab
parent03270b59eed68cc33478a731a12269870e5168d2 (diff)
downloadsrc-f2706588730a5d3b9a687ba8d4269e386650cc4f.tar.gz
src-f2706588730a5d3b9a687ba8d4269e386650cc4f.zip
vfs: track sequential reads and writes separately
For software like PostgreSQL and SQLite that sometimes reads sequentially while also writing sequentially some distance behind with interleaved syscalls on the same fd, performance is better on UFS if we do sequential-access heuristics separately for reads and writes.

Patch originally by Andrew Gierth in 2008, updated and proposed by me with his permission.

Reviewed by:	mjg, kib, tmunro
Approved by:	mjg (mentor)
Obtained from:	Andrew Gierth <andrew@tao11.riddles.org.uk>
Differential Revision:	https://reviews.freebsd.org/D25024
Notes
Notes: svn path=/head/; revision=362460
-rw-r--r--sys/compat/cloudabi/cloudabi_file.c3
-rw-r--r--sys/fs/devfs/devfs_vnops.c4
-rw-r--r--sys/kern/kern_descrip.c2
-rw-r--r--sys/kern/vfs_syscalls.c6
-rw-r--r--sys/kern/vfs_vnops.c41
-rw-r--r--sys/sys/file.h7
6 files changed, 37 insertions, 26 deletions
diff --git a/sys/compat/cloudabi/cloudabi_file.c b/sys/compat/cloudabi/cloudabi_file.c
index f998809665bb..f3b33299b19e 100644
--- a/sys/compat/cloudabi/cloudabi_file.c
+++ b/sys/compat/cloudabi/cloudabi_file.c
@@ -287,7 +287,8 @@ cloudabi_sys_file_open(struct thread *td,
/* Install vnode operations if no custom operations are provided. */
if (fp->f_ops == &badfileops) {
- fp->f_seqcount = 1;
+ fp->f_seqcount[UIO_READ] = 1;
+ fp->f_seqcount[UIO_WRITE] = 1;
finit(fp, (fflags & FMASK) | (fp->f_flag & FHASLOCK),
DTYPE_VNODE, vp, &vnops);
}
diff --git a/sys/fs/devfs/devfs_vnops.c b/sys/fs/devfs/devfs_vnops.c
index f038f74e67a0..e7e524d2f0b9 100644
--- a/sys/fs/devfs/devfs_vnops.c
+++ b/sys/fs/devfs/devfs_vnops.c
@@ -1305,7 +1305,7 @@ devfs_read_f(struct file *fp, struct uio *uio, struct ucred *cred,
td->td_fpop = fpop;
dev_relthread(dev, ref);
- foffset_unlock_uio(fp, uio, flags | FOF_NOLOCK | FOF_NEXTOFF);
+ foffset_unlock_uio(fp, uio, flags | FOF_NOLOCK | FOF_NEXTOFF_R);
return (error);
}
@@ -1802,7 +1802,7 @@ devfs_write_f(struct file *fp, struct uio *uio, struct ucred *cred,
td->td_fpop = fpop;
dev_relthread(dev, ref);
- foffset_unlock_uio(fp, uio, flags | FOF_NOLOCK | FOF_NEXTOFF);
+ foffset_unlock_uio(fp, uio, flags | FOF_NOLOCK | FOF_NEXTOFF_W);
return (error);
}
diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c
index 7b23d2f49897..62c4d9a334cc 100644
--- a/sys/kern/kern_descrip.c
+++ b/sys/kern/kern_descrip.c
@@ -795,7 +795,7 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg)
if (arg >= 0) {
bsize = fp->f_vnode->v_mount->mnt_stat.f_iosize;
arg = MIN(arg, INT_MAX - bsize + 1);
- fp->f_seqcount = MIN(IO_SEQMAX,
+ fp->f_seqcount[UIO_READ] = MIN(IO_SEQMAX,
(arg + bsize - 1) / bsize);
atomic_set_int(&fp->f_flag, FRDAHEAD);
} else {
diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c
index 16ea55922a1a..843d02831d87 100644
--- a/sys/kern/vfs_syscalls.c
+++ b/sys/kern/vfs_syscalls.c
@@ -1124,7 +1124,8 @@ kern_openat(struct thread *td, int fd, const char *path, enum uio_seg pathseg,
*/
if (fp->f_ops == &badfileops) {
KASSERT(vp->v_type != VFIFO, ("Unexpected fifo."));
- fp->f_seqcount = 1;
+ fp->f_seqcount[UIO_READ] = 1;
+ fp->f_seqcount[UIO_WRITE] = 1;
finit(fp, (flags & FMASK) | (fp->f_flag & FHASLOCK),
DTYPE_VNODE, vp, &vnops);
}
@@ -4442,7 +4443,8 @@ sys_fhopen(struct thread *td, struct fhopen_args *uap)
td->td_dupfd = 0;
#endif
fp->f_vnode = vp;
- fp->f_seqcount = 1;
+ fp->f_seqcount[UIO_READ] = 1;
+ fp->f_seqcount[UIO_WRITE] = 1;
finit(fp, (fmode & FMASK) | (fp->f_flag & FHASLOCK), DTYPE_VNODE, vp,
&vnops);
VOP_UNLOCK(vp);
diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c
index 95a8e76af6c1..c9cefe7c78cc 100644
--- a/sys/kern/vfs_vnops.c
+++ b/sys/kern/vfs_vnops.c
@@ -489,10 +489,13 @@ vn_close(struct vnode *vp, int flags, struct ucred *file_cred,
static int
sequential_heuristic(struct uio *uio, struct file *fp)
{
+ enum uio_rw rw;
ASSERT_VOP_LOCKED(fp->f_vnode, __func__);
+
+ rw = uio->uio_rw;
if (fp->f_flag & FRDAHEAD)
- return (fp->f_seqcount << IO_SEQSHIFT);
+ return (fp->f_seqcount[rw] << IO_SEQSHIFT);
/*
* Offset 0 is handled specially. open() sets f_seqcount to 1 so
@@ -501,8 +504,8 @@ sequential_heuristic(struct uio *uio, struct file *fp)
* unless previous seeks have reduced f_seqcount to 0, in which
* case offset 0 is not special.
*/
- if ((uio->uio_offset == 0 && fp->f_seqcount > 0) ||
- uio->uio_offset == fp->f_nextoff) {
+ if ((uio->uio_offset == 0 && fp->f_seqcount[rw] > 0) ||
+ uio->uio_offset == fp->f_nextoff[rw]) {
/*
* f_seqcount is in units of fixed-size blocks so that it
* depends mainly on the amount of sequential I/O and not
@@ -513,20 +516,20 @@ sequential_heuristic(struct uio *uio, struct file *fp)
* to any block size used by software.
*/
if (uio->uio_resid >= IO_SEQMAX * 16384)
- fp->f_seqcount = IO_SEQMAX;
+ fp->f_seqcount[rw] = IO_SEQMAX;
else {
- fp->f_seqcount += howmany(uio->uio_resid, 16384);
- if (fp->f_seqcount > IO_SEQMAX)
- fp->f_seqcount = IO_SEQMAX;
+ fp->f_seqcount[rw] += howmany(uio->uio_resid, 16384);
+ if (fp->f_seqcount[rw] > IO_SEQMAX)
+ fp->f_seqcount[rw] = IO_SEQMAX;
}
- return (fp->f_seqcount << IO_SEQSHIFT);
+ return (fp->f_seqcount[rw] << IO_SEQSHIFT);
}
/* Not sequential. Quickly draw-down sequentiality. */
- if (fp->f_seqcount > 1)
- fp->f_seqcount = 1;
+ if (fp->f_seqcount[rw] > 1)
+ fp->f_seqcount[rw] = 1;
else
- fp->f_seqcount = 0;
+ fp->f_seqcount[rw] = 0;
return (0);
}
@@ -734,8 +737,10 @@ foffset_unlock(struct file *fp, off_t val, int flags)
if ((flags & FOF_NOUPDATE) == 0)
atomic_store_long(&fp->f_offset, val);
- if ((flags & FOF_NEXTOFF) != 0)
- fp->f_nextoff = val;
+ if ((flags & FOF_NEXTOFF_R) != 0)
+ fp->f_nextoff[UIO_READ] = val;
+ if ((flags & FOF_NEXTOFF_W) != 0)
+ fp->f_nextoff[UIO_WRITE] = val;
if ((flags & FOF_NOLOCK) != 0)
return;
@@ -788,8 +793,10 @@ foffset_unlock(struct file *fp, off_t val, int flags)
mtx_lock(mtxp);
if ((flags & FOF_NOUPDATE) == 0)
fp->f_offset = val;
- if ((flags & FOF_NEXTOFF) != 0)
- fp->f_nextoff = val;
+ if ((flags & FOF_NEXTOFF_R) != 0)
+ fp->f_nextoff[UIO_READ] = val;
+ if ((flags & FOF_NEXTOFF_W) != 0)
+ fp->f_nextoff[UIO_WRITE] = val;
if ((flags & FOF_NOLOCK) == 0) {
KASSERT((fp->f_vnread_flags & FOFFSET_LOCKED) != 0,
("Lost FOFFSET_LOCKED"));
@@ -878,7 +885,7 @@ vn_read(struct file *fp, struct uio *uio, struct ucred *active_cred, int flags,
if (error == 0)
#endif
error = VOP_READ(vp, uio, ioflag, fp->f_cred);
- fp->f_nextoff = uio->uio_offset;
+ fp->f_nextoff[UIO_READ] = uio->uio_offset;
VOP_UNLOCK(vp);
if (error == 0 && advice == POSIX_FADV_NOREUSE &&
orig_offset != uio->uio_offset)
@@ -953,7 +960,7 @@ vn_write(struct file *fp, struct uio *uio, struct ucred *active_cred, int flags,
if (error == 0)
#endif
error = VOP_WRITE(vp, uio, ioflag, fp->f_cred);
- fp->f_nextoff = uio->uio_offset;
+ fp->f_nextoff[UIO_WRITE] = uio->uio_offset;
VOP_UNLOCK(vp);
if (vp->v_type != VCHR)
vn_finished_write(mp);
diff --git a/sys/sys/file.h b/sys/sys/file.h
index 35c2a5b4a78a..65b73158f8d9 100644
--- a/sys/sys/file.h
+++ b/sys/sys/file.h
@@ -81,7 +81,8 @@ struct ucred;
#define FOF_OFFSET 0x01 /* Use the offset in uio argument */
#define FOF_NOLOCK 0x02 /* Do not take FOFFSET_LOCK */
-#define FOF_NEXTOFF 0x04 /* Also update f_nextoff */
+#define FOF_NEXTOFF_R 0x04 /* Also update f_nextoff[UIO_READ] */
+#define FOF_NEXTOFF_W 0x08 /* Also update f_nextoff[UIO_WRITE] */
#define FOF_NOUPDATE 0x10 /* Do not update f_offset */
off_t foffset_lock(struct file *fp, int flags);
void foffset_lock_uio(struct file *fp, struct uio *uio, int flags);
@@ -187,10 +188,10 @@ struct file {
* DTYPE_VNODE specific fields.
*/
union {
- int16_t f_seqcount; /* (a) Count of sequential accesses. */
+ int16_t f_seqcount[2]; /* (a) Count of seq. reads and writes. */
int f_pipegen;
};
- off_t f_nextoff; /* next expected read/write offset. */
+ off_t f_nextoff[2]; /* next expected read/write offset. */
union {
struct cdev_privdata *fvn_cdevpriv;
/* (d) Private data for the cdev. */