aboutsummaryrefslogtreecommitdiff
path: root/sys/nfsclient/nfs_bio.c
diff options
context:
space:
mode:
Diffstat (limited to 'sys/nfsclient/nfs_bio.c')
-rw-r--r--sys/nfsclient/nfs_bio.c1289
1 files changed, 0 insertions, 1289 deletions
diff --git a/sys/nfsclient/nfs_bio.c b/sys/nfsclient/nfs_bio.c
deleted file mode 100644
index c9737006f1d4..000000000000
--- a/sys/nfsclient/nfs_bio.c
+++ /dev/null
@@ -1,1289 +0,0 @@
-/*
- * Copyright (c) 1989, 1993
- * The Regents of the University of California. All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * Rick Macklem at The University of Guelph.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * @(#)nfs_bio.c 8.9 (Berkeley) 3/30/95
- * $Id: nfs_bio.c,v 1.64 1998/12/07 21:58:43 archie Exp $
- */
-
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/resourcevar.h>
-#include <sys/signalvar.h>
-#include <sys/proc.h>
-#include <sys/buf.h>
-#include <sys/vnode.h>
-#include <sys/mount.h>
-#include <sys/kernel.h>
-
-#include <vm/vm.h>
-#include <vm/vm_extern.h>
-#include <vm/vm_prot.h>
-#include <vm/vm_page.h>
-#include <vm/vm_object.h>
-#include <vm/vm_pager.h>
-#include <vm/vnode_pager.h>
-
-#include <nfs/rpcv2.h>
-#include <nfs/nfsproto.h>
-#include <nfs/nfs.h>
-#include <nfs/nfsmount.h>
-#include <nfs/nqnfs.h>
-#include <nfs/nfsnode.h>
-
-static struct buf *nfs_getcacheblk __P((struct vnode *vp, daddr_t bn, int size,
- struct proc *p));
-static void nfs_prot_buf __P((struct buf *bp, int off, int n));
-
-extern int nfs_numasync;
-extern struct nfsstats nfsstats;
-
-/*
- * Vnode op for VM getpages.
- */
-int
-nfs_getpages(ap)
- struct vop_getpages_args /* {
- struct vnode *a_vp;
- vm_page_t *a_m;
- int a_count;
- int a_reqpage;
- vm_ooffset_t a_offset;
- } */ *ap;
-{
- int i, error, nextoff, size, toff, npages, count;
- struct uio uio;
- struct iovec iov;
- vm_offset_t kva;
- struct buf *bp;
- struct vnode *vp;
- struct proc *p;
- struct ucred *cred;
- struct nfsmount *nmp;
- vm_page_t *pages;
-
- vp = ap->a_vp;
- p = curproc; /* XXX */
- cred = curproc->p_ucred; /* XXX */
- nmp = VFSTONFS(vp->v_mount);
- pages = ap->a_m;
- count = ap->a_count;
-
- if (vp->v_object == NULL) {
- printf("nfs_getpages: called with non-merged cache vnode??\n");
- return VM_PAGER_ERROR;
- }
-
- if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 &&
- (nmp->nm_state & NFSSTA_GOTFSINFO) == 0)
- (void)nfs_fsinfo(nmp, vp, cred, p);
- /*
- * We use only the kva address for the buffer, but this is extremely
- * convienient and fast.
- */
- bp = getpbuf();
-
- npages = btoc(count);
- kva = (vm_offset_t) bp->b_data;
- pmap_qenter(kva, pages, npages);
-
- iov.iov_base = (caddr_t) kva;
- iov.iov_len = count;
- uio.uio_iov = &iov;
- uio.uio_iovcnt = 1;
- uio.uio_offset = IDX_TO_OFF(pages[0]->pindex);
- uio.uio_resid = count;
- uio.uio_segflg = UIO_SYSSPACE;
- uio.uio_rw = UIO_READ;
- uio.uio_procp = p;
-
- error = nfs_readrpc(vp, &uio, cred);
- pmap_qremove(kva, npages);
-
- relpbuf(bp);
-
- if (error && (uio.uio_resid == count))
- return VM_PAGER_ERROR;
-
- size = count - uio.uio_resid;
-
- for (i = 0, toff = 0; i < npages; i++, toff = nextoff) {
- vm_page_t m;
- nextoff = toff + PAGE_SIZE;
- m = pages[i];
-
- m->flags &= ~PG_ZERO;
-
- if (nextoff <= size) {
- m->valid = VM_PAGE_BITS_ALL;
- m->dirty = 0;
- } else {
- int nvalid = ((size + DEV_BSIZE - 1) - toff) & ~(DEV_BSIZE - 1);
- vm_page_set_validclean(m, 0, nvalid);
- }
-
- if (i != ap->a_reqpage) {
- /*
- * Whether or not to leave the page activated is up in
- * the air, but we should put the page on a page queue
- * somewhere (it already is in the object). Result:
- * It appears that emperical results show that
- * deactivating pages is best.
- */
-
- /*
- * Just in case someone was asking for this page we
- * now tell them that it is ok to use.
- */
- if (!error) {
- if (m->flags & PG_WANTED)
- vm_page_activate(m);
- else
- vm_page_deactivate(m);
- vm_page_wakeup(m);
- } else {
- vnode_pager_freepage(m);
- }
- }
- }
- return 0;
-}
-
-/*
- * Vnode op for VM putpages.
- */
-int
-nfs_putpages(ap)
- struct vop_putpages_args /* {
- struct vnode *a_vp;
- vm_page_t *a_m;
- int a_count;
- int a_sync;
- int *a_rtvals;
- vm_ooffset_t a_offset;
- } */ *ap;
-{
- struct uio uio;
- struct iovec iov;
- vm_offset_t kva;
- struct buf *bp;
- int iomode, must_commit, i, error, npages, count;
- int *rtvals;
- struct vnode *vp;
- struct proc *p;
- struct ucred *cred;
- struct nfsmount *nmp;
- vm_page_t *pages;
-
- vp = ap->a_vp;
- p = curproc; /* XXX */
- cred = curproc->p_ucred; /* XXX */
- nmp = VFSTONFS(vp->v_mount);
- pages = ap->a_m;
- count = ap->a_count;
- rtvals = ap->a_rtvals;
- npages = btoc(count);
-
- if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 &&
- (nmp->nm_state & NFSSTA_GOTFSINFO) == 0)
- (void)nfs_fsinfo(nmp, vp, cred, p);
-
- for (i = 0; i < npages; i++) {
- rtvals[i] = VM_PAGER_AGAIN;
- }
-
- /*
- * We use only the kva address for the buffer, but this is extremely
- * convienient and fast.
- */
- bp = getpbuf();
-
- kva = (vm_offset_t) bp->b_data;
- pmap_qenter(kva, pages, npages);
-
- iov.iov_base = (caddr_t) kva;
- iov.iov_len = count;
- uio.uio_iov = &iov;
- uio.uio_iovcnt = 1;
- uio.uio_offset = IDX_TO_OFF(pages[0]->pindex);
- uio.uio_resid = count;
- uio.uio_segflg = UIO_SYSSPACE;
- uio.uio_rw = UIO_WRITE;
- uio.uio_procp = p;
-
- if ((ap->a_sync & VM_PAGER_PUT_SYNC) == 0)
- iomode = NFSV3WRITE_UNSTABLE;
- else
- iomode = NFSV3WRITE_FILESYNC;
-
- error = nfs_writerpc(vp, &uio, cred, &iomode, &must_commit);
-
- pmap_qremove(kva, npages);
- relpbuf(bp);
-
- if (!error) {
- int nwritten = round_page(count - uio.uio_resid) / PAGE_SIZE;
- for (i = 0; i < nwritten; i++) {
- rtvals[i] = VM_PAGER_OK;
- pages[i]->dirty = 0;
- }
- if (must_commit)
- nfs_clearcommit(vp->v_mount);
- }
- return rtvals[0];
-}
-
-/*
- * Vnode op for read using bio
- */
-int
-nfs_bioread(vp, uio, ioflag, cred, getpages)
- register struct vnode *vp;
- register struct uio *uio;
- int ioflag;
- struct ucred *cred;
- int getpages;
-{
- register struct nfsnode *np = VTONFS(vp);
- register int biosize, i;
- off_t diff;
- struct buf *bp = 0, *rabp;
- struct vattr vattr;
- struct proc *p;
- struct nfsmount *nmp = VFSTONFS(vp->v_mount);
- daddr_t lbn, rabn;
- int bufsize;
- int nra, error = 0, n = 0, on = 0, not_readin;
-
-#ifdef DIAGNOSTIC
- if (uio->uio_rw != UIO_READ)
- panic("nfs_read mode");
-#endif
- if (uio->uio_resid == 0)
- return (0);
- if (uio->uio_offset < 0) /* XXX VDIR cookies can be negative */
- return (EINVAL);
- p = uio->uio_procp;
- if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 &&
- (nmp->nm_state & NFSSTA_GOTFSINFO) == 0)
- (void)nfs_fsinfo(nmp, vp, cred, p);
- if (vp->v_type != VDIR &&
- (uio->uio_offset + uio->uio_resid) > nmp->nm_maxfilesize)
- return (EFBIG);
- biosize = vp->v_mount->mnt_stat.f_iosize;
- /*
- * For nfs, cache consistency can only be maintained approximately.
- * Although RFC1094 does not specify the criteria, the following is
- * believed to be compatible with the reference port.
- * For nqnfs, full cache consistency is maintained within the loop.
- * For nfs:
- * If the file's modify time on the server has changed since the
- * last read rpc or you have written to the file,
- * you may have lost data cache consistency with the
- * server, so flush all of the file's data out of the cache.
- * Then force a getattr rpc to ensure that you have up to date
- * attributes.
- * NB: This implies that cache data can be read when up to
- * NFS_ATTRTIMEO seconds out of date. If you find that you need current
- * attributes this could be forced by setting n_attrstamp to 0 before
- * the VOP_GETATTR() call.
- */
- if ((nmp->nm_flag & NFSMNT_NQNFS) == 0) {
- if (np->n_flag & NMODIFIED) {
- if (vp->v_type != VREG) {
- if (vp->v_type != VDIR)
- panic("nfs: bioread, not dir");
- nfs_invaldir(vp);
- error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
- if (error)
- return (error);
- }
- np->n_attrstamp = 0;
- error = VOP_GETATTR(vp, &vattr, cred, p);
- if (error)
- return (error);
- np->n_mtime = vattr.va_mtime.tv_sec;
- } else {
- error = VOP_GETATTR(vp, &vattr, cred, p);
- if (error)
- return (error);
- if (np->n_mtime != vattr.va_mtime.tv_sec) {
- if (vp->v_type == VDIR)
- nfs_invaldir(vp);
- error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
- if (error)
- return (error);
- np->n_mtime = vattr.va_mtime.tv_sec;
- }
- }
- }
- do {
-
- /*
- * Get a valid lease. If cached data is stale, flush it.
- */
- if (nmp->nm_flag & NFSMNT_NQNFS) {
- if (NQNFS_CKINVALID(vp, np, ND_READ)) {
- do {
- error = nqnfs_getlease(vp, ND_READ, cred, p);
- } while (error == NQNFS_EXPIRED);
- if (error)
- return (error);
- if (np->n_lrev != np->n_brev ||
- (np->n_flag & NQNFSNONCACHE) ||
- ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) {
- if (vp->v_type == VDIR)
- nfs_invaldir(vp);
- error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
- if (error)
- return (error);
- np->n_brev = np->n_lrev;
- }
- } else if (vp->v_type == VDIR && (np->n_flag & NMODIFIED)) {
- nfs_invaldir(vp);
- error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
- if (error)
- return (error);
- }
- }
- if (np->n_flag & NQNFSNONCACHE) {
- switch (vp->v_type) {
- case VREG:
- return (nfs_readrpc(vp, uio, cred));
- case VLNK:
- return (nfs_readlinkrpc(vp, uio, cred));
- case VDIR:
- break;
- default:
- printf(" NQNFSNONCACHE: type %x unexpected\n",
- vp->v_type);
- };
- }
- switch (vp->v_type) {
- case VREG:
- nfsstats.biocache_reads++;
- lbn = uio->uio_offset / biosize;
- on = uio->uio_offset & (biosize - 1);
- not_readin = 1;
-
- /*
- * Start the read ahead(s), as required.
- */
- if (nfs_numasync > 0 && nmp->nm_readahead > 0) {
- for (nra = 0; nra < nmp->nm_readahead &&
- (off_t)(lbn + 1 + nra) * biosize < np->n_size; nra++) {
- rabn = lbn + 1 + nra;
- if (!incore(vp, rabn)) {
- rabp = nfs_getcacheblk(vp, rabn, biosize, p);
- if (!rabp)
- return (EINTR);
- if ((rabp->b_flags & (B_CACHE|B_DELWRI)) == 0) {
- rabp->b_flags |= (B_READ | B_ASYNC);
- vfs_busy_pages(rabp, 0);
- if (nfs_asyncio(rabp, cred)) {
- rabp->b_flags |= B_INVAL|B_ERROR;
- vfs_unbusy_pages(rabp);
- brelse(rabp);
- }
- } else
- brelse(rabp);
- }
- }
- }
-
- /*
- * If the block is in the cache and has the required data
- * in a valid region, just copy it out.
- * Otherwise, get the block and write back/read in,
- * as required.
- */
-again:
- bufsize = biosize;
- if ((off_t)(lbn + 1) * biosize > np->n_size &&
- (off_t)(lbn + 1) * biosize - np->n_size < biosize) {
- bufsize = np->n_size - (off_t)lbn * biosize;
- bufsize = (bufsize + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);
- }
- bp = nfs_getcacheblk(vp, lbn, bufsize, p);
- if (!bp)
- return (EINTR);
- /*
- * If we are being called from nfs_getpages, we must
- * make sure the buffer is a vmio buffer. The vp will
- * already be setup for vmio but there may be some old
- * non-vmio buffers attached to it.
- */
- if (getpages && !(bp->b_flags & B_VMIO)) {
-#ifdef DIAGNOSTIC
- printf("nfs_bioread: non vmio buf found, discarding\n");
-#endif
- bp->b_flags |= B_NOCACHE;
- bp->b_flags |= B_INVAFTERWRITE;
- if (bp->b_dirtyend > 0) {
- if ((bp->b_flags & B_DELWRI) == 0)
- panic("nfsbioread");
- if (VOP_BWRITE(bp) == EINTR)
- return (EINTR);
- } else
- brelse(bp);
- goto again;
- }
- if ((bp->b_flags & B_CACHE) == 0) {
- bp->b_flags |= B_READ;
- bp->b_flags &= ~(B_DONE | B_ERROR | B_INVAL);
- not_readin = 0;
- vfs_busy_pages(bp, 0);
- error = nfs_doio(bp, cred, p);
- if (error) {
- brelse(bp);
- return (error);
- }
- }
- if (bufsize > on) {
- n = min((unsigned)(bufsize - on), uio->uio_resid);
- } else {
- n = 0;
- }
- diff = np->n_size - uio->uio_offset;
- if (diff < n)
- n = diff;
- if (not_readin && n > 0) {
- if (on < bp->b_validoff || (on + n) > bp->b_validend) {
- bp->b_flags |= B_NOCACHE;
- bp->b_flags |= B_INVAFTERWRITE;
- if (bp->b_dirtyend > 0) {
- if ((bp->b_flags & B_DELWRI) == 0)
- panic("nfsbioread");
- if (VOP_BWRITE(bp) == EINTR)
- return (EINTR);
- } else
- brelse(bp);
- goto again;
- }
- }
- vp->v_lastr = lbn;
- diff = (on >= bp->b_validend) ? 0 : (bp->b_validend - on);
- if (diff < n)
- n = diff;
- break;
- case VLNK:
- nfsstats.biocache_readlinks++;
- bp = nfs_getcacheblk(vp, (daddr_t)0, NFS_MAXPATHLEN, p);
- if (!bp)
- return (EINTR);
- if ((bp->b_flags & B_CACHE) == 0) {
- bp->b_flags |= B_READ;
- vfs_busy_pages(bp, 0);
- error = nfs_doio(bp, cred, p);
- if (error) {
- bp->b_flags |= B_ERROR;
- brelse(bp);
- return (error);
- }
- }
- n = min(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid);
- on = 0;
- break;
- case VDIR:
- nfsstats.biocache_readdirs++;
- if (np->n_direofoffset
- && uio->uio_offset >= np->n_direofoffset) {
- return (0);
- }
- lbn = (uoff_t)uio->uio_offset / NFS_DIRBLKSIZ;
- on = uio->uio_offset & (NFS_DIRBLKSIZ - 1);
- bp = nfs_getcacheblk(vp, lbn, NFS_DIRBLKSIZ, p);
- if (!bp)
- return (EINTR);
- if ((bp->b_flags & B_CACHE) == 0) {
- bp->b_flags |= B_READ;
- vfs_busy_pages(bp, 0);
- error = nfs_doio(bp, cred, p);
- if (error) {
- brelse(bp);
- }
- while (error == NFSERR_BAD_COOKIE) {
- nfs_invaldir(vp);
- error = nfs_vinvalbuf(vp, 0, cred, p, 1);
- /*
- * Yuck! The directory has been modified on the
- * server. The only way to get the block is by
- * reading from the beginning to get all the
- * offset cookies.
- */
- for (i = 0; i <= lbn && !error; i++) {
- if (np->n_direofoffset
- && (i * NFS_DIRBLKSIZ) >= np->n_direofoffset)
- return (0);
- bp = nfs_getcacheblk(vp, i, NFS_DIRBLKSIZ, p);
- if (!bp)
- return (EINTR);
- if ((bp->b_flags & B_DONE) == 0) {
- bp->b_flags |= B_READ;
- vfs_busy_pages(bp, 0);
- error = nfs_doio(bp, cred, p);
- if (error == 0 && (bp->b_flags & B_INVAL))
- break;
- if (error) {
- brelse(bp);
- } else if (i < lbn) {
- brelse(bp);
- }
- }
- }
- }
- if (error)
- return (error);
- }
-
- /*
- * If not eof and read aheads are enabled, start one.
- * (You need the current block first, so that you have the
- * directory offset cookie of the next block.)
- */
- if (nfs_numasync > 0 && nmp->nm_readahead > 0 &&
- (bp->b_flags & B_INVAL) == 0 &&
- (np->n_direofoffset == 0 ||
- (lbn + 1) * NFS_DIRBLKSIZ < np->n_direofoffset) &&
- !(np->n_flag & NQNFSNONCACHE) &&
- !incore(vp, lbn + 1)) {
- rabp = nfs_getcacheblk(vp, lbn + 1, NFS_DIRBLKSIZ, p);
- if (rabp) {
- if ((rabp->b_flags & (B_CACHE|B_DELWRI)) == 0) {
- rabp->b_flags |= (B_READ | B_ASYNC);
- vfs_busy_pages(rabp, 0);
- if (nfs_asyncio(rabp, cred)) {
- rabp->b_flags |= B_INVAL|B_ERROR;
- vfs_unbusy_pages(rabp);
- brelse(rabp);
- }
- } else {
- brelse(rabp);
- }
- }
- }
- /*
- * Make sure we use a signed variant of min() since
- * the second term may be negative.
- */
- n = lmin(uio->uio_resid, NFS_DIRBLKSIZ - bp->b_resid - on);
- break;
- default:
- printf(" nfs_bioread: type %x unexpected\n",vp->v_type);
- break;
- };
-
- if (n > 0) {
- error = uiomove(bp->b_data + on, (int)n, uio);
- }
- switch (vp->v_type) {
- case VREG:
- break;
- case VLNK:
- n = 0;
- break;
- case VDIR:
- if (np->n_flag & NQNFSNONCACHE)
- bp->b_flags |= B_INVAL;
- break;
- default:
- printf(" nfs_bioread: type %x unexpected\n",vp->v_type);
- }
- brelse(bp);
- } while (error == 0 && uio->uio_resid > 0 && n > 0);
- return (error);
-}
-
-static void
-nfs_prot_buf(bp, off, n)
- struct buf *bp;
- int off;
- int n;
-{
- int pindex, boff, end;
-
- if ((bp->b_flags & B_VMIO) == 0)
- return;
-
- end = round_page(off + n);
- for (boff = trunc_page(off); boff < end; boff += PAGE_SIZE) {
- pindex = boff >> PAGE_SHIFT;
- vm_page_protect(bp->b_pages[pindex], VM_PROT_NONE);
- }
-}
-
-/*
- * Vnode op for write using bio
- */
-int
-nfs_write(ap)
- struct vop_write_args /* {
- struct vnode *a_vp;
- struct uio *a_uio;
- int a_ioflag;
- struct ucred *a_cred;
- } */ *ap;
-{
- register int biosize;
- register struct uio *uio = ap->a_uio;
- struct proc *p = uio->uio_procp;
- register struct vnode *vp = ap->a_vp;
- struct nfsnode *np = VTONFS(vp);
- register struct ucred *cred = ap->a_cred;
- int ioflag = ap->a_ioflag;
- struct buf *bp;
- struct vattr vattr;
- struct nfsmount *nmp = VFSTONFS(vp->v_mount);
- daddr_t lbn;
- int bufsize;
- int n, on, error = 0, iomode, must_commit;
-
-#ifdef DIAGNOSTIC
- if (uio->uio_rw != UIO_WRITE)
- panic("nfs_write mode");
- if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
- panic("nfs_write proc");
-#endif
- if (vp->v_type != VREG)
- return (EIO);
- if (np->n_flag & NWRITEERR) {
- np->n_flag &= ~NWRITEERR;
- return (np->n_error);
- }
- if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 &&
- (nmp->nm_state & NFSSTA_GOTFSINFO) == 0)
- (void)nfs_fsinfo(nmp, vp, cred, p);
- if (ioflag & (IO_APPEND | IO_SYNC)) {
- if (np->n_flag & NMODIFIED) {
- np->n_attrstamp = 0;
- error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
- if (error)
- return (error);
- }
- if (ioflag & IO_APPEND) {
- np->n_attrstamp = 0;
- error = VOP_GETATTR(vp, &vattr, cred, p);
- if (error)
- return (error);
- uio->uio_offset = np->n_size;
- }
- }
- if (uio->uio_offset < 0)
- return (EINVAL);
- if ((uio->uio_offset + uio->uio_resid) > nmp->nm_maxfilesize)
- return (EFBIG);
- if (uio->uio_resid == 0)
- return (0);
- /*
- * Maybe this should be above the vnode op call, but so long as
- * file servers have no limits, i don't think it matters
- */
- if (p && uio->uio_offset + uio->uio_resid >
- p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
- psignal(p, SIGXFSZ);
- return (EFBIG);
- }
- /*
- * I use nm_rsize, not nm_wsize so that all buffer cache blocks
- * will be the same size within a filesystem. nfs_writerpc will
- * still use nm_wsize when sizing the rpc's.
- */
- biosize = vp->v_mount->mnt_stat.f_iosize;
- do {
- /*
- * Check for a valid write lease.
- */
- if ((nmp->nm_flag & NFSMNT_NQNFS) &&
- NQNFS_CKINVALID(vp, np, ND_WRITE)) {
- do {
- error = nqnfs_getlease(vp, ND_WRITE, cred, p);
- } while (error == NQNFS_EXPIRED);
- if (error)
- return (error);
- if (np->n_lrev != np->n_brev ||
- (np->n_flag & NQNFSNONCACHE)) {
- error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
- if (error)
- return (error);
- np->n_brev = np->n_lrev;
- }
- }
- if ((np->n_flag & NQNFSNONCACHE) && uio->uio_iovcnt == 1) {
- iomode = NFSV3WRITE_FILESYNC;
- error = nfs_writerpc(vp, uio, cred, &iomode, &must_commit);
- if (must_commit)
- nfs_clearcommit(vp->v_mount);
- return (error);
- }
- nfsstats.biocache_writes++;
- lbn = uio->uio_offset / biosize;
- on = uio->uio_offset & (biosize-1);
- n = min((unsigned)(biosize - on), uio->uio_resid);
-again:
- if (uio->uio_offset + n > np->n_size) {
- np->n_size = uio->uio_offset + n;
- np->n_flag |= NMODIFIED;
- vnode_pager_setsize(vp, np->n_size);
- }
- bufsize = biosize;
- if ((off_t)(lbn + 1) * biosize > np->n_size) {
- bufsize = np->n_size - (off_t)lbn * biosize;
- bufsize = (bufsize + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);
- }
- bp = nfs_getcacheblk(vp, lbn, bufsize, p);
- if (!bp)
- return (EINTR);
- if (bp->b_wcred == NOCRED) {
- crhold(cred);
- bp->b_wcred = cred;
- }
- np->n_flag |= NMODIFIED;
-
- if ((off_t)bp->b_blkno * DEV_BSIZE + bp->b_dirtyend > np->n_size)
- bp->b_dirtyend = np->n_size - (off_t)bp->b_blkno * DEV_BSIZE;
-
- /*
- * If the new write will leave a contiguous dirty
- * area, just update the b_dirtyoff and b_dirtyend,
- * otherwise force a write rpc of the old dirty area.
- */
- if (bp->b_dirtyend > 0 &&
- (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) {
- bp->b_proc = p;
- if (VOP_BWRITE(bp) == EINTR)
- return (EINTR);
- goto again;
- }
-
- /*
- * Check for valid write lease and get one as required.
- * In case getblk() and/or bwrite() delayed us.
- */
- if ((nmp->nm_flag & NFSMNT_NQNFS) &&
- NQNFS_CKINVALID(vp, np, ND_WRITE)) {
- do {
- error = nqnfs_getlease(vp, ND_WRITE, cred, p);
- } while (error == NQNFS_EXPIRED);
- if (error) {
- brelse(bp);
- return (error);
- }
- if (np->n_lrev != np->n_brev ||
- (np->n_flag & NQNFSNONCACHE)) {
- brelse(bp);
- error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
- if (error)
- return (error);
- np->n_brev = np->n_lrev;
- goto again;
- }
- }
-
- error = uiomove((char *)bp->b_data + on, n, uio);
- bp->b_flags &= ~B_NEEDCOMMIT;
- if (error) {
- bp->b_flags |= B_ERROR;
- brelse(bp);
- return (error);
- }
-
- /*
- * This will keep the buffer and mmaped regions more coherent.
- */
- nfs_prot_buf(bp, on, n);
-
- if (bp->b_dirtyend > 0) {
- bp->b_dirtyoff = min(on, bp->b_dirtyoff);
- bp->b_dirtyend = max((on + n), bp->b_dirtyend);
- } else {
- bp->b_dirtyoff = on;
- bp->b_dirtyend = on + n;
- }
- if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff ||
- bp->b_validoff > bp->b_dirtyend) {
- bp->b_validoff = bp->b_dirtyoff;
- bp->b_validend = bp->b_dirtyend;
- } else {
- bp->b_validoff = min(bp->b_validoff, bp->b_dirtyoff);
- bp->b_validend = max(bp->b_validend, bp->b_dirtyend);
- }
-
- /*
- * Since this block is being modified, it must be written
- * again and not just committed.
- */
- bp->b_flags &= ~B_NEEDCOMMIT;
-
- /*
- * If the lease is non-cachable or IO_SYNC do bwrite().
- */
- if ((np->n_flag & NQNFSNONCACHE) || (ioflag & IO_SYNC)) {
- bp->b_proc = p;
- if (ioflag & IO_INVAL)
- bp->b_flags |= B_INVAL;
- error = VOP_BWRITE(bp);
- if (error)
- return (error);
- if (np->n_flag & NQNFSNONCACHE) {
- error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
- if (error)
- return (error);
- }
- } else if ((n + on) == biosize &&
- (nmp->nm_flag & NFSMNT_NQNFS) == 0) {
- bp->b_proc = (struct proc *)0;
- bp->b_flags |= B_ASYNC;
- (void)nfs_writebp(bp, 0);
- } else
- bdwrite(bp);
- } while (uio->uio_resid > 0 && n > 0);
- return (0);
-}
-
-/*
- * Get an nfs cache block.
- * Allocate a new one if the block isn't currently in the cache
- * and return the block marked busy. If the calling process is
- * interrupted by a signal for an interruptible mount point, return
- * NULL.
- */
-static struct buf *
-nfs_getcacheblk(vp, bn, size, p)
- struct vnode *vp;
- daddr_t bn;
- int size;
- struct proc *p;
-{
- register struct buf *bp;
- struct mount *mp;
- struct nfsmount *nmp;
-
- mp = vp->v_mount;
- nmp = VFSTONFS(mp);
-
- if (nmp->nm_flag & NFSMNT_INT) {
- bp = getblk(vp, bn, size, PCATCH, 0);
- while (bp == (struct buf *)0) {
- if (nfs_sigintr(nmp, (struct nfsreq *)0, p))
- return ((struct buf *)0);
- bp = getblk(vp, bn, size, 0, 2 * hz);
- }
- } else
- bp = getblk(vp, bn, size, 0, 0);
-
- if (vp->v_type == VREG) {
- int biosize;
- biosize = mp->mnt_stat.f_iosize;
- bp->b_blkno = bn * (biosize / DEV_BSIZE);
- }
-
- return (bp);
-}
-
-/*
- * Flush and invalidate all dirty buffers. If another process is already
- * doing the flush, just wait for completion.
- */
-int
-nfs_vinvalbuf(vp, flags, cred, p, intrflg)
- struct vnode *vp;
- int flags;
- struct ucred *cred;
- struct proc *p;
- int intrflg;
-{
- register struct nfsnode *np = VTONFS(vp);
- struct nfsmount *nmp = VFSTONFS(vp->v_mount);
- int error = 0, slpflag, slptimeo;
-
- if (vp->v_flag & VXLOCK) {
- return (0);
- }
-
- if ((nmp->nm_flag & NFSMNT_INT) == 0)
- intrflg = 0;
- if (intrflg) {
- slpflag = PCATCH;
- slptimeo = 2 * hz;
- } else {
- slpflag = 0;
- slptimeo = 0;
- }
- /*
- * First wait for any other process doing a flush to complete.
- */
- while (np->n_flag & NFLUSHINPROG) {
- np->n_flag |= NFLUSHWANT;
- error = tsleep((caddr_t)&np->n_flag, PRIBIO + 2, "nfsvinval",
- slptimeo);
- if (error && intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p))
- return (EINTR);
- }
-
- /*
- * Now, flush as required.
- */
- np->n_flag |= NFLUSHINPROG;
- error = vinvalbuf(vp, flags, cred, p, slpflag, 0);
- while (error) {
- if (intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) {
- np->n_flag &= ~NFLUSHINPROG;
- if (np->n_flag & NFLUSHWANT) {
- np->n_flag &= ~NFLUSHWANT;
- wakeup((caddr_t)&np->n_flag);
- }
- return (EINTR);
- }
- error = vinvalbuf(vp, flags, cred, p, 0, slptimeo);
- }
- np->n_flag &= ~(NMODIFIED | NFLUSHINPROG);
- if (np->n_flag & NFLUSHWANT) {
- np->n_flag &= ~NFLUSHWANT;
- wakeup((caddr_t)&np->n_flag);
- }
- return (0);
-}
-
-/*
- * Initiate asynchronous I/O. Return an error if no nfsiods are available.
- * This is mainly to avoid queueing async I/O requests when the nfsiods
- * are all hung on a dead server.
- */
-int
-nfs_asyncio(bp, cred)
- register struct buf *bp;
- struct ucred *cred;
-{
- struct nfsmount *nmp;
- int i;
- int gotiod;
- int slpflag = 0;
- int slptimeo = 0;
- int error;
-
- if (nfs_numasync == 0)
- return (EIO);
-
- nmp = VFSTONFS(bp->b_vp->v_mount);
-again:
- if (nmp->nm_flag & NFSMNT_INT)
- slpflag = PCATCH;
- gotiod = FALSE;
-
- /*
- * Find a free iod to process this request.
- */
- for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
- if (nfs_iodwant[i]) {
- /*
- * Found one, so wake it up and tell it which
- * mount to process.
- */
- NFS_DPF(ASYNCIO,
- ("nfs_asyncio: waking iod %d for mount %p\n",
- i, nmp));
- nfs_iodwant[i] = (struct proc *)0;
- nfs_iodmount[i] = nmp;
- nmp->nm_bufqiods++;
- wakeup((caddr_t)&nfs_iodwant[i]);
- gotiod = TRUE;
- break;
- }
-
- /*
- * If none are free, we may already have an iod working on this mount
- * point. If so, it will process our request.
- */
- if (!gotiod) {
- if (nmp->nm_bufqiods > 0) {
- NFS_DPF(ASYNCIO,
- ("nfs_asyncio: %d iods are already processing mount %p\n",
- nmp->nm_bufqiods, nmp));
- gotiod = TRUE;
- }
- }
-
- /*
- * If we have an iod which can process the request, then queue
- * the buffer.
- */
- if (gotiod) {
- /*
- * Ensure that the queue never grows too large.
- */
- while (nmp->nm_bufqlen >= 2*nfs_numasync) {
- NFS_DPF(ASYNCIO,
- ("nfs_asyncio: waiting for mount %p queue to drain\n", nmp));
- nmp->nm_bufqwant = TRUE;
- error = tsleep(&nmp->nm_bufq, slpflag | PRIBIO,
- "nfsaio", slptimeo);
- if (error) {
- if (nfs_sigintr(nmp, NULL, bp->b_proc))
- return (EINTR);
- if (slpflag == PCATCH) {
- slpflag = 0;
- slptimeo = 2 * hz;
- }
- }
- /*
- * We might have lost our iod while sleeping,
- * so check and loop if nescessary.
- */
- if (nmp->nm_bufqiods == 0) {
- NFS_DPF(ASYNCIO,
- ("nfs_asyncio: no iods after mount %p queue was drained, looping\n", nmp));
- goto again;
- }
- }
-
- if (bp->b_flags & B_READ) {
- if (bp->b_rcred == NOCRED && cred != NOCRED) {
- crhold(cred);
- bp->b_rcred = cred;
- }
- } else {
- bp->b_flags |= B_WRITEINPROG;
- if (bp->b_wcred == NOCRED && cred != NOCRED) {
- crhold(cred);
- bp->b_wcred = cred;
- }
- }
-
- TAILQ_INSERT_TAIL(&nmp->nm_bufq, bp, b_freelist);
- nmp->nm_bufqlen++;
- return (0);
- }
-
- /*
- * All the iods are busy on other mounts, so return EIO to
- * force the caller to process the i/o synchronously.
- */
- NFS_DPF(ASYNCIO, ("nfs_asyncio: no iods available, i/o is synchronous\n"));
- return (EIO);
-}
-
-/*
- * Do an I/O operation to/from a cache block. This may be called
- * synchronously or from an nfsiod.
- */
-int
-nfs_doio(bp, cr, p)
- register struct buf *bp;
- struct ucred *cr;
- struct proc *p;
-{
- register struct uio *uiop;
- register struct vnode *vp;
- struct nfsnode *np;
- struct nfsmount *nmp;
- int error = 0, diff, len, iomode, must_commit = 0;
- struct uio uio;
- struct iovec io;
-
- vp = bp->b_vp;
- np = VTONFS(vp);
- nmp = VFSTONFS(vp->v_mount);
- uiop = &uio;
- uiop->uio_iov = &io;
- uiop->uio_iovcnt = 1;
- uiop->uio_segflg = UIO_SYSSPACE;
- uiop->uio_procp = p;
-
- /*
- * Historically, paging was done with physio, but no more.
- */
- if (bp->b_flags & B_PHYS) {
- /*
- * ...though reading /dev/drum still gets us here.
- */
- io.iov_len = uiop->uio_resid = bp->b_bcount;
- /* mapping was done by vmapbuf() */
- io.iov_base = bp->b_data;
- uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE;
- if (bp->b_flags & B_READ) {
- uiop->uio_rw = UIO_READ;
- nfsstats.read_physios++;
- error = nfs_readrpc(vp, uiop, cr);
- } else {
- int com;
-
- iomode = NFSV3WRITE_DATASYNC;
- uiop->uio_rw = UIO_WRITE;
- nfsstats.write_physios++;
- error = nfs_writerpc(vp, uiop, cr, &iomode, &com);
- }
- if (error) {
- bp->b_flags |= B_ERROR;
- bp->b_error = error;
- }
- } else if (bp->b_flags & B_READ) {
- io.iov_len = uiop->uio_resid = bp->b_bcount;
- io.iov_base = bp->b_data;
- uiop->uio_rw = UIO_READ;
- switch (vp->v_type) {
- case VREG:
- uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE;
- nfsstats.read_bios++;
- error = nfs_readrpc(vp, uiop, cr);
- if (!error) {
- bp->b_validoff = 0;
- if (uiop->uio_resid) {
- /*
- * If len > 0, there is a hole in the file and
- * no writes after the hole have been pushed to
- * the server yet.
- * Just zero fill the rest of the valid area.
- */
- diff = bp->b_bcount - uiop->uio_resid;
- len = np->n_size - (((u_quad_t)bp->b_blkno) * DEV_BSIZE
- + diff);
- if (len > 0) {
- len = min(len, uiop->uio_resid);
- bzero((char *)bp->b_data + diff, len);
- bp->b_validend = diff + len;
- } else
- bp->b_validend = diff;
- } else
- bp->b_validend = bp->b_bcount;
- }
- if (p && (vp->v_flag & VTEXT) &&
- (((nmp->nm_flag & NFSMNT_NQNFS) &&
- NQNFS_CKINVALID(vp, np, ND_READ) &&
- np->n_lrev != np->n_brev) ||
- (!(nmp->nm_flag & NFSMNT_NQNFS) &&
- np->n_mtime != np->n_vattr.va_mtime.tv_sec))) {
- uprintf("Process killed due to text file modification\n");
- psignal(p, SIGKILL);
- p->p_flag |= P_NOSWAP;
- }
- break;
- case VLNK:
- uiop->uio_offset = (off_t)0;
- nfsstats.readlink_bios++;
- error = nfs_readlinkrpc(vp, uiop, cr);
- break;
- case VDIR:
- nfsstats.readdir_bios++;
- uiop->uio_offset = ((u_quad_t)bp->b_lblkno) * NFS_DIRBLKSIZ;
- if (nmp->nm_flag & NFSMNT_RDIRPLUS) {
- error = nfs_readdirplusrpc(vp, uiop, cr);
- if (error == NFSERR_NOTSUPP)
- nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
- }
- if ((nmp->nm_flag & NFSMNT_RDIRPLUS) == 0)
- error = nfs_readdirrpc(vp, uiop, cr);
- if (error == 0 && uiop->uio_resid == bp->b_bcount)
- bp->b_flags |= B_INVAL;
- break;
- default:
- printf("nfs_doio: type %x unexpected\n",vp->v_type);
- break;
- };
- if (error) {
- bp->b_flags |= B_ERROR;
- bp->b_error = error;
- }
- } else {
- if ((off_t)bp->b_blkno * DEV_BSIZE + bp->b_dirtyend > np->n_size)
- bp->b_dirtyend = np->n_size - (off_t)bp->b_blkno * DEV_BSIZE;
-
- if (bp->b_dirtyend > bp->b_dirtyoff) {
- io.iov_len = uiop->uio_resid = bp->b_dirtyend
- - bp->b_dirtyoff;
- uiop->uio_offset = (off_t)bp->b_blkno * DEV_BSIZE
- + bp->b_dirtyoff;
- io.iov_base = (char *)bp->b_data + bp->b_dirtyoff;
- uiop->uio_rw = UIO_WRITE;
- nfsstats.write_bios++;
- if ((bp->b_flags & (B_ASYNC | B_NEEDCOMMIT | B_NOCACHE | B_CLUSTER)) == B_ASYNC)
- iomode = NFSV3WRITE_UNSTABLE;
- else
- iomode = NFSV3WRITE_FILESYNC;
- bp->b_flags |= B_WRITEINPROG;
- error = nfs_writerpc(vp, uiop, cr, &iomode, &must_commit);
- if (!error && iomode == NFSV3WRITE_UNSTABLE) {
- bp->b_flags |= B_NEEDCOMMIT;
- if (bp->b_dirtyoff == 0
- && bp->b_dirtyend == bp->b_bufsize)
- bp->b_flags |= B_CLUSTEROK;
- } else
- bp->b_flags &= ~B_NEEDCOMMIT;
- bp->b_flags &= ~B_WRITEINPROG;
-
- /*
- * For an interrupted write, the buffer is still valid
- * and the write hasn't been pushed to the server yet,
- * so we can't set B_ERROR and report the interruption
- * by setting B_EINTR. For the B_ASYNC case, B_EINTR
- * is not relevant, so the rpc attempt is essentially
- * a noop. For the case of a V3 write rpc not being
- * committed to stable storage, the block is still
- * dirty and requires either a commit rpc or another
- * write rpc with iomode == NFSV3WRITE_FILESYNC before
- * the block is reused. This is indicated by setting
- * the B_DELWRI and B_NEEDCOMMIT flags.
- */
- if (error == EINTR
- || (!error && (bp->b_flags & B_NEEDCOMMIT))) {
- int s;
-
- bp->b_flags &= ~(B_INVAL|B_NOCACHE);
- ++numdirtybuffers;
- bp->b_flags |= B_DELWRI;
- s = splbio();
- reassignbuf(bp, vp);
- splx(s);
- if ((bp->b_flags & B_ASYNC) == 0)
- bp->b_flags |= B_EINTR;
- } else {
- if (error) {
- bp->b_flags |= B_ERROR;
- bp->b_error = np->n_error = error;
- np->n_flag |= NWRITEERR;
- }
- bp->b_dirtyoff = bp->b_dirtyend = 0;
- }
- } else {
- bp->b_resid = 0;
- biodone(bp);
- return (0);
- }
- }
- bp->b_resid = uiop->uio_resid;
- if (must_commit)
- nfs_clearcommit(vp->v_mount);
- biodone(bp);
- return (error);
-}