about | summary | refs | log | tree | commit | diff
path: root/sys/ufs
diff options
context:
space:
mode:
Diffstat (limited to 'sys/ufs')
-rw-r--r-- sys/ufs/ffs/ffs_alloc.c 7
-rw-r--r-- sys/ufs/ffs/ffs_balloc.c 190
-rw-r--r-- sys/ufs/ffs/ffs_extern.h 8
-rw-r--r-- sys/ufs/ffs/ffs_inode.c 102
-rw-r--r-- sys/ufs/ffs/ffs_softdep.c 521
-rw-r--r-- sys/ufs/ffs/ffs_softdep_stub.c 14
-rw-r--r-- sys/ufs/ffs/softdep.h 13
-rw-r--r-- sys/ufs/ufs/ufs_bmap.c 19
-rw-r--r-- sys/ufs/ufs/ufs_extern.h 18
-rw-r--r-- sys/ufs/ufs/ufs_inode.c 3
-rw-r--r-- sys/ufs/ufs/ufs_lookup.c 5
-rw-r--r-- sys/ufs/ufs/ufs_readwrite.c 341
-rw-r--r-- sys/ufs/ufs/ufs_vnops.c 13
13 files changed, 1043 insertions, 211 deletions
diff --git a/sys/ufs/ffs/ffs_alloc.c b/sys/ufs/ffs/ffs_alloc.c
index 710d6d14424b..1360ec8b18fe 100644
--- a/sys/ufs/ffs/ffs_alloc.c
+++ b/sys/ufs/ffs/ffs_alloc.c
@@ -188,9 +188,10 @@ nospace:
* invoked to get an appropriate block.
*/
int
-ffs_realloccg(ip, lbprev, bpref, osize, nsize, cred, bpp)
+ffs_realloccg(ip, lbprev, bprev, bpref, osize, nsize, cred, bpp)
struct inode *ip;
ufs2_daddr_t lbprev;
+ ufs2_daddr_t bprev;
ufs2_daddr_t bpref;
int osize, nsize;
struct ucred *cred;
@@ -200,7 +201,7 @@ ffs_realloccg(ip, lbprev, bpref, osize, nsize, cred, bpp)
struct fs *fs;
struct buf *bp;
int cg, request, error, reclaimed;
- ufs2_daddr_t bprev, bno;
+ ufs2_daddr_t bno;
*bpp = 0;
vp = ITOV(ip);
@@ -224,7 +225,7 @@ retry:
if (suser_cred(cred, PRISON_ROOT) &&
freespace(fs, fs->fs_minfree) - numfrags(fs, nsize - osize) < 0)
goto nospace;
- if ((bprev = DIP(ip, i_db[lbprev])) == 0) {
+ if (bprev == 0) {
printf("dev = %s, bsize = %ld, bprev = %jd, fs = %s\n",
devtoname(ip->i_dev), (long)fs->fs_bsize, (intmax_t)bprev,
fs->fs_fsmnt);
diff --git a/sys/ufs/ffs/ffs_balloc.c b/sys/ufs/ffs/ffs_balloc.c
index 9b1c3839b4cb..d9e8a08dbf5d 100644
--- a/sys/ufs/ffs/ffs_balloc.c
+++ b/sys/ufs/ffs/ffs_balloc.c
@@ -73,6 +73,7 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
struct ucred *cred, int flags, struct buf **bpp)
{
struct inode *ip;
+ struct ufs1_dinode *dp;
ufs_lbn_t lbn, lastlbn;
struct fs *fs;
ufs1_daddr_t nb;
@@ -86,12 +87,15 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
struct thread *td = curthread; /* XXX */
ip = VTOI(vp);
+ dp = ip->i_din1;
fs = ip->i_fs;
lbn = lblkno(fs, startoffset);
size = blkoff(fs, startoffset) + size;
if (size > fs->fs_bsize)
panic("ffs_balloc_ufs1: blk too big");
*bpp = NULL;
+ if (flags & IO_EXT)
+ return (EOPNOTSUPP);
if (lbn < 0)
return (EFBIG);
@@ -105,22 +109,20 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
nb = lastlbn;
osize = blksize(fs, ip, nb);
if (osize < fs->fs_bsize && osize > 0) {
- error = ffs_realloccg(ip, nb,
- ffs_blkpref_ufs1(ip, lastlbn, (int)nb,
- &ip->i_din1->di_db[0]),
- osize, (int)fs->fs_bsize, cred, &bp);
+ error = ffs_realloccg(ip, nb, dp->di_db[nb],
+ ffs_blkpref_ufs1(ip, lastlbn, (int)nb,
+ &dp->di_db[0]), osize, (int)fs->fs_bsize, cred, &bp);
if (error)
return (error);
if (DOINGSOFTDEP(vp))
softdep_setup_allocdirect(ip, nb,
- dbtofsb(fs, bp->b_blkno),
- ip->i_din1->di_db[nb],
+ dbtofsb(fs, bp->b_blkno), dp->di_db[nb],
fs->fs_bsize, osize, bp);
ip->i_size = smalllblktosize(fs, nb + 1);
- ip->i_din1->di_size = ip->i_size;
- ip->i_din1->di_db[nb] = dbtofsb(fs, bp->b_blkno);
+ dp->di_size = ip->i_size;
+ dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
ip->i_flag |= IN_CHANGE | IN_UPDATE;
- if (flags & BA_SYNC)
+ if (flags & IO_SYNC)
bwrite(bp);
else
bawrite(bp);
@@ -132,7 +134,7 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
if (lbn < NDADDR) {
if (flags & BA_METAONLY)
panic("ffs_balloc_ufs1: BA_METAONLY for direct block");
- nb = ip->i_din1->di_db[lbn];
+ nb = dp->di_db[lbn];
if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp);
if (error) {
@@ -157,10 +159,9 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
}
bp->b_blkno = fsbtodb(fs, nb);
} else {
- error = ffs_realloccg(ip, lbn,
+ error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
ffs_blkpref_ufs1(ip, lbn, (int)lbn,
- &ip->i_din1->di_db[0]),
- osize, nsize, cred, &bp);
+ &dp->di_db[0]), osize, nsize, cred, &bp);
if (error)
return (error);
if (DOINGSOFTDEP(vp))
@@ -174,8 +175,7 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
else
nsize = fs->fs_bsize;
error = ffs_alloc(ip, lbn,
- ffs_blkpref_ufs1(ip, lbn, (int)lbn,
- &ip->i_din1->di_db[0]),
+ ffs_blkpref_ufs1(ip, lbn, (int)lbn, &dp->di_db[0]),
nsize, cred, &newb);
if (error)
return (error);
@@ -187,7 +187,7 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
softdep_setup_allocdirect(ip, lbn, newb, 0,
nsize, 0, bp);
}
- ip->i_din1->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
+ dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
ip->i_flag |= IN_CHANGE | IN_UPDATE;
*bpp = bp;
return (0);
@@ -206,7 +206,7 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
* Fetch the first indirect block allocating if necessary.
*/
--num;
- nb = ip->i_din1->di_ib[indirs[0].in_off];
+ nb = dp->di_ib[indirs[0].in_off];
allocib = NULL;
allocblk = allociblk;
if (nb == 0) {
@@ -233,7 +233,7 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
else if ((error = bwrite(bp)) != 0)
goto fail;
}
- allocib = &ip->i_din1->di_ib[indirs[0].in_off];
+ allocib = &dp->di_ib[indirs[0].in_off];
*allocib = nb;
ip->i_flag |= IN_CHANGE | IN_UPDATE;
}
@@ -289,7 +289,7 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
* If required, write synchronously, otherwise use
* delayed write.
*/
- if (flags & BA_SYNC) {
+ if (flags & IO_SYNC) {
bwrite(bp);
} else {
if (bp->b_bufsize == fs->fs_bsize)
@@ -329,7 +329,7 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
* If required, write synchronously, otherwise use
* delayed write.
*/
- if (flags & BA_SYNC) {
+ if (flags & IO_SYNC) {
bwrite(bp);
} else {
if (bp->b_bufsize == fs->fs_bsize)
@@ -382,7 +382,7 @@ fail:
} else {
bap = (ufs1_daddr_t *)bp->b_data;
bap[indirs[unwindidx].in_off] = 0;
- if (flags & BA_SYNC) {
+ if (flags & IO_SYNC) {
bwrite(bp);
} else {
if (bp->b_bufsize == fs->fs_bsize)
@@ -398,7 +398,7 @@ fail:
*/
(void) chkdq(ip, -btodb(deallocated), cred, FORCE);
#endif
- ip->i_din1->di_blocks -= btodb(deallocated);
+ dp->di_blocks -= btodb(deallocated);
ip->i_flag |= IN_CHANGE | IN_UPDATE;
}
(void) VOP_FSYNC(vp, cred, MNT_WAIT, td);
@@ -417,6 +417,7 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
struct ucred *cred, int flags, struct buf **bpp)
{
struct inode *ip;
+ struct ufs2_dinode *dp;
ufs_lbn_t lbn, lastlbn;
struct fs *fs;
struct buf *bp, *nbp;
@@ -428,6 +429,7 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
struct thread *td = curthread; /* XXX */
ip = VTOI(vp);
+ dp = ip->i_din2;
fs = ip->i_fs;
lbn = lblkno(fs, startoffset);
size = blkoff(fs, startoffset) + size;
@@ -438,6 +440,112 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
return (EFBIG);
/*
+ * Check for allocating external data.
+ */
+ if (flags & IO_EXT) {
+ if (lbn >= NXADDR)
+ return (EFBIG);
+ /*
+ * If the next write will extend the data into a new block,
+ * and the data is currently composed of a fragment
+ * this fragment has to be extended to be a full block.
+ */
+ lastlbn = lblkno(fs, dp->di_extsize);
+ if (lastlbn < lbn) {
+ nb = lastlbn;
+ osize = sblksize(fs, dp->di_extsize, nb);
+ if (osize < fs->fs_bsize && osize > 0) {
+ error = ffs_realloccg(ip, -1 - nb,
+ dp->di_extb[nb],
+ ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
+ &dp->di_extb[0]), osize,
+ (int)fs->fs_bsize, cred, &bp);
+ if (error)
+ return (error);
+ if (DOINGSOFTDEP(vp))
+ softdep_setup_allocext(ip, nb,
+ dbtofsb(fs, bp->b_blkno),
+ dp->di_extb[nb],
+ fs->fs_bsize, osize, bp);
+ dp->di_extsize = smalllblktosize(fs, nb + 1);
+ dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno);
+ bp->b_xflags |= BX_ALTDATA;
+ ip->i_flag |= IN_CHANGE | IN_UPDATE;
+ if (flags & IO_SYNC)
+ bwrite(bp);
+ else
+ bawrite(bp);
+ }
+ }
+ /*
+ * All blocks are direct blocks
+ */
+ if (flags & BA_METAONLY)
+ panic("ffs_balloc_ufs2: BA_METAONLY for ext block");
+ nb = dp->di_extb[lbn];
+ if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) {
+ error = bread(vp, -1 - lbn, fs->fs_bsize, NOCRED, &bp);
+ if (error) {
+ brelse(bp);
+ return (error);
+ }
+ bp->b_blkno = fsbtodb(fs, nb);
+ bp->b_xflags |= BX_ALTDATA;
+ *bpp = bp;
+ return (0);
+ }
+ if (nb != 0) {
+ /*
+ * Consider need to reallocate a fragment.
+ */
+ osize = fragroundup(fs, blkoff(fs, dp->di_extsize));
+ nsize = fragroundup(fs, size);
+ if (nsize <= osize) {
+ error = bread(vp, -1 - lbn, osize, NOCRED, &bp);
+ if (error) {
+ brelse(bp);
+ return (error);
+ }
+ bp->b_blkno = fsbtodb(fs, nb);
+ bp->b_xflags |= BX_ALTDATA;
+ } else {
+ error = ffs_realloccg(ip, -1 - lbn,
+ dp->di_extb[lbn],
+ ffs_blkpref_ufs2(ip, lbn, (int)lbn,
+ &dp->di_extb[0]), osize, nsize, cred, &bp);
+ if (error)
+ return (error);
+ bp->b_xflags |= BX_ALTDATA;
+ if (DOINGSOFTDEP(vp))
+ softdep_setup_allocext(ip, lbn,
+ dbtofsb(fs, bp->b_blkno), nb,
+ nsize, osize, bp);
+ }
+ } else {
+ if (dp->di_extsize < smalllblktosize(fs, lbn + 1))
+ nsize = fragroundup(fs, size);
+ else
+ nsize = fs->fs_bsize;
+ error = ffs_alloc(ip, lbn,
+ ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_extb[0]),
+ nsize, cred, &newb);
+ if (error)
+ return (error);
+ bp = getblk(vp, -1 - lbn, nsize, 0, 0);
+ bp->b_blkno = fsbtodb(fs, newb);
+ bp->b_xflags |= BX_ALTDATA;
+ if (flags & BA_CLRBUF)
+ vfs_bio_clrbuf(bp);
+ if (DOINGSOFTDEP(vp))
+ softdep_setup_allocext(ip, lbn, newb, 0,
+ nsize, 0, bp);
+ }
+ dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno);
+ ip->i_flag |= IN_CHANGE | IN_UPDATE;
+ *bpp = bp;
+ return (0);
+ }
+ /*
* If the next write will extend the file into a new block,
* and the file is currently composed of a fragment
* this fragment has to be extended to be a full block.
@@ -447,22 +555,22 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
nb = lastlbn;
osize = blksize(fs, ip, nb);
if (osize < fs->fs_bsize && osize > 0) {
- error = ffs_realloccg(ip, nb,
+ error = ffs_realloccg(ip, nb, dp->di_db[nb],
ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
- &ip->i_din2->di_db[0]),
- osize, (int)fs->fs_bsize, cred, &bp);
+ &dp->di_db[0]), osize, (int)fs->fs_bsize,
+ cred, &bp);
if (error)
return (error);
if (DOINGSOFTDEP(vp))
softdep_setup_allocdirect(ip, nb,
dbtofsb(fs, bp->b_blkno),
- ip->i_din2->di_db[nb],
+ dp->di_db[nb],
fs->fs_bsize, osize, bp);
ip->i_size = smalllblktosize(fs, nb + 1);
- ip->i_din2->di_size = ip->i_size;
- ip->i_din2->di_db[nb] = dbtofsb(fs, bp->b_blkno);
+ dp->di_size = ip->i_size;
+ dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
ip->i_flag |= IN_CHANGE | IN_UPDATE;
- if (flags & BA_SYNC)
+ if (flags & IO_SYNC)
bwrite(bp);
else
bawrite(bp);
@@ -474,7 +582,7 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
if (lbn < NDADDR) {
if (flags & BA_METAONLY)
panic("ffs_balloc_ufs2: BA_METAONLY for direct block");
- nb = ip->i_din2->di_db[lbn];
+ nb = dp->di_db[lbn];
if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp);
if (error) {
@@ -499,10 +607,9 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
}
bp->b_blkno = fsbtodb(fs, nb);
} else {
- error = ffs_realloccg(ip, lbn,
+ error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
ffs_blkpref_ufs2(ip, lbn, (int)lbn,
- &ip->i_din2->di_db[0]),
- osize, nsize, cred, &bp);
+ &dp->di_db[0]), osize, nsize, cred, &bp);
if (error)
return (error);
if (DOINGSOFTDEP(vp))
@@ -517,8 +624,7 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
nsize = fs->fs_bsize;
error = ffs_alloc(ip, lbn,
ffs_blkpref_ufs2(ip, lbn, (int)lbn,
- &ip->i_din2->di_db[0]),
- nsize, cred, &newb);
+ &dp->di_db[0]), nsize, cred, &newb);
if (error)
return (error);
bp = getblk(vp, lbn, nsize, 0, 0);
@@ -529,7 +635,7 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
softdep_setup_allocdirect(ip, lbn, newb, 0,
nsize, 0, bp);
}
- ip->i_din2->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
+ dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
ip->i_flag |= IN_CHANGE | IN_UPDATE;
*bpp = bp;
return (0);
@@ -548,7 +654,7 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
* Fetch the first indirect block allocating if necessary.
*/
--num;
- nb = ip->i_din2->di_ib[indirs[0].in_off];
+ nb = dp->di_ib[indirs[0].in_off];
allocib = NULL;
allocblk = allociblk;
if (nb == 0) {
@@ -575,7 +681,7 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
else if ((error = bwrite(bp)) != 0)
goto fail;
}
- allocib = &ip->i_din2->di_ib[indirs[0].in_off];
+ allocib = &dp->di_ib[indirs[0].in_off];
*allocib = nb;
ip->i_flag |= IN_CHANGE | IN_UPDATE;
}
@@ -631,7 +737,7 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
* If required, write synchronously, otherwise use
* delayed write.
*/
- if (flags & BA_SYNC) {
+ if (flags & IO_SYNC) {
bwrite(bp);
} else {
if (bp->b_bufsize == fs->fs_bsize)
@@ -671,7 +777,7 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
* If required, write synchronously, otherwise use
* delayed write.
*/
- if (flags & BA_SYNC) {
+ if (flags & IO_SYNC) {
bwrite(bp);
} else {
if (bp->b_bufsize == fs->fs_bsize)
@@ -724,7 +830,7 @@ fail:
} else {
bap = (ufs2_daddr_t *)bp->b_data;
bap[indirs[unwindidx].in_off] = 0;
- if (flags & BA_SYNC) {
+ if (flags & IO_SYNC) {
bwrite(bp);
} else {
if (bp->b_bufsize == fs->fs_bsize)
@@ -740,7 +846,7 @@ fail:
*/
(void) chkdq(ip, -btodb(deallocated), cred, FORCE);
#endif
- ip->i_din2->di_blocks -= btodb(deallocated);
+ dp->di_blocks -= btodb(deallocated);
ip->i_flag |= IN_CHANGE | IN_UPDATE;
}
(void) VOP_FSYNC(vp, cred, MNT_WAIT, td);
diff --git a/sys/ufs/ffs/ffs_extern.h b/sys/ufs/ffs/ffs_extern.h
index ae59ca380ffb..c2972c8f2305 100644
--- a/sys/ufs/ffs/ffs_extern.h
+++ b/sys/ufs/ffs/ffs_extern.h
@@ -79,8 +79,8 @@ int ffs_mountroot(void);
int ffs_mount(struct mount *, char *, caddr_t, struct nameidata *,
struct thread *);
int ffs_reallocblks(struct vop_reallocblks_args *);
-int ffs_realloccg(struct inode *,
- ufs2_daddr_t, ufs2_daddr_t, int, int, struct ucred *, struct buf **);
+int ffs_realloccg(struct inode *, ufs2_daddr_t, ufs2_daddr_t,
+ ufs2_daddr_t, int, int, struct ucred *, struct buf **);
void ffs_setblock(struct fs *, u_char *, ufs1_daddr_t);
int ffs_snapblkfree(struct fs *, struct vnode *, ufs2_daddr_t, long, ino_t);
void ffs_snapremove(struct vnode *vp);
@@ -115,11 +115,13 @@ void softdep_update_inodeblock(struct inode *, struct buf *, int);
void softdep_load_inodeblock(struct inode *);
void softdep_freefile(struct vnode *, ino_t, int);
int softdep_request_cleanup(struct fs *, struct vnode *);
-void softdep_setup_freeblocks(struct inode *, off_t);
+void softdep_setup_freeblocks(struct inode *, off_t, int);
void softdep_setup_inomapdep(struct buf *, struct inode *, ino_t);
void softdep_setup_blkmapdep(struct buf *, struct fs *, ufs2_daddr_t);
void softdep_setup_allocdirect(struct inode *, ufs_lbn_t, ufs2_daddr_t,
ufs2_daddr_t, long, long, struct buf *);
+void softdep_setup_allocext(struct inode *, ufs_lbn_t, ufs2_daddr_t,
+ ufs2_daddr_t, long, long, struct buf *);
void softdep_setup_allocindir_meta(struct buf *, struct inode *,
struct buf *, int, ufs2_daddr_t);
void softdep_setup_allocindir_page(struct inode *, ufs_lbn_t,
diff --git a/sys/ufs/ffs/ffs_inode.c b/sys/ufs/ffs/ffs_inode.c
index 08e5fddc3eff..83fa66ef22fd 100644
--- a/sys/ufs/ffs/ffs_inode.c
+++ b/sys/ufs/ffs/ffs_inode.c
@@ -146,22 +146,81 @@ ffs_truncate(vp, length, flags, cred, td)
struct inode *oip;
ufs2_daddr_t bn, lbn, lastblock, lastiblock[NIADDR], indir_lbn[NIADDR];
ufs2_daddr_t oldblks[NDADDR + NIADDR], newblks[NDADDR + NIADDR];
- ufs2_daddr_t count, blocksreleased = 0;
+ ufs2_daddr_t count, blocksreleased = 0, datablocks;
struct fs *fs;
struct buf *bp;
+ int needextclean, softdepslowdown, extblocks;
int offset, size, level, nblocks;
- int i, aflags, error, allerror;
+ int i, error, allerror;
off_t osize;
oip = VTOI(ovp);
fs = oip->i_fs;
if (length < 0)
return (EINVAL);
+ /*
+ * Historically clients did not have to specify which data
+ * they were truncating. So, if not specified, we assume
+ * traditional behavior, i.e., just the normal data.
+ */
+ if ((flags & (IO_EXT | IO_NORMAL)) == 0)
+ flags |= IO_NORMAL;
+ /*
+ * If we are truncating the extended-attributes, and cannot
+ * do it with soft updates, then do it slowly here. If we are
+ * truncating both the extended attributes and the file contents
+ * (e.g., the file is being unlinked), then pick it off with
+ * soft updates below.
+ */
+ needextclean = 0;
+ softdepslowdown = softdep_slowdown(ovp);
+ extblocks = 0;
+ datablocks = DIP(oip, i_blocks);
+ if (fs->fs_magic == FS_UFS2_MAGIC && oip->i_din2->di_extsize > 0) {
+ extblocks = btodb(fragroundup(fs, oip->i_din2->di_extsize));
+ datablocks -= extblocks;
+ }
+ if ((flags & IO_EXT) && extblocks > 0) {
+ if (DOINGSOFTDEP(ovp) && softdepslowdown == 0 && length == 0) {
+ if ((flags & IO_NORMAL) == 0) {
+ softdep_setup_freeblocks(oip, length, IO_EXT);
+ return (0);
+ }
+ needextclean = 1;
+ } else {
+ if (length != 0)
+ panic("ffs_truncate: partial trunc of extdata");
+ if ((error = VOP_FSYNC(ovp, cred, MNT_WAIT, td)) != 0)
+ return (error);
+ osize = oip->i_din2->di_extsize;
+ oip->i_din2->di_blocks -= extblocks;
+#ifdef QUOTA
+ (void) chkdq(oip, -extblocks, NOCRED, 0);
+#endif
+ vinvalbuf(ovp, V_ALT, cred, td, 0, 0);
+ oip->i_din2->di_extsize = 0;
+ for (i = 0; i < NXADDR; i++) {
+ oldblks[i] = oip->i_din2->di_extb[i];
+ oip->i_din2->di_extb[i] = 0;
+ }
+ oip->i_flag |= IN_CHANGE | IN_UPDATE;
+ if ((error = ffs_update(ovp, 1)))
+ return (error);
+ for (i = 0; i < NXADDR; i++) {
+ if (oldblks[i] == 0)
+ continue;
+ ffs_blkfree(fs, oip->i_devvp, oldblks[i],
+ sblksize(fs, osize, i), oip->i_number);
+ }
+ }
+ }
+ if ((flags & IO_NORMAL) == 0)
+ return (0);
if (length > fs->fs_maxfilesize)
return (EFBIG);
if (ovp->v_type == VLNK &&
(oip->i_size < ovp->v_mount->mnt_maxsymlinklen ||
- DIP(oip, i_blocks) == 0)) {
+ datablocks == 0)) {
#ifdef DIAGNOSTIC
if (length != 0)
panic("ffs_truncate: partial truncate of symlink");
@@ -170,10 +229,14 @@ ffs_truncate(vp, length, flags, cred, td)
oip->i_size = 0;
DIP(oip, i_size) = 0;
oip->i_flag |= IN_CHANGE | IN_UPDATE;
+ if (needextclean)
+ softdep_setup_freeblocks(oip, length, IO_EXT);
return (UFS_UPDATE(ovp, 1));
}
if (oip->i_size == length) {
oip->i_flag |= IN_CHANGE | IN_UPDATE;
+ if (needextclean)
+ softdep_setup_freeblocks(oip, length, IO_EXT);
return (UFS_UPDATE(ovp, 0));
}
if (fs->fs_ronly)
@@ -187,7 +250,7 @@ ffs_truncate(vp, length, flags, cred, td)
ffs_snapremove(ovp);
ovp->v_lasta = ovp->v_clen = ovp->v_cstart = ovp->v_lastw = 0;
if (DOINGSOFTDEP(ovp)) {
- if (length > 0 || softdep_slowdown(ovp)) {
+ if (length > 0 || softdepslowdown) {
/*
* If a file is only partially truncated, then
* we have to clean up the data structures
@@ -197,17 +260,18 @@ ffs_truncate(vp, length, flags, cred, td)
* rarely, we solve the problem by syncing the file
* so that it will have no data structures left.
*/
- if ((error = VOP_FSYNC(ovp, cred, MNT_WAIT,
- td)) != 0)
+ if ((error = VOP_FSYNC(ovp, cred, MNT_WAIT, td)) != 0)
return (error);
if (oip->i_flag & IN_SPACECOUNTED)
- fs->fs_pendingblocks -= DIP(oip, i_blocks);
+ fs->fs_pendingblocks -= datablocks;
} else {
#ifdef QUOTA
- (void) chkdq(oip, -DIP(oip, i_blocks), NOCRED, 0);
+ (void) chkdq(oip, -datablocks, NOCRED, 0);
#endif
- softdep_setup_freeblocks(oip, length);
- vinvalbuf(ovp, 0, cred, td, 0, 0);
+ softdep_setup_freeblocks(oip, length, needextclean ?
+ IO_EXT | IO_NORMAL : IO_NORMAL);
+ vinvalbuf(ovp, needextclean ? 0 : V_NORMAL,
+ cred, td, 0, 0);
oip->i_flag |= IN_CHANGE | IN_UPDATE;
return (ffs_update(ovp, 0));
}
@@ -220,18 +284,15 @@ ffs_truncate(vp, length, flags, cred, td)
*/
if (osize < length) {
vnode_pager_setsize(ovp, length);
- aflags = BA_CLRBUF;
- if (flags & IO_SYNC)
- aflags |= BA_SYNC;
- error = UFS_BALLOC(ovp, length - 1, 1,
- cred, aflags, &bp);
+ flags |= BA_CLRBUF;
+ error = UFS_BALLOC(ovp, length - 1, 1, cred, flags, &bp);
if (error)
return (error);
oip->i_size = length;
DIP(oip, i_size) = length;
if (bp->b_bufsize == fs->fs_bsize)
bp->b_flags |= B_CLUSTEROK;
- if (aflags & BA_SYNC)
+ if (flags & IO_SYNC)
bwrite(bp);
else
bawrite(bp);
@@ -252,10 +313,8 @@ ffs_truncate(vp, length, flags, cred, td)
DIP(oip, i_size) = length;
} else {
lbn = lblkno(fs, length);
- aflags = BA_CLRBUF;
- if (flags & IO_SYNC)
- aflags |= BA_SYNC;
- error = UFS_BALLOC(ovp, length - 1, 1, cred, aflags, &bp);
+ flags |= BA_CLRBUF;
+ error = UFS_BALLOC(ovp, length - 1, 1, cred, flags, &bp);
if (error) {
return (error);
}
@@ -281,7 +340,7 @@ ffs_truncate(vp, length, flags, cred, td)
allocbuf(bp, size);
if (bp->b_bufsize == fs->fs_bsize)
bp->b_flags |= B_CLUSTEROK;
- if (aflags & BA_SYNC)
+ if (flags & IO_SYNC)
bwrite(bp);
else
bawrite(bp);
@@ -420,6 +479,7 @@ done:
if (newblks[i] != DIP(oip, i_db[i]))
panic("ffs_truncate2");
if (length == 0 &&
+ (fs->fs_magic != FS_UFS2_MAGIC || oip->i_din2->di_extsize == 0) &&
(!TAILQ_EMPTY(&ovp->v_dirtyblkhd) ||
!TAILQ_EMPTY(&ovp->v_cleanblkhd)))
panic("ffs_truncate3");
diff --git a/sys/ufs/ffs/ffs_softdep.c b/sys/ufs/ffs/ffs_softdep.c
index f03615099af9..631c82a34855 100644
--- a/sys/ufs/ffs/ffs_softdep.c
+++ b/sys/ufs/ffs/ffs_softdep.c
@@ -157,6 +157,7 @@ static void clear_inodedeps(struct thread *);
static int flush_pagedep_deps(struct vnode *, struct mount *,
struct diraddhd *);
static int flush_inodedep_deps(struct fs *, ino_t);
+static int flush_deplist(struct allocdirectlst *, int, int *);
static int handle_written_filepage(struct pagedep *, struct buf *);
static void diradd_inode_written(struct diradd *, struct inodedep *);
static int handle_written_inodeblock(struct inodedep *, struct buf *);
@@ -181,7 +182,7 @@ static void free_allocdirect(struct allocdirectlst *,
static int check_inode_unwritten(struct inodedep *);
static int free_inodedep(struct inodedep *);
static void handle_workitem_freeblocks(struct freeblks *, int);
-static void merge_inode_lists(struct inodedep *);
+static void merge_inode_lists(struct allocdirectlst *,struct allocdirectlst *);
static void setup_allocindir_phase2(struct buf *, struct inode *,
struct allocindir *);
static struct allocindir *newallocindir(struct inode *, int, ufs2_daddr_t,
@@ -1041,12 +1042,15 @@ top:
inodedep->id_nlinkdelta = 0;
inodedep->id_savedino1 = NULL;
inodedep->id_savedsize = -1;
+ inodedep->id_savedextsize = -1;
inodedep->id_buf = NULL;
LIST_INIT(&inodedep->id_pendinghd);
LIST_INIT(&inodedep->id_inowait);
LIST_INIT(&inodedep->id_bufwait);
TAILQ_INIT(&inodedep->id_inoupdt);
TAILQ_INIT(&inodedep->id_newinoupdt);
+ TAILQ_INIT(&inodedep->id_extupdt);
+ TAILQ_INIT(&inodedep->id_newextupdt);
ACQUIRE_LOCK(&lk);
LIST_INSERT_HEAD(inodedephd, inodedep, id_hash);
sema_release(&inodedep_in_progress);
@@ -1566,6 +1570,103 @@ handle_workitem_freefrag(freefrag)
}
/*
+ * Set up a dependency structure for an external attributes data block.
+ * This routine follows much of the structure of softdep_setup_allocdirect.
+ * See the description of softdep_setup_allocdirect above for details.
+ */
+void
+softdep_setup_allocext(ip, lbn, newblkno, oldblkno, newsize, oldsize, bp)
+ struct inode *ip;
+ ufs_lbn_t lbn;
+ ufs2_daddr_t newblkno;
+ ufs2_daddr_t oldblkno;
+ long newsize;
+ long oldsize;
+ struct buf *bp;
+{
+ struct allocdirect *adp, *oldadp;
+ struct allocdirectlst *adphead;
+ struct bmsafemap *bmsafemap;
+ struct inodedep *inodedep;
+ struct newblk *newblk;
+
+ MALLOC(adp, struct allocdirect *, sizeof(struct allocdirect),
+ M_ALLOCDIRECT, M_SOFTDEP_FLAGS|M_ZERO);
+ adp->ad_list.wk_type = D_ALLOCDIRECT;
+ adp->ad_lbn = lbn;
+ adp->ad_newblkno = newblkno;
+ adp->ad_oldblkno = oldblkno;
+ adp->ad_newsize = newsize;
+ adp->ad_oldsize = oldsize;
+ adp->ad_state = ATTACHED | EXTDATA;
+ LIST_INIT(&adp->ad_newdirblk);
+ if (newblkno == oldblkno)
+ adp->ad_freefrag = NULL;
+ else
+ adp->ad_freefrag = newfreefrag(ip, oldblkno, oldsize);
+
+ if (newblk_lookup(ip->i_fs, newblkno, 0, &newblk) == 0)
+ panic("softdep_setup_allocext: lost block");
+
+ ACQUIRE_LOCK(&lk);
+ inodedep_lookup(ip->i_fs, ip->i_number, DEPALLOC | NODELAY, &inodedep);
+ adp->ad_inodedep = inodedep;
+
+ if (newblk->nb_state == DEPCOMPLETE) {
+ adp->ad_state |= DEPCOMPLETE;
+ adp->ad_buf = NULL;
+ } else {
+ bmsafemap = newblk->nb_bmsafemap;
+ adp->ad_buf = bmsafemap->sm_buf;
+ LIST_REMOVE(newblk, nb_deps);
+ LIST_INSERT_HEAD(&bmsafemap->sm_allocdirecthd, adp, ad_deps);
+ }
+ LIST_REMOVE(newblk, nb_hash);
+ FREE(newblk, M_NEWBLK);
+
+ WORKLIST_INSERT(&bp->b_dep, &adp->ad_list);
+ if (lbn >= NXADDR) {
+ FREE_LOCK(&lk);
+ panic("softdep_setup_allocext: lbn %d > NXADDR", lbn);
+ }
+ /*
+ * The list of allocdirects must be kept in sorted and ascending
+ * order so that the rollback routines can quickly determine the
+ * first uncommitted block (the size of the file stored on disk
+ * ends at the end of the lowest committed fragment, or if there
+ * are no fragments, at the end of the highest committed block).
+ * Since files generally grow, the typical case is that the new
+ * block is to be added at the end of the list. We speed this
+ * special case by checking against the last allocdirect in the
+ * list before laboriously traversing the list looking for the
+ * insertion point.
+ */
+ adphead = &inodedep->id_newextupdt;
+ oldadp = TAILQ_LAST(adphead, allocdirectlst);
+ if (oldadp == NULL || oldadp->ad_lbn <= lbn) {
+ /* insert at end of list */
+ TAILQ_INSERT_TAIL(adphead, adp, ad_next);
+ if (oldadp != NULL && oldadp->ad_lbn == lbn)
+ allocdirect_merge(adphead, adp, oldadp);
+ FREE_LOCK(&lk);
+ return;
+ }
+ TAILQ_FOREACH(oldadp, adphead, ad_next) {
+ if (oldadp->ad_lbn >= lbn)
+ break;
+ }
+ if (oldadp == NULL) {
+ FREE_LOCK(&lk);
+ panic("softdep_setup_allocext: lost entry");
+ }
+ /* insert in middle of list */
+ TAILQ_INSERT_BEFORE(oldadp, adp, ad_next);
+ if (oldadp->ad_lbn == lbn)
+ allocdirect_merge(adphead, adp, oldadp);
+ FREE_LOCK(&lk);
+}
+
+/*
* Indirect block allocation dependencies.
*
* The same dependencies that exist for a direct block also exist when
@@ -1769,7 +1870,8 @@ setup_allocindir_phase2(bp, ip, aip)
LIST_INIT(&newindirdep->ir_deplisthd);
LIST_INIT(&newindirdep->ir_donehd);
if (bp->b_blkno == bp->b_lblkno) {
- ufs_bmaparray(bp->b_vp, bp->b_lblkno, &blkno, NULL, NULL);
+ ufs_bmaparray(bp->b_vp, bp->b_lblkno, &blkno, bp,
+ NULL, NULL);
bp->b_blkno = blkno;
}
newindirdep->ir_savebp =
@@ -1809,9 +1911,10 @@ setup_allocindir_phase2(bp, ip, aip)
* can release it.
*/
void
-softdep_setup_freeblocks(ip, length)
+softdep_setup_freeblocks(ip, length, flags)
struct inode *ip; /* The inode whose length is to be reduced */
off_t length; /* The new length for the file */
+ int flags; /* IO_EXT and/or IO_NORMAL */
{
struct freeblks *freeblks;
struct inodedep *inodedep;
@@ -1819,6 +1922,7 @@ softdep_setup_freeblocks(ip, length)
struct vnode *vp;
struct buf *bp;
struct fs *fs;
+ ufs2_daddr_t extblocks, datablocks;
int i, delay, error;
fs = ip->i_fs;
@@ -1831,27 +1935,46 @@ softdep_setup_freeblocks(ip, length)
freeblks->fb_previousinum = ip->i_number;
freeblks->fb_devvp = ip->i_devvp;
freeblks->fb_mnt = ITOV(ip)->v_mount;
- freeblks->fb_oldsize = ip->i_size;
- freeblks->fb_newsize = length;
- freeblks->fb_chkcnt = DIP(ip, i_blocks);
- for (i = 0; i < NDADDR; i++) {
- freeblks->fb_dblks[i] = DIP(ip, i_db[i]);
- DIP(ip, i_db[i]) = 0;
- }
- for (i = 0; i < NIADDR; i++) {
- freeblks->fb_iblks[i] = DIP(ip, i_ib[i]);
- DIP(ip, i_ib[i]) = 0;
- }
- DIP(ip, i_blocks) = 0;
- ip->i_size = 0;
- DIP(ip, i_size) = 0;
- /*
- * If the file was removed, then the space being freed was
- * accounted for then (see softdep_filereleased()). If the
- * file is merely being truncated, then we account for it now.
- */
- if ((ip->i_flag & IN_SPACECOUNTED) == 0)
- fs->fs_pendingblocks += freeblks->fb_chkcnt;
+ extblocks = 0;
+ if (fs->fs_magic == FS_UFS2_MAGIC)
+ extblocks = btodb(fragroundup(fs, ip->i_din2->di_extsize));
+ datablocks = DIP(ip, i_blocks) - extblocks;
+ if ((flags & IO_NORMAL) == 0) {
+ freeblks->fb_oldsize = 0;
+ freeblks->fb_chkcnt = 0;
+ } else {
+ freeblks->fb_oldsize = ip->i_size;
+ ip->i_size = 0;
+ DIP(ip, i_size) = 0;
+ freeblks->fb_chkcnt = datablocks;
+ for (i = 0; i < NDADDR; i++) {
+ freeblks->fb_dblks[i] = DIP(ip, i_db[i]);
+ DIP(ip, i_db[i]) = 0;
+ }
+ for (i = 0; i < NIADDR; i++) {
+ freeblks->fb_iblks[i] = DIP(ip, i_ib[i]);
+ DIP(ip, i_ib[i]) = 0;
+ }
+ /*
+ * If the file was removed, then the space being freed was
+ * accounted for then (see softdep_filereleased()). If the
+ * file is merely being truncated, then we account for it now.
+ */
+ if ((ip->i_flag & IN_SPACECOUNTED) == 0)
+ fs->fs_pendingblocks += datablocks;
+ }
+ if ((flags & IO_EXT) == 0) {
+ freeblks->fb_oldextsize = 0;
+ } else {
+ freeblks->fb_oldextsize = ip->i_din2->di_extsize;
+ ip->i_din2->di_extsize = 0;
+ freeblks->fb_chkcnt += extblocks;
+ for (i = 0; i < NXADDR; i++) {
+ freeblks->fb_eblks[i] = ip->i_din2->di_extb[i];
+ ip->i_din2->di_extb[i] = 0;
+ }
+ }
+ DIP(ip, i_blocks) -= freeblks->fb_chkcnt;
/*
* Push the zero'ed inode to its disk buffer so that we are free
* to delete its dependencies below. Once the dependencies are gone
@@ -1897,9 +2020,18 @@ softdep_setup_freeblocks(ip, length)
* If we still have a bitmap dependency, then the inode has never
* been written to disk, so we can free any fragments without delay.
*/
- merge_inode_lists(inodedep);
- while ((adp = TAILQ_FIRST(&inodedep->id_inoupdt)) != 0)
- free_allocdirect(&inodedep->id_inoupdt, adp, delay);
+ if (flags & IO_NORMAL) {
+ merge_inode_lists(&inodedep->id_newinoupdt,
+ &inodedep->id_inoupdt);
+ while ((adp = TAILQ_FIRST(&inodedep->id_inoupdt)) != 0)
+ free_allocdirect(&inodedep->id_inoupdt, adp, delay);
+ }
+ if (flags & IO_EXT) {
+ merge_inode_lists(&inodedep->id_newextupdt,
+ &inodedep->id_extupdt);
+ while ((adp = TAILQ_FIRST(&inodedep->id_extupdt)) != 0)
+ free_allocdirect(&inodedep->id_extupdt, adp, delay);
+ }
FREE_LOCK(&lk);
bdwrite(bp);
/*
@@ -1911,14 +2043,21 @@ softdep_setup_freeblocks(ip, length)
vp = ITOV(ip);
ACQUIRE_LOCK(&lk);
drain_output(vp, 1);
- while (getdirtybuf(&TAILQ_FIRST(&vp->v_dirtyblkhd), MNT_WAIT)) {
- bp = TAILQ_FIRST(&vp->v_dirtyblkhd);
+restart:
+ TAILQ_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) {
+ if (((flags & IO_EXT) == 0 && (bp->b_xflags & BX_ALTDATA)) ||
+ ((flags & IO_NORMAL) == 0 &&
+ (bp->b_xflags & BX_ALTDATA) == 0))
+ continue;
+ if (getdirtybuf(&bp, MNT_WAIT) == 0)
+ goto restart;
(void) inodedep_lookup(fs, ip->i_number, 0, &inodedep);
deallocate_dependencies(bp, inodedep);
bp->b_flags |= B_INVAL | B_NOCACHE;
FREE_LOCK(&lk);
brelse(bp);
ACQUIRE_LOCK(&lk);
+ goto restart;
}
if (inodedep_lookup(fs, ip->i_number, 0, &inodedep) != 0)
(void) free_inodedep(inodedep);
@@ -2216,6 +2355,8 @@ check_inode_unwritten(inodedep)
LIST_FIRST(&inodedep->id_inowait) != NULL ||
TAILQ_FIRST(&inodedep->id_inoupdt) != NULL ||
TAILQ_FIRST(&inodedep->id_newinoupdt) != NULL ||
+ TAILQ_FIRST(&inodedep->id_extupdt) != NULL ||
+ TAILQ_FIRST(&inodedep->id_newextupdt) != NULL ||
inodedep->id_nlinkdelta != 0)
return (0);
inodedep->id_state |= ALLCOMPLETE;
@@ -2249,6 +2390,8 @@ free_inodedep(inodedep)
LIST_FIRST(&inodedep->id_inowait) != NULL ||
TAILQ_FIRST(&inodedep->id_inoupdt) != NULL ||
TAILQ_FIRST(&inodedep->id_newinoupdt) != NULL ||
+ TAILQ_FIRST(&inodedep->id_extupdt) != NULL ||
+ TAILQ_FIRST(&inodedep->id_newextupdt) != NULL ||
inodedep->id_nlinkdelta != 0 || inodedep->id_savedino1 != NULL)
return (0);
LIST_REMOVE(inodedep, id_hash);
@@ -2288,30 +2431,48 @@ handle_workitem_freeblocks(freeblks, flags)
nblocks = btodb(fs->fs_bsize);
blocksreleased = 0;
/*
- * Indirect blocks first.
+ * Release all extended attribute blocks or frags.
*/
- for (level = (NIADDR - 1); level >= 0; level--) {
- if ((bn = freeblks->fb_iblks[level]) == 0)
- continue;
- if ((error = indir_trunc(freeblks, fsbtodb(fs, bn), level,
- baselbns[level], &blocksreleased)) == 0)
- allerror = error;
- ffs_blkfree(fs, freeblks->fb_devvp, bn, fs->fs_bsize,
- freeblks->fb_previousinum);
- fs->fs_pendingblocks -= nblocks;
- blocksreleased += nblocks;
+ if (freeblks->fb_oldextsize > 0) {
+ for (i = (NXADDR - 1); i >= 0; i--) {
+ if ((bn = freeblks->fb_eblks[i]) == 0)
+ continue;
+ bsize = sblksize(fs, freeblks->fb_oldextsize, i);
+ ffs_blkfree(fs, freeblks->fb_devvp, bn, bsize,
+ freeblks->fb_previousinum);
+ blocksreleased += btodb(bsize);
+ }
}
/*
- * All direct blocks or frags.
+ * Release all data blocks or frags.
*/
- for (i = (NDADDR - 1); i >= 0; i--) {
- if ((bn = freeblks->fb_dblks[i]) == 0)
- continue;
- bsize = sblksize(fs, freeblks->fb_oldsize, i);
- ffs_blkfree(fs, freeblks->fb_devvp, bn, bsize,
- freeblks->fb_previousinum);
- fs->fs_pendingblocks -= btodb(bsize);
- blocksreleased += btodb(bsize);
+ if (freeblks->fb_oldsize > 0) {
+ /*
+ * Indirect blocks first.
+ */
+ for (level = (NIADDR - 1); level >= 0; level--) {
+ if ((bn = freeblks->fb_iblks[level]) == 0)
+ continue;
+ /* Record a failing indir_trunc() in allerror; 0 is not a failure. */
+ if ((error = indir_trunc(freeblks, fsbtodb(fs, bn),
+ level, baselbns[level], &blocksreleased)) != 0)
+ allerror = error;
+ ffs_blkfree(fs, freeblks->fb_devvp, bn, fs->fs_bsize,
+ freeblks->fb_previousinum);
+ fs->fs_pendingblocks -= nblocks;
+ blocksreleased += nblocks;
+ }
+ /*
+ * All direct blocks or frags.
+ */
+ for (i = (NDADDR - 1); i >= 0; i--) {
+ if ((bn = freeblks->fb_dblks[i]) == 0)
+ continue;
+ bsize = sblksize(fs, freeblks->fb_oldsize, i);
+ ffs_blkfree(fs, freeblks->fb_devvp, bn, bsize,
+ freeblks->fb_previousinum);
+ fs->fs_pendingblocks -= btodb(bsize);
+ blocksreleased += btodb(bsize);
+ }
}
/*
* If we still have not finished background cleanup, then check
@@ -3049,6 +3210,8 @@ softdep_releasefile(ip)
struct inode *ip; /* inode with the zero effective link count */
{
struct inodedep *inodedep;
+ struct fs *fs;
+ int extblocks;
if (ip->i_effnlink > 0)
panic("softdep_filerelease: file still referenced");
@@ -3073,7 +3236,11 @@ softdep_releasefile(ip)
if ((inodedep_lookup(ip->i_fs, ip->i_number, 0, &inodedep)))
inodedep->id_state |= SPACECOUNTED;
FREE_LOCK(&lk);
- ip->i_fs->fs_pendingblocks += DIP(ip, i_blocks);
+ fs = ip->i_fs;
+ extblocks = 0;
+ if (fs->fs_magic == FS_UFS2_MAGIC)
+ extblocks = btodb(fragroundup(fs, ip->i_din2->di_extsize));
+ ip->i_fs->fs_pendingblocks += DIP(ip, i_blocks) - extblocks;
ip->i_fs->fs_pendinginodes += 1;
ip->i_flag |= IN_SPACECOUNTED;
}
@@ -3404,6 +3571,7 @@ initiate_write_inodeblock_ufs1(inodedep, bp)
* If no dependencies, then there is nothing to roll back.
*/
inodedep->id_savedsize = dp->di_size;
+ inodedep->id_savedextsize = 0;
if (TAILQ_FIRST(&inodedep->id_inoupdt) == NULL)
return;
/*
@@ -3556,12 +3724,81 @@ initiate_write_inodeblock_ufs2(inodedep, bp)
* If no dependencies, then there is nothing to roll back.
*/
inodedep->id_savedsize = dp->di_size;
- if (TAILQ_FIRST(&inodedep->id_inoupdt) == NULL)
+ inodedep->id_savedextsize = dp->di_extsize;
+ if (TAILQ_FIRST(&inodedep->id_inoupdt) == NULL &&
+ TAILQ_FIRST(&inodedep->id_extupdt) == NULL)
return;
/*
- * Set the dependencies to busy.
+ * Set the ext data dependencies to busy.
*/
ACQUIRE_LOCK(&lk);
+ for (deplist = 0, adp = TAILQ_FIRST(&inodedep->id_extupdt); adp;
+ adp = TAILQ_NEXT(adp, ad_next)) {
+#ifdef DIAGNOSTIC
+ if (deplist != 0 && prevlbn >= adp->ad_lbn) {
+ FREE_LOCK(&lk);
+ panic("softdep_write_inodeblock: lbn order");
+ }
+ prevlbn = adp->ad_lbn;
+ if (dp->di_extb[adp->ad_lbn] != adp->ad_newblkno) {
+ FREE_LOCK(&lk);
+ panic("%s: direct pointer #%jd mismatch %jd != %jd",
+ "softdep_write_inodeblock",
+ (intmax_t)adp->ad_lbn,
+ (intmax_t)dp->di_extb[adp->ad_lbn],
+ (intmax_t)adp->ad_newblkno);
+ }
+ deplist |= 1 << adp->ad_lbn;
+ if ((adp->ad_state & ATTACHED) == 0) {
+ FREE_LOCK(&lk);
+ panic("softdep_write_inodeblock: Unknown state 0x%x",
+ adp->ad_state);
+ }
+#endif /* DIAGNOSTIC */
+ adp->ad_state &= ~ATTACHED;
+ adp->ad_state |= UNDONE;
+ }
+ /*
+ * The on-disk inode cannot claim to be any larger than the last
+ * fragment that has been written. Otherwise, the on-disk inode
+ * might have fragments that were not the last block in the ext
+ * data which would corrupt the filesystem.
+ */
+ for (lastadp = NULL, adp = TAILQ_FIRST(&inodedep->id_extupdt); adp;
+ lastadp = adp, adp = TAILQ_NEXT(adp, ad_next)) {
+ dp->di_extb[adp->ad_lbn] = adp->ad_oldblkno;
+ /* keep going until hitting a rollback to a frag */
+ if (adp->ad_oldsize == 0 || adp->ad_oldsize == fs->fs_bsize)
+ continue;
+ dp->di_extsize = fs->fs_bsize * adp->ad_lbn + adp->ad_oldsize;
+ for (i = adp->ad_lbn + 1; i < NXADDR; i++) {
+#ifdef DIAGNOSTIC
+ if (dp->di_extb[i] != 0 && (deplist & (1 << i)) == 0) {
+ FREE_LOCK(&lk);
+ panic("softdep_write_inodeblock: lost dep1");
+ }
+#endif /* DIAGNOSTIC */
+ dp->di_extb[i] = 0;
+ }
+ lastadp = NULL;
+ break;
+ }
+ /*
+ * If we have zero'ed out the last allocated block of the ext
+ * data, roll back the size to the last currently allocated block.
+ * We know that this last allocated block is full-sized, as
+ * we already checked for fragments in the loop above.
+ */
+ if (lastadp != NULL &&
+ dp->di_extsize <= (lastadp->ad_lbn + 1) * fs->fs_bsize) {
+ for (i = lastadp->ad_lbn; i >= 0; i--)
+ if (dp->di_extb[i] != 0)
+ break;
+ dp->di_extsize = (i + 1) * fs->fs_bsize;
+ }
+ /*
+ * Set the file data dependencies to busy.
+ */
for (deplist = 0, adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp;
adp = TAILQ_NEXT(adp, ad_next)) {
#ifdef DIAGNOSTIC
@@ -3617,7 +3854,7 @@ initiate_write_inodeblock_ufs2(inodedep, bp)
#ifdef DIAGNOSTIC
if (dp->di_db[i] != 0 && (deplist & (1 << i)) == 0) {
FREE_LOCK(&lk);
- panic("softdep_write_inodeblock: lost dep1");
+ panic("softdep_write_inodeblock: lost dep2");
}
#endif /* DIAGNOSTIC */
dp->di_db[i] = 0;
@@ -3627,7 +3864,7 @@ initiate_write_inodeblock_ufs2(inodedep, bp)
if (dp->di_ib[i] != 0 &&
(deplist & ((1 << NDADDR) << i)) == 0) {
FREE_LOCK(&lk);
- panic("softdep_write_inodeblock: lost dep2");
+ panic("softdep_write_inodeblock: lost dep3");
}
#endif /* DIAGNOSTIC */
dp->di_ib[i] = 0;
@@ -3805,6 +4042,7 @@ static void
handle_allocdirect_partdone(adp)
struct allocdirect *adp; /* the completed allocdirect */
{
+ struct allocdirectlst *listhead;
struct allocdirect *listadp;
struct inodedep *inodedep;
long bsize, delay;
@@ -3822,11 +4060,16 @@ handle_allocdirect_partdone(adp)
* which would corrupt the filesystem. Thus, we cannot free any
* allocdirects after one whose ad_oldblkno claims a fragment as
* these blocks must be rolled back to zero before writing the inode.
- * We check the currently active set of allocdirects in id_inoupdt.
+ * We check the currently active set of allocdirects in id_inoupdt
+ * or id_extupdt as appropriate.
*/
inodedep = adp->ad_inodedep;
bsize = inodedep->id_fs->fs_bsize;
- TAILQ_FOREACH(listadp, &inodedep->id_inoupdt, ad_next) {
+ if (adp->ad_state & EXTDATA)
+ listhead = &inodedep->id_extupdt;
+ else
+ listhead = &inodedep->id_inoupdt;
+ TAILQ_FOREACH(listadp, listhead, ad_next) {
/* found our block */
if (listadp == adp)
break;
@@ -3845,7 +4088,11 @@ handle_allocdirect_partdone(adp)
*/
if (listadp == NULL) {
#ifdef DEBUG
- TAILQ_FOREACH(listadp, &inodedep->id_newinoupdt, ad_next)
+ if (adp->ad_state & EXTDATA)
+ listhead = &inodedep->id_newextupdt;
+ else
+ listhead = &inodedep->id_newinoupdt;
+ TAILQ_FOREACH(listadp, listhead, ad_next)
/* found our block */
if (listadp == adp)
break;
@@ -3868,7 +4115,7 @@ handle_allocdirect_partdone(adp)
listadp = TAILQ_NEXT(adp, ad_next);
if ((adp->ad_state & ALLCOMPLETE) != ALLCOMPLETE)
return;
- free_allocdirect(&inodedep->id_inoupdt, adp, delay);
+ free_allocdirect(listhead, adp, delay);
}
}
@@ -4023,12 +4270,31 @@ handle_written_inodeblock(inodedep, bp)
adp->ad_state |= ATTACHED;
hadchanges = 1;
}
+ for (adp = TAILQ_FIRST(&inodedep->id_extupdt); adp; adp = nextadp) {
+ nextadp = TAILQ_NEXT(adp, ad_next);
+ if (adp->ad_state & ATTACHED) {
+ lk.lkt_held = NOHOLDER;
+ panic("handle_written_inodeblock: new entry");
+ }
+ if (dp2->di_extb[adp->ad_lbn] != adp->ad_oldblkno) {
+ lk.lkt_held = NOHOLDER;
+ panic("%s: direct pointers #%jd %s %jd != %jd",
+ "handle_written_inodeblock",
+ (intmax_t)adp->ad_lbn, "mismatch",
+ (intmax_t)dp2->di_extb[adp->ad_lbn],
+ (intmax_t)adp->ad_oldblkno);
+ }
+ dp2->di_extb[adp->ad_lbn] = adp->ad_newblkno;
+ adp->ad_state &= ~UNDONE;
+ adp->ad_state |= ATTACHED;
+ hadchanges = 1;
+ }
if (hadchanges && (bp->b_flags & B_DELWRI) == 0)
stat_direct_blk_ptrs++;
/*
* Reset the file size to its most up-to-date value.
*/
- if (inodedep->id_savedsize == -1) {
+ if (inodedep->id_savedsize == -1 || inodedep->id_savedextsize == -1) {
lk.lkt_held = NOHOLDER;
panic("handle_written_inodeblock: bad size");
}
@@ -4042,8 +4308,13 @@ handle_written_inodeblock(inodedep, bp)
dp2->di_size = inodedep->id_savedsize;
hadchanges = 1;
}
+ if (dp2->di_extsize != inodedep->id_savedextsize) {
+ dp2->di_extsize = inodedep->id_savedextsize;
+ hadchanges = 1;
+ }
}
inodedep->id_savedsize = -1;
+ inodedep->id_savedextsize = -1;
/*
* If there were any rollbacks in the inode block, then it must be
* marked dirty so that its will eventually get written back in
@@ -4056,6 +4327,8 @@ handle_written_inodeblock(inodedep, bp)
*/
if ((adp = TAILQ_FIRST(&inodedep->id_inoupdt)) != NULL)
handle_allocdirect_partdone(adp);
+ if ((adp = TAILQ_FIRST(&inodedep->id_extupdt)) != NULL)
+ handle_allocdirect_partdone(adp);
/*
* Process deallocations that were held pending until the
* inode had been written to disk. Freeing of the inode
@@ -4119,7 +4392,9 @@ handle_written_inodeblock(inodedep, bp)
/*
* If no outstanding dependencies, free it.
*/
- if (free_inodedep(inodedep) || TAILQ_FIRST(&inodedep->id_inoupdt) == 0)
+ if (free_inodedep(inodedep) ||
+ (TAILQ_FIRST(&inodedep->id_inoupdt) == 0 &&
+ TAILQ_FIRST(&inodedep->id_extupdt) == 0))
return (0);
return (hadchanges);
}
@@ -4358,9 +4633,12 @@ softdep_update_inodeblock(ip, bp, waitfor)
* the in-memory copy of the inode. Once merged process any
* allocdirects that are completed by the merger.
*/
- merge_inode_lists(inodedep);
+ merge_inode_lists(&inodedep->id_newinoupdt, &inodedep->id_inoupdt);
if (TAILQ_FIRST(&inodedep->id_inoupdt) != NULL)
handle_allocdirect_partdone(TAILQ_FIRST(&inodedep->id_inoupdt));
+ merge_inode_lists(&inodedep->id_newextupdt, &inodedep->id_extupdt);
+ if (TAILQ_FIRST(&inodedep->id_extupdt) != NULL)
+ handle_allocdirect_partdone(TAILQ_FIRST(&inodedep->id_extupdt));
/*
* Now that the inode has been pushed into the buffer, the
* operations dependent on the inode being written to disk
@@ -4392,34 +4670,35 @@ softdep_update_inodeblock(ip, bp, waitfor)
}
/*
- * Merge the new inode dependency list (id_newinoupdt) into the old
- * inode dependency list (id_inoupdt). This routine must be called
- * with splbio interrupts blocked.
+ * Merge a new inode dependency list (such as id_newinoupdt) into an
+ * old inode dependency list (such as id_inoupdt). This routine must be
+ * called with splbio interrupts blocked.
*/
static void
-merge_inode_lists(inodedep)
- struct inodedep *inodedep;
+merge_inode_lists(newlisthead, oldlisthead)
+ struct allocdirectlst *newlisthead;
+ struct allocdirectlst *oldlisthead;
{
struct allocdirect *listadp, *newadp;
- newadp = TAILQ_FIRST(&inodedep->id_newinoupdt);
- for (listadp = TAILQ_FIRST(&inodedep->id_inoupdt); listadp && newadp;) {
+ newadp = TAILQ_FIRST(newlisthead);
+ for (listadp = TAILQ_FIRST(oldlisthead); listadp && newadp;) {
if (listadp->ad_lbn < newadp->ad_lbn) {
listadp = TAILQ_NEXT(listadp, ad_next);
continue;
}
- TAILQ_REMOVE(&inodedep->id_newinoupdt, newadp, ad_next);
+ TAILQ_REMOVE(newlisthead, newadp, ad_next);
TAILQ_INSERT_BEFORE(listadp, newadp, ad_next);
if (listadp->ad_lbn == newadp->ad_lbn) {
- allocdirect_merge(&inodedep->id_inoupdt, newadp,
+ allocdirect_merge(oldlisthead, newadp,
listadp);
listadp = newadp;
}
- newadp = TAILQ_FIRST(&inodedep->id_newinoupdt);
+ newadp = TAILQ_FIRST(newlisthead);
}
- while ((newadp = TAILQ_FIRST(&inodedep->id_newinoupdt)) != NULL) {
- TAILQ_REMOVE(&inodedep->id_newinoupdt, newadp, ad_next);
- TAILQ_INSERT_TAIL(&inodedep->id_inoupdt, newadp, ad_next);
+ while ((newadp = TAILQ_FIRST(newlisthead)) != NULL) {
+ TAILQ_REMOVE(newlisthead, newadp, ad_next);
+ TAILQ_INSERT_TAIL(oldlisthead, newadp, ad_next);
}
}
@@ -4454,6 +4733,8 @@ softdep_fsync(vp)
}
if (LIST_FIRST(&inodedep->id_inowait) != NULL ||
LIST_FIRST(&inodedep->id_bufwait) != NULL ||
+ TAILQ_FIRST(&inodedep->id_extupdt) != NULL ||
+ TAILQ_FIRST(&inodedep->id_newextupdt) != NULL ||
TAILQ_FIRST(&inodedep->id_inoupdt) != NULL ||
TAILQ_FIRST(&inodedep->id_newinoupdt) != NULL) {
FREE_LOCK(&lk);
@@ -4877,9 +5158,7 @@ flush_inodedep_deps(fs, ino)
ino_t ino;
{
struct inodedep *inodedep;
- struct allocdirect *adp;
int error, waitfor;
- struct buf *bp;
/*
* This work is done in two passes. The first pass grabs most
@@ -4894,52 +5173,17 @@ flush_inodedep_deps(fs, ino)
* We give a brief window at the top of the loop to allow
* any pending I/O to complete.
*/
- for (waitfor = MNT_NOWAIT; ; ) {
+ for (error = 0, waitfor = MNT_NOWAIT; ; ) {
+ if (error)
+ return (error);
FREE_LOCK(&lk);
ACQUIRE_LOCK(&lk);
if (inodedep_lookup(fs, ino, 0, &inodedep) == 0)
return (0);
- TAILQ_FOREACH(adp, &inodedep->id_inoupdt, ad_next) {
- if (adp->ad_state & DEPCOMPLETE)
- continue;
- bp = adp->ad_buf;
- if (getdirtybuf(&bp, waitfor) == 0) {
- if (waitfor == MNT_NOWAIT)
- continue;
- break;
- }
- FREE_LOCK(&lk);
- if (waitfor == MNT_NOWAIT) {
- bawrite(bp);
- } else if ((error = BUF_WRITE(bp)) != 0) {
- ACQUIRE_LOCK(&lk);
- return (error);
- }
- ACQUIRE_LOCK(&lk);
- break;
- }
- if (adp != NULL)
- continue;
- TAILQ_FOREACH(adp, &inodedep->id_newinoupdt, ad_next) {
- if (adp->ad_state & DEPCOMPLETE)
- continue;
- bp = adp->ad_buf;
- if (getdirtybuf(&bp, waitfor) == 0) {
- if (waitfor == MNT_NOWAIT)
- continue;
- break;
- }
- FREE_LOCK(&lk);
- if (waitfor == MNT_NOWAIT) {
- bawrite(bp);
- } else if ((error = BUF_WRITE(bp)) != 0) {
- ACQUIRE_LOCK(&lk);
- return (error);
- }
- ACQUIRE_LOCK(&lk);
- break;
- }
- if (adp != NULL)
+ if (flush_deplist(&inodedep->id_inoupdt, waitfor, &error) ||
+ flush_deplist(&inodedep->id_newinoupdt, waitfor, &error) ||
+ flush_deplist(&inodedep->id_extupdt, waitfor, &error) ||
+ flush_deplist(&inodedep->id_newextupdt, waitfor, &error))
continue;
/*
* If pass2, we are done, otherwise do pass 2.
@@ -4957,6 +5201,41 @@ flush_inodedep_deps(fs, ino)
}
/*
+ * Flush an inode dependency list.
+ * Called with splbio blocked.
+ *
+ * Scans listhead for the first allocdirect that is not DEPCOMPLETE and
+ * whose buffer getdirtybuf() hands back, then writes that buffer:
+ * bawrite() when waitfor is MNT_NOWAIT, BUF_WRITE() otherwise, storing
+ * the BUF_WRITE() result in *errorp.  The lock lk is released around the
+ * write and reacquired before returning.
+ *
+ * Returns 1 when a write was issued, or when getdirtybuf() returned 0
+ * with waitfor other than MNT_NOWAIT; flush_inodedep_deps() then goes
+ * around its loop again and returns the error if *errorp was set.
+ * Returns 0 when the end of the list is reached without writing anything.
+ */
+static int
+flush_deplist(listhead, waitfor, errorp)
+ struct allocdirectlst *listhead;
+ int waitfor;
+ int *errorp;
+{
+ struct allocdirect *adp;
+ struct buf *bp;
+
+ TAILQ_FOREACH(adp, listhead, ad_next) {
+ if (adp->ad_state & DEPCOMPLETE)
+ continue;
+ bp = adp->ad_buf;
+ if (getdirtybuf(&bp, waitfor) == 0) {
+ if (waitfor == MNT_NOWAIT)
+ continue;
+ return (1);
+ }
+ FREE_LOCK(&lk);
+ if (waitfor == MNT_NOWAIT) {
+ bawrite(bp);
+ } else if ((*errorp = BUF_WRITE(bp)) != 0) {
+ ACQUIRE_LOCK(&lk);
+ return (1);
+ }
+ ACQUIRE_LOCK(&lk);
+ return (1);
+ }
+ return (0);
+}
+
+/*
* Eliminate a pagedep dependency by flushing out all its diradd dependencies.
* Called with splbio blocked.
*/
@@ -5406,6 +5685,12 @@ softdep_count_dependencies(bp, wantcount)
if (!wantcount)
goto out;
}
+ if (TAILQ_FIRST(&inodedep->id_extupdt)) {
+ /* direct block pointer dependency */
+ retval += 1;
+ if (!wantcount)
+ goto out;
+ }
continue;
case D_INDIRDEP:
diff --git a/sys/ufs/ffs/ffs_softdep_stub.c b/sys/ufs/ffs/ffs_softdep_stub.c
index df084c7dddc8..c20b53c1e51c 100644
--- a/sys/ufs/ffs/ffs_softdep_stub.c
+++ b/sys/ufs/ffs/ffs_softdep_stub.c
@@ -123,6 +123,20 @@ softdep_setup_allocdirect(ip, lbn, newblkno, oldblkno, newsize, oldsize, bp)
panic("softdep_setup_allocdirect called");
}
+/*
+ * Stub for softdep_setup_allocext(): unconditionally panics, reporting
+ * its own name so the offending call can be identified.
+ */
+void
+softdep_setup_allocext(ip, lbn, newblkno, oldblkno, newsize, oldsize, bp)
+ struct inode *ip;
+ ufs_lbn_t lbn;
+ ufs2_daddr_t newblkno;
+ ufs2_daddr_t oldblkno;
+ long newsize;
+ long oldsize;
+ struct buf *bp;
+{
+
+ panic("softdep_setup_allocext called");
+}
+
void
softdep_setup_allocindir_page(ip, lbn, bp, ptrno, newblkno, oldblkno, nbp)
struct inode *ip;
diff --git a/sys/ufs/ffs/softdep.h b/sys/ufs/ffs/softdep.h
index cbee51b5a84f..f29e89f2622d 100644
--- a/sys/ufs/ffs/softdep.h
+++ b/sys/ufs/ffs/softdep.h
@@ -89,8 +89,10 @@
* dependencies are complete. The INPROGRESS flag marks worklist
* structures that are still on the worklist, but are being considered
* for action by some process. The UFS1FMT flag indicates that the
- * inode being processed is a ufs1 format. The ONWORKLIST flag shows
- * whether the structure is currently linked onto a worklist.
+ * inode being processed is a ufs1 format. The EXTDATA flag indicates
+ * that the allocdirect describes an extended-attributes dependency.
+ * The ONWORKLIST flag shows whether the structure is currently linked
+ * onto a worklist.
*/
#define ATTACHED 0x0001
#define UNDONE 0x0002
@@ -106,6 +108,7 @@
#define NEWBLOCK 0x0800 /* pagedep only */
#define INPROGRESS 0x1000 /* dirrem, freeblks, freefrag, freefile only */
#define UFS1FMT 0x2000 /* indirdep only */
+#define EXTDATA 0x4000 /* allocdirect only */
#define ONWORKLIST 0x8000
#define ALLCOMPLETE (ATTACHED | COMPLETE | DEPCOMPLETE)
@@ -251,12 +254,15 @@ struct inodedep {
nlink_t id_nlinkdelta; /* saved effective link count */
LIST_ENTRY(inodedep) id_deps; /* bmsafemap's list of inodedep's */
struct buf *id_buf; /* related bmsafemap (if pending) */
+ long id_savedextsize; /* ext size saved during rollback */
off_t id_savedsize; /* file size saved during rollback */
struct workhead id_pendinghd; /* entries awaiting directory write */
struct workhead id_bufwait; /* operations after inode written */
struct workhead id_inowait; /* operations waiting inode update */
struct allocdirectlst id_inoupdt; /* updates before inode written */
struct allocdirectlst id_newinoupdt; /* updates when inode written */
+ struct allocdirectlst id_extupdt; /* extdata updates pre-inode write */
+ struct allocdirectlst id_newextupdt; /* extdata updates at ino write */
union {
struct ufs1_dinode *idu_savedino1; /* saved ufs1_dinode contents */
struct ufs2_dinode *idu_savedino2; /* saved ufs2_dinode contents */
@@ -427,11 +433,12 @@ struct freeblks {
uid_t fb_uid; /* uid of previous owner of blocks */
struct vnode *fb_devvp; /* filesystem device vnode */
struct mount *fb_mnt; /* associated mount point */
+ long fb_oldextsize; /* previous ext data size */
off_t fb_oldsize; /* previous file size */
- off_t fb_newsize; /* new file size */
ufs2_daddr_t fb_chkcnt; /* used to check cnt of blks released */
ufs2_daddr_t fb_dblks[NDADDR]; /* direct blk ptrs to deallocate */
ufs2_daddr_t fb_iblks[NIADDR]; /* indirect blk ptrs to deallocate */
+ ufs2_daddr_t fb_eblks[NXADDR]; /* ext data blk ptrs to deallocate */
};
/*
diff --git a/sys/ufs/ufs/ufs_bmap.c b/sys/ufs/ufs/ufs_bmap.c
index abe2bea0cd87..731354e2a050 100644
--- a/sys/ufs/ufs/ufs_bmap.c
+++ b/sys/ufs/ufs/ufs_bmap.c
@@ -83,7 +83,7 @@ ufs_bmap(ap)
if (ap->a_bnp == NULL)
return (0);
- error = ufs_bmaparray(ap->a_vp, ap->a_bn, &blkno,
+ error = ufs_bmaparray(ap->a_vp, ap->a_bn, &blkno, NULL,
ap->a_runp, ap->a_runb);
*ap->a_bnp = blkno;
return (error);
@@ -104,10 +104,11 @@ ufs_bmap(ap)
*/
int
-ufs_bmaparray(vp, bn, bnp, runp, runb)
+ufs_bmaparray(vp, bn, bnp, nbp, runp, runb)
struct vnode *vp;
ufs2_daddr_t bn;
ufs2_daddr_t *bnp;
+ struct buf *nbp;
int *runp;
int *runb;
{
@@ -146,7 +147,19 @@ ufs_bmaparray(vp, bn, bnp, runp, runb)
num = *nump;
if (num == 0) {
- *bnp = blkptrtodb(ump, DIP(ip, i_db[bn]));
+ if (bn >= 0 && bn < NDADDR) {
+ *bnp = blkptrtodb(ump, DIP(ip, i_db[bn]));
+ } else if (bn < 0 && bn >= -NXADDR) {
+ *bnp = blkptrtodb(ump, ip->i_din2->di_extb[-1 - bn]);
+ if (*bnp == 0)
+ *bnp = -1;
+ if (nbp == NULL)
+ panic("ufs_bmaparray: mapping ext data");
+ nbp->b_xflags |= BX_ALTDATA;
+ return (0);
+ } else {
+ panic("ufs_bmaparray: blkno out of range");
+ }
/*
* Since this is FFS independent code, we are out of
* scope for the definitions of BLK_NOCOPY and
diff --git a/sys/ufs/ufs/ufs_extern.h b/sys/ufs/ufs/ufs_extern.h
index 85b508ae88c7..d4e333c2de9a 100644
--- a/sys/ufs/ufs/ufs_extern.h
+++ b/sys/ufs/ufs/ufs_extern.h
@@ -60,13 +60,15 @@ int ufs_vnoperatefifo(struct vop_generic_args *);
int ufs_vnoperatespec(struct vop_generic_args *);
int ufs_bmap(struct vop_bmap_args *);
-int ufs_bmaparray(struct vnode *, ufs2_daddr_t, ufs2_daddr_t *, int *,
- int *);
+int ufs_bmaparray(struct vnode *, ufs2_daddr_t, ufs2_daddr_t *,
+ struct buf *, int *, int *);
int ufs_fhtovp(struct mount *, struct ufid *, struct vnode **);
int ufs_checkpath(struct inode *, struct inode *, struct ucred *);
void ufs_dirbad(struct inode *, doff_t, char *);
int ufs_dirbadentry(struct vnode *, struct direct *, int);
int ufs_dirempty(struct inode *, ino_t, struct ucred *);
+int ufs_extread(struct vop_read_args *);
+int ufs_extwrite(struct vop_write_args *);
void ufs_makedirentry(struct inode *, struct componentname *,
struct direct *);
int ufs_direnter(struct vnode *, struct vnode *, struct direct *,
@@ -107,10 +109,12 @@ void softdep_change_linkcnt(struct inode *);
void softdep_releasefile(struct inode *);
int softdep_slowdown(struct vnode *);
-/* Flags to low-level allocation routines. */
-#define BA_CLRBUF 0x01 /* Request allocated buffer be cleared. */
-#define BA_SYNC 0x02 /* Do all allocations synchronously. */
-#define BA_METAONLY 0x04 /* Return indirect block buffer. */
-#define BA_NOWAIT 0x08 /* do not sleep to await lock */
+/*
+ * Flags to low-level allocation routines.
+ * The low 16-bits are reserved for IO_ flags from vnode.h.
+ */
+#define BA_CLRBUF 0x00010000 /* Request alloced buffer be cleared. */
+#define BA_METAONLY 0x00020000 /* Return indirect block buffer. */
+#define BA_NOWAIT 0x00040000 /* Do not sleep to await lock. */
#endif /* !_UFS_UFS_EXTERN_H_ */
diff --git a/sys/ufs/ufs/ufs_inode.c b/sys/ufs/ufs/ufs_inode.c
index c9ac36259dd4..3166fecc8c9b 100644
--- a/sys/ufs/ufs/ufs_inode.c
+++ b/sys/ufs/ufs/ufs_inode.c
@@ -95,7 +95,8 @@ ufs_inactive(ap)
#ifdef UFS_EXTATTR
ufs_extattr_vnode_inactive(ap->a_vp, ap->a_td);
#endif
- error = UFS_TRUNCATE(vp, (off_t)0, 0, NOCRED, td);
+ error = UFS_TRUNCATE(vp, (off_t)0, IO_EXT | IO_NORMAL,
+ NOCRED, td);
/*
* Setting the mode to zero needs to wait for the inode
* to be written just as does a change to the link count.
diff --git a/sys/ufs/ufs/ufs_lookup.c b/sys/ufs/ufs/ufs_lookup.c
index 1df9146e04c6..4515b6db7fcf 100644
--- a/sys/ufs/ufs/ufs_lookup.c
+++ b/sys/ufs/ufs/ufs_lookup.c
@@ -752,7 +752,7 @@ ufs_direnter(dvp, tvp, dirp, cnp, newdirbp)
panic("ufs_direnter: newblk");
flags = BA_CLRBUF;
if (!DOINGSOFTDEP(dvp) && !DOINGASYNC(dvp))
- flags |= BA_SYNC;
+ flags |= IO_SYNC;
if ((error = UFS_BALLOC(dvp, (off_t)dp->i_offset, DIRBLKSIZ,
cr, flags, &bp)) != 0) {
if (DOINGSOFTDEP(dvp) && newdirbp != NULL)
@@ -961,7 +961,8 @@ ufs_direnter(dvp, tvp, dirp, cnp, newdirbp)
if (dp->i_dirhash != NULL)
ufsdirhash_dirtrunc(dp, dp->i_endoff);
#endif
- (void) UFS_TRUNCATE(dvp, (off_t)dp->i_endoff, IO_SYNC, cr, td);
+ (void) UFS_TRUNCATE(dvp, (off_t)dp->i_endoff,
+ IO_NORMAL | IO_SYNC, cr, td);
if (tvp != NULL)
vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, td);
}
diff --git a/sys/ufs/ufs/ufs_readwrite.c b/sys/ufs/ufs/ufs_readwrite.c
index 406832ed6386..9db4e8794e35 100644
--- a/sys/ufs/ufs/ufs_readwrite.c
+++ b/sys/ufs/ufs/ufs_readwrite.c
@@ -1,4 +1,13 @@
/*-
+ * Copyright (c) 2002 Networks Associates Technology, Inc.
+ * All rights reserved.
+ *
+ * This software was developed for the FreeBSD Project by Marshall
+ * Kirk McKusick and Network Associates Laboratories, the Security
+ * Research Division of Network Associates, Inc. under DARPA/SPAWAR
+ * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
+ * research program
+ *
* Copyright (c) 1993
* The Regents of the University of California. All rights reserved.
*
@@ -77,6 +86,9 @@ READ(ap)
int ioflag;
vm_object_t object;
+ if (ap->a_ioflag & IO_EXT)
+ return (ufs_extread(ap));
+
GIANT_REQUIRED;
vp = ap->a_vp;
@@ -400,6 +412,9 @@ WRITE(ap)
int blkoffset, error, extended, flags, ioflag, resid, size, xfersize;
vm_object_t object;
+ if (ap->a_ioflag & IO_EXT)
+ return (ufs_extwrite(ap));
+
GIANT_REQUIRED;
extended = 0;
@@ -471,7 +486,7 @@ WRITE(ap)
osize = ip->i_size;
flags = 0;
if ((ioflag & IO_SYNC) && !DOINGASYNC(vp))
- flags = BA_SYNC;
+ flags = IO_SYNC;
#ifdef ENABLE_VFS_IOOPT
if (object && (object->flags & OBJ_OPT)) {
@@ -581,7 +596,8 @@ WRITE(ap)
if (error) {
if (ioflag & IO_UNIT) {
(void)UFS_TRUNCATE(vp, osize,
- ioflag & IO_SYNC, ap->a_cred, uio->uio_td);
+ IO_NORMAL | (ioflag & IO_SYNC),
+ ap->a_cred, uio->uio_td);
uio->uio_offset -= resid - uio->uio_resid;
uio->uio_resid = resid;
}
@@ -595,7 +611,6 @@ WRITE(ap)
return (error);
}
-
/*
* get page routine
*/
@@ -661,7 +676,7 @@ ffs_getpages(ap)
poff = (foff % bsize) / PAGE_SIZE;
dp = VTOI(vp)->i_devvp;
- if (ufs_bmaparray(vp, reqlblkno, &reqblkno, &bforwards, &bbackwards)
+ if (ufs_bmaparray(vp, reqlblkno, &reqblkno, 0, &bforwards, &bbackwards)
|| (reqblkno == -1)) {
for(i = 0; i < pcount; i++) {
if (i != ap->a_reqpage)
@@ -730,3 +745,321 @@ ffs_getpages(ap)
return (rtval);
}
+
+/*
+ * Vnode op for reading the extended attribute (ext data) area of a file.
+ * READ() hands the request here when IO_EXT is set in a_ioflag.
+ *
+ * Data is copied to a_uio starting at uio_offset within the ext area,
+ * which is di_extsize bytes long.  Ext block lbn is read through the
+ * buffer cache as logical block -1 - lbn of the vnode.  Returns 0 or the
+ * error from bread()/breadn()/uiomove(); a read starting at or past
+ * di_extsize returns 0 without transferring anything.
+ */
+/* ARGSUSED */
+int
+ufs_extread(ap)
+ struct vop_read_args /* {
+ struct vnode *a_vp;
+ struct uio *a_uio;
+ int a_ioflag;
+ struct ucred *a_cred;
+ } */ *ap;
+{
+ struct vnode *vp;
+ struct inode *ip;
+ struct ufs2_dinode *dp;
+ struct uio *uio;
+ struct fs *fs;
+ struct buf *bp;
+ ufs_lbn_t lbn, nextlbn;
+ off_t bytesinfile;
+ long size, xfersize, blkoffset;
+ int error, orig_resid;
+ mode_t mode;
+ int ioflag;
+
+ GIANT_REQUIRED;
+
+ vp = ap->a_vp;
+ ip = VTOI(vp);
+ fs = ip->i_fs;
+ dp = ip->i_din2;
+ mode = ip->i_mode;
+ uio = ap->a_uio;
+ ioflag = ap->a_ioflag;
+
+#ifdef DIAGNOSTIC
+ if (uio->uio_rw != UIO_READ || fs->fs_magic != FS_UFS2_MAGIC)
+ panic("ufs_extread: mode");
+
+#endif
+ orig_resid = uio->uio_resid;
+ if (orig_resid <= 0)
+ return (0);
+
+ bytesinfile = dp->di_extsize - uio->uio_offset;
+ if (bytesinfile <= 0) {
+ if ((vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
+ ip->i_flag |= IN_ACCESS;
+ return 0;
+ }
+
+ for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
+ if ((bytesinfile = dp->di_extsize - uio->uio_offset) <= 0)
+ break;
+
+ lbn = lblkno(fs, uio->uio_offset);
+ nextlbn = lbn + 1;
+
+ /*
+ * size of buffer. The buffer representing the
+ * end of the ext data area is rounded up to the
+ * size of the block type ( fragment or full block,
+ * depending ).
+ */
+ size = sblksize(fs, dp->di_extsize, lbn);
+ blkoffset = blkoff(fs, uio->uio_offset);
+
+ /*
+ * The amount we want to transfer in this iteration is
+ * one FS block less the amount of the data before
+ * our startpoint (duh!)
+ */
+ xfersize = fs->fs_bsize - blkoffset;
+
+ /*
+ * But if we actually want less than the block,
+ * or the ext data area doesn't have a whole block more
+ * of data, then use the lesser number.
+ */
+ if (uio->uio_resid < xfersize)
+ xfersize = uio->uio_resid;
+ if (bytesinfile < xfersize)
+ xfersize = bytesinfile;
+
+ if (lblktosize(fs, nextlbn) >= dp->di_extsize) {
+ /*
+ * Don't do readahead if this is the end of the
+ * ext data.
+ */
+ error = bread(vp, -1 - lbn, size, NOCRED, &bp);
+ } else {
+ /*
+ * If we have a second block, then
+ * fire off a request for a readahead
+ * as well as a read. Note that the 4th and 5th
+ * arguments point to arrays of the size specified in
+ * the 6th argument. The readahead block number is
+ * converted to the same -1 - lbn form as the block
+ * being read.
+ */
+ int nextsize = sblksize(fs, dp->di_extsize, nextlbn);
+
+ nextlbn = -1 - nextlbn;
+ error = breadn(vp, -1 - lbn,
+ size, &nextlbn, &nextsize, 1, NOCRED, &bp);
+ }
+ if (error) {
+ brelse(bp);
+ bp = NULL;
+ break;
+ }
+
+ /*
+ * If IO_DIRECT then set B_DIRECT for the buffer. This
+ * will cause us to attempt to release the buffer later on
+ * and will cause the buffer cache to attempt to free the
+ * underlying pages.
+ */
+ if (ioflag & IO_DIRECT)
+ bp->b_flags |= B_DIRECT;
+
+ /*
+ * We should only get non-zero b_resid when an I/O error
+ * has occurred, which should cause us to break above.
+ * However, if the short read did not cause an error,
+ * then we want to ensure that we do not uiomove bad
+ * or uninitialized data.
+ */
+ size -= bp->b_resid;
+ if (size < xfersize) {
+ if (size == 0)
+ break;
+ xfersize = size;
+ }
+
+ error = uiomove((char *)bp->b_data + blkoffset,
+ (int)xfersize, uio);
+ if (error)
+ break;
+
+ if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
+ (LIST_FIRST(&bp->b_dep) == NULL)) {
+ /*
+ * If there are no dependencies, and it's VMIO,
+ * then we don't need the buf, mark it available
+ * for freeing. The VM has the data.
+ */
+ bp->b_flags |= B_RELBUF;
+ brelse(bp);
+ } else {
+ /*
+ * Otherwise let whoever
+ * made the request take care of
+ * freeing it. We just queue
+ * it onto another list.
+ */
+ bqrelse(bp);
+ }
+ }
+
+ /*
+ * This can only happen in the case of an error
+ * because the loop above resets bp to NULL on each iteration
+ * and on normal completion has not set a new value into it.
+ * so it must have come from a 'break' statement
+ */
+ if (bp != NULL) {
+ if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
+ (LIST_FIRST(&bp->b_dep) == NULL)) {
+ bp->b_flags |= B_RELBUF;
+ brelse(bp);
+ } else {
+ bqrelse(bp);
+ }
+ }
+
+ if ((error == 0 || uio->uio_resid != orig_resid) &&
+ (vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
+ ip->i_flag |= IN_ACCESS;
+ return (error);
+}
+
+/*
+ * Vnode op for extended attribute (ext data) writing.
+ * WRITE() hands the request here when IO_EXT is set in a_ioflag.
+ *
+ * Copies a_uio into the ext data area, obtaining each buffer from
+ * UFS_BALLOC() with IO_EXT in its flags and raising di_extsize when the
+ * write extends past it.  Returns EFBIG if the offset is negative or the
+ * write would end beyond NXADDR * fs_bsize bytes.  On error with IO_UNIT
+ * set, the ext area is truncated back to its size on entry and the uio
+ * offset and resid are restored.  After a successful IO_SYNC write that
+ * moved data, the result of UFS_UPDATE() is returned.
+ */
+int
+ufs_extwrite(ap)
+ struct vop_write_args /* {
+ struct vnode *a_vp;
+ struct uio *a_uio;
+ int a_ioflag;
+ struct ucred *a_cred;
+ } */ *ap;
+{
+ struct vnode *vp;
+ struct uio *uio;
+ struct inode *ip;
+ struct ufs2_dinode *dp;
+ struct fs *fs;
+ struct buf *bp;
+ ufs_lbn_t lbn;
+ off_t osize;
+ int blkoffset, error, flags, ioflag, resid, size, xfersize;
+
+ GIANT_REQUIRED;
+
+ vp = ap->a_vp;
+ ip = VTOI(vp);
+ fs = ip->i_fs;
+ dp = ip->i_din2;
+ uio = ap->a_uio;
+ ioflag = ap->a_ioflag;
+
+#ifdef DIAGNOSTIC
+ if (uio->uio_rw != UIO_WRITE || fs->fs_magic != FS_UFS2_MAGIC)
+ panic("ext_write: mode");
+#endif
+
+ if (ioflag & IO_APPEND)
+ uio->uio_offset = dp->di_extsize;
+
+ if (uio->uio_offset < 0 ||
+ (u_int64_t)uio->uio_offset + uio->uio_resid > NXADDR * fs->fs_bsize)
+ return (EFBIG);
+
+ resid = uio->uio_resid;
+ osize = dp->di_extsize;
+ flags = IO_EXT;
+ if ((ioflag & IO_SYNC) && !DOINGASYNC(vp))
+ flags |= IO_SYNC;
+
+ for (error = 0; uio->uio_resid > 0;) {
+ lbn = lblkno(fs, uio->uio_offset);
+ blkoffset = blkoff(fs, uio->uio_offset);
+ xfersize = fs->fs_bsize - blkoffset;
+ if (uio->uio_resid < xfersize)
+ xfersize = uio->uio_resid;
+
+ /*
+ * We must perform a read-before-write if the transfer size
+ * does not cover the entire buffer, so BA_CLRBUF is passed
+ * to UFS_BALLOC() only for a transfer shorter than a block.
+ */
+ if (fs->fs_bsize > xfersize)
+ flags |= BA_CLRBUF;
+ else
+ flags &= ~BA_CLRBUF;
+ error = UFS_BALLOC(vp, uio->uio_offset, xfersize,
+ ap->a_cred, flags, &bp);
+ if (error != 0)
+ break;
+ /*
+ * If the buffer is not valid we have to clear out any
+ * garbage data from the pages instantiated for the buffer.
+ * If we do not, a failed uiomove() during a write can leave
+ * the prior contents of the pages exposed to a userland
+ * mmap(). XXX deal with uiomove() errors a better way.
+ */
+ if ((bp->b_flags & B_CACHE) == 0 && fs->fs_bsize <= xfersize)
+ vfs_bio_clrbuf(bp);
+ if (ioflag & IO_DIRECT)
+ bp->b_flags |= B_DIRECT;
+ if (ioflag & IO_NOWDRAIN)
+ bp->b_flags |= B_NOWDRAIN;
+
+ if (uio->uio_offset + xfersize > dp->di_extsize)
+ dp->di_extsize = uio->uio_offset + xfersize;
+
+ size = sblksize(fs, dp->di_extsize, lbn) - bp->b_resid;
+ if (size < xfersize)
+ xfersize = size;
+
+ error =
+ uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio);
+ if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
+ (LIST_FIRST(&bp->b_dep) == NULL)) {
+ bp->b_flags |= B_RELBUF;
+ }
+
+ /*
+ * If IO_SYNC each buffer is written synchronously. Otherwise
+ * the buffer is written asynchronously when
+ * vm_page_count_severe() or buf_dirty_count_severe() is true,
+ * when the transfer ran to the end of the block, or when
+ * IO_ASYNC or IO_DIRECT is set. In every other case it is
+ * queued as a delayed write.
+ */
+ if (ioflag & IO_SYNC) {
+ (void)bwrite(bp);
+ } else if (vm_page_count_severe() ||
+ buf_dirty_count_severe() ||
+ xfersize + blkoffset == fs->fs_bsize ||
+ (ioflag & (IO_ASYNC | IO_DIRECT)))
+ bawrite(bp);
+ else
+ bdwrite(bp);
+ if (error || xfersize == 0)
+ break;
+ ip->i_flag |= IN_CHANGE | IN_UPDATE;
+ }
+ /*
+ * If we successfully wrote any data, and we are not the superuser
+ * we clear the setuid and setgid bits as a precaution against
+ * tampering.
+ */
+ if (resid > uio->uio_resid && ap->a_cred &&
+ suser_cred(ap->a_cred, PRISON_ROOT)) {
+ ip->i_mode &= ~(ISUID | ISGID);
+ dp->di_mode = ip->i_mode;
+ }
+ if (error) {
+ if (ioflag & IO_UNIT) {
+ (void)UFS_TRUNCATE(vp, osize,
+ IO_EXT | (ioflag&IO_SYNC), ap->a_cred, uio->uio_td);
+ uio->uio_offset -= resid - uio->uio_resid;
+ uio->uio_resid = resid;
+ }
+ } else if (resid > uio->uio_resid && (ioflag & IO_SYNC))
+ error = UFS_UPDATE(vp, 1);
+ return (error);
+}
diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c
index 0ef9ed2c3b87..66d8319280ff 100644
--- a/sys/ufs/ufs/ufs_vnops.c
+++ b/sys/ufs/ufs/ufs_vnops.c
@@ -555,7 +555,8 @@ ufs_setattr(ap)
default:
break;
}
- if ((error = UFS_TRUNCATE(vp, vap->va_size, 0, cred, td)) != 0)
+ if ((error = UFS_TRUNCATE(vp, vap->va_size, IO_NORMAL,
+ cred, td)) != 0)
return (error);
}
if (vap->va_atime.tv_sec != VNOVAL ||
@@ -1268,7 +1269,9 @@ abortit:
xp->i_nlink--;
DIP(xp, i_nlink) = xp->i_nlink;
xp->i_flag |= IN_CHANGE;
- ioflag = DOINGASYNC(tvp) ? 0 : IO_SYNC;
+ ioflag = IO_NORMAL;
+ /* Truncate synchronously unless async, as the replaced line did. */
+ if (!DOINGASYNC(tvp))
+ ioflag |= IO_SYNC;
if ((error = UFS_TRUNCATE(tvp, (off_t)0, ioflag,
tcnp->cn_cred, tcnp->cn_thread)) != 0)
goto bad;
@@ -1762,7 +1765,9 @@ ufs_rmdir(ap)
ip->i_nlink--;
DIP(ip, i_nlink) = ip->i_nlink;
ip->i_flag |= IN_CHANGE;
- ioflag = DOINGASYNC(vp) ? 0 : IO_SYNC;
+ ioflag = IO_NORMAL;
+ /* Truncate synchronously unless async, as the replaced line did. */
+ if (!DOINGASYNC(vp))
+ ioflag |= IO_SYNC;
error = UFS_TRUNCATE(vp, (off_t)0, ioflag, cnp->cn_cred,
cnp->cn_thread);
}
@@ -1980,7 +1985,7 @@ ufs_strategy(ap)
if (vp->v_type == VBLK || vp->v_type == VCHR)
panic("ufs_strategy: spec");
if (bp->b_blkno == bp->b_lblkno) {
- error = ufs_bmaparray(vp, bp->b_lblkno, &blkno, NULL, NULL);
+ error = ufs_bmaparray(vp, bp->b_lblkno, &blkno, bp, NULL, NULL);
bp->b_blkno = blkno;
if (error) {
bp->b_error = error;