aboutsummaryrefslogtreecommitdiff
path: root/sys
diff options
context:
space:
mode:
authorChuck Silvers <chs@FreeBSD.org>2020-05-25 23:47:31 +0000
committerChuck Silvers <chs@FreeBSD.org>2020-05-25 23:47:31 +0000
commitd79ff54b5caad61e657b9cc651aa256f3949b97a (patch)
tree24dde841c02021c0b6542edf676014835312fcf0 /sys
parentb02676a2cbf7dc2fe7c1eb2895667fa846030cd8 (diff)
downloadsrc-d79ff54b5caad61e657b9cc651aa256f3949b97a.tar.gz
src-d79ff54b5caad61e657b9cc651aa256f3949b97a.zip
This commit enables a UFS filesystem to do a forcible unmount when
the underlying media fails or becomes inaccessible, for example when a USB flash memory card hosting a UFS filesystem is unplugged. The strategy for handling disk I/O errors when soft updates are enabled is to stop writing to the disk of the affected file system but continue to accept I/O requests and report that all future writes by the file system to that disk actually succeed. We then initiate an asynchronous forced unmount of the affected file system. There are two cases for disk I/O errors: - ENXIO, which means that this disk is gone and the lower layers of the storage stack already guarantee that no future I/O to this disk will succeed. - EIO (or most other errors), which means that this particular I/O request has failed but subsequent I/O requests to this disk might still succeed. For ENXIO, we can just clear the error and continue, because we know that the file system cannot affect the on-disk state after we see this error. For EIO or other errors, we arrange for the geom_vfs layer to reject all future I/O requests with ENXIO, just as is done when the geom_vfs is orphaned. In both cases, the file system code can just clear the error and proceed with the forcible unmount. This new treatment of I/O errors is needed for writes of any buffer that is involved in a dependency. Most dependencies are described by a structure attached to the buffer's b_dep field. But some are created and processed as a result of the completion of the dependencies attached to the buffer. Clearing some dependencies requires a read. For example, if there is a dependency that requires an inode to be written, the disk block containing that inode must be read, the updated inode copied into place in that buffer, and the buffer then written back to disk. Often the needed buffer is already in memory and can be used. But if it needs to be read from the disk, the read will fail, so we fabricate a buffer full of zeroes and pretend that the read succeeded.
This zeroed buffer can be updated and written back to disk. The only case where a buffer full of zeroes causes the code to do the wrong thing is when reading an inode buffer containing an inode that still has an inode dependency in memory that will reinitialize the effective link count (i_effnlink) based on the actual link count (i_nlink) that we read. To handle this case, we now store the i_nlink value that we wrote in the inode dependency so that it can be restored into the zeroed buffer, thus keeping the tracking of the inode link count consistent. Because applications depend on knowing when an attempt to write their data to stable storage has failed, the fsync(2) and msync(2) system calls need to return errors if data fails to be written to stable storage. So these operations return ENXIO for every call made on files in a file system where we have otherwise been ignoring I/O errors. Co-authored by: mckusick Reviewed by: kib Tested by: Peter Holm Approved by: mckusick (mentor) Sponsored by: Netflix Differential Revision: https://reviews.freebsd.org/D24088
Notes
Notes: svn path=/head/; revision=361491
Diffstat (limited to 'sys')
-rw-r--r--sys/geom/geom_vfs.c10
-rw-r--r--sys/kern/vfs_bio.c4
-rw-r--r--sys/sys/buf.h5
-rw-r--r--sys/ufs/ffs/ffs_alloc.c74
-rw-r--r--sys/ufs/ffs/ffs_balloc.c12
-rw-r--r--sys/ufs/ffs/ffs_extern.h6
-rw-r--r--sys/ufs/ffs/ffs_inode.c15
-rw-r--r--sys/ufs/ffs/ffs_softdep.c108
-rw-r--r--sys/ufs/ffs/ffs_subr.c91
-rw-r--r--sys/ufs/ffs/ffs_vfsops.c46
-rw-r--r--sys/ufs/ffs/ffs_vnops.c19
-rw-r--r--sys/ufs/ffs/softdep.h1
-rw-r--r--sys/ufs/ufs/ufs_vnops.c5
-rw-r--r--sys/ufs/ufs/ufsmount.h10
14 files changed, 333 insertions, 73 deletions
diff --git a/sys/geom/geom_vfs.c b/sys/geom/geom_vfs.c
index 2136beb6573e..727fa726179d 100644
--- a/sys/geom/geom_vfs.c
+++ b/sys/geom/geom_vfs.c
@@ -55,6 +55,7 @@ struct g_vfs_softc {
struct bufobj *sc_bo;
int sc_active;
int sc_orphaned;
+ int sc_enxio_active;
};
static struct buf_ops __g_vfs_bufops = {
@@ -139,9 +140,14 @@ g_vfs_done(struct bio *bip)
cp = bip->bio_from;
sc = cp->geom->softc;
- if (bip->bio_error && bip->bio_error != EOPNOTSUPP)
+ if (bip->bio_error != 0 && bip->bio_error != EOPNOTSUPP) {
+ if ((bp->b_xflags & BX_CVTENXIO) != 0)
+ sc->sc_enxio_active = 1;
+ if (sc->sc_enxio_active)
+ bip->bio_error = ENXIO;
g_print_bio("g_vfs_done():", bip, "error = %d",
bip->bio_error);
+ }
bp->b_error = bip->bio_error;
bp->b_ioflags = bip->bio_flags;
if (bip->bio_error)
@@ -172,7 +178,7 @@ g_vfs_strategy(struct bufobj *bo, struct buf *bp)
* If the provider has orphaned us, just return ENXIO.
*/
mtx_lock(&sc->sc_mtx);
- if (sc->sc_orphaned) {
+ if (sc->sc_orphaned || sc->sc_enxio_active) {
mtx_unlock(&sc->sc_mtx);
bp->b_error = ENXIO;
bp->b_ioflags |= BIO_ERROR;
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index e15d8a11c631..26bd4d7b84d8 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -2176,6 +2176,8 @@ breadn_flags(struct vnode *vp, daddr_t blkno, daddr_t dblkno, int size,
bp->b_flags |= B_CKHASH;
bp->b_ckhashcalc = ckhashfunc;
}
+ if ((flags & GB_CVTENXIO) != 0)
+ bp->b_xflags |= BX_CVTENXIO;
bp->b_ioflags &= ~BIO_ERROR;
if (bp->b_rcred == NOCRED && cred != NOCRED)
bp->b_rcred = crhold(cred);
@@ -2773,6 +2775,7 @@ brelse(struct buf *bp)
panic("brelse: not dirty");
bp->b_flags &= ~(B_ASYNC | B_NOCACHE | B_RELBUF | B_DIRECT);
+ bp->b_xflags &= ~(BX_CVTENXIO);
/* binsfree unlocks bp. */
binsfree(bp, qindex);
}
@@ -2804,6 +2807,7 @@ bqrelse(struct buf *bp)
return;
}
bp->b_flags &= ~(B_ASYNC | B_NOCACHE | B_AGE | B_RELBUF);
+ bp->b_xflags &= ~(BX_CVTENXIO);
if (bp->b_flags & B_MANAGED) {
if (bp->b_flags & B_REMFREE)
diff --git a/sys/sys/buf.h b/sys/sys/buf.h
index c4d7da06cfd7..2b0aa1c7f688 100644
--- a/sys/sys/buf.h
+++ b/sys/sys/buf.h
@@ -261,12 +261,14 @@ struct buf {
*/
#define BX_VNDIRTY 0x00000001 /* On vnode dirty list */
#define BX_VNCLEAN 0x00000002 /* On vnode clean list */
+#define BX_CVTENXIO 0x00000004 /* Convert errors to ENXIO */
#define BX_BKGRDWRITE 0x00000010 /* Do writes in background */
#define BX_BKGRDMARKER 0x00000020 /* Mark buffer for splay tree */
#define BX_ALTDATA 0x00000040 /* Holds extended data */
#define BX_FSPRIV 0x00FF0000 /* Filesystem-specific flags mask */
-#define PRINT_BUF_XFLAGS "\20\7altdata\6bkgrdmarker\5bkgrdwrite\2clean\1dirty"
+#define PRINT_BUF_XFLAGS "\20\7altdata\6bkgrdmarker\5bkgrdwrite\3cvtenxio" \
+ "\2clean\1dirty"
#define NOOFFSET (-1LL) /* No buffer offset calculated yet */
@@ -487,6 +489,7 @@ buf_track(struct buf *bp __unused, const char *location __unused)
#define GB_KVAALLOC 0x0010 /* But allocate KVA. */
#define GB_CKHASH 0x0020 /* If reading, calc checksum hash */
#define GB_NOSPARSE 0x0040 /* Do not instantiate holes */
+#define GB_CVTENXIO 0x0080 /* Convert errors to ENXIO */
#ifdef _KERNEL
extern int nbuf; /* The number of buffer headers */
diff --git a/sys/ufs/ffs/ffs_alloc.c b/sys/ufs/ffs/ffs_alloc.c
index 2101e0fea163..1805979e6b72 100644
--- a/sys/ufs/ffs/ffs_alloc.c
+++ b/sys/ufs/ffs/ffs_alloc.c
@@ -222,6 +222,10 @@ nospace:
softdep_request_cleanup(fs, ITOV(ip), cred, FLUSH_BLOCKS_WAIT);
goto retry;
}
+ if (ffs_fsfail_cleanup_locked(ump, 0)) {
+ UFS_UNLOCK(ump);
+ return (ENXIO);
+ }
if (reclaimed > 0 &&
ppsratecheck(&ump->um_last_fullmsg, &ump->um_secs_fullmsg, 1)) {
UFS_UNLOCK(ump);
@@ -447,6 +451,12 @@ nospace:
softdep_request_cleanup(fs, vp, cred, FLUSH_BLOCKS_WAIT);
goto retry;
}
+ if (bp)
+ brelse(bp);
+ if (ffs_fsfail_cleanup_locked(ump, 0)) {
+ UFS_UNLOCK(ump);
+ return (ENXIO);
+ }
if (reclaimed > 0 &&
ppsratecheck(&ump->um_last_fullmsg, &ump->um_secs_fullmsg, 1)) {
UFS_UNLOCK(ump);
@@ -456,8 +466,6 @@ nospace:
} else {
UFS_UNLOCK(ump);
}
- if (bp)
- brelse(bp);
return (ENOSPC);
}
@@ -1102,7 +1110,7 @@ ffs_valloc(pvp, mode, cred, vpp)
struct ufsmount *ump;
ino_t ino, ipref;
u_int cg;
- int error, error1, reclaimed;
+ int error, reclaimed;
*vpp = NULL;
pip = VTOI(pvp);
@@ -1137,28 +1145,21 @@ retry:
(allocfcn_t *)ffs_nodealloccg);
if (ino == 0)
goto noinodes;
-
/*
* Get rid of the cached old vnode, force allocation of a new vnode
- * for this inode.
+ * for this inode. If this fails, release the allocated ino and
+ * return the error.
*/
- error = ffs_vgetf(pvp->v_mount, ino, LK_EXCLUSIVE, vpp, FFSV_REPLACE);
- if (error) {
- error1 = ffs_vgetf(pvp->v_mount, ino, LK_EXCLUSIVE, vpp,
- FFSV_FORCEINSMQ | FFSV_REPLACE);
+ if ((error = ffs_vgetf(pvp->v_mount, ino, LK_EXCLUSIVE, vpp,
+ FFSV_FORCEINSMQ | FFSV_REPLACE)) != 0) {
ffs_vfree(pvp, ino, mode);
- if (error1 == 0) {
- ip = VTOI(*vpp);
- if (ip->i_mode)
- goto dup_alloc;
- UFS_INODE_SET_FLAG(ip, IN_MODIFIED);
- vput(*vpp);
- }
return (error);
}
+ /*
+ * We got an inode, so check mode and panic if it is already allocated.
+ */
ip = VTOI(*vpp);
if (ip->i_mode) {
-dup_alloc:
printf("mode = 0%o, inum = %ju, fs = %s\n",
ip->i_mode, (uintmax_t)ip->i_number, fs->fs_fsmnt);
panic("ffs_valloc: dup alloc");
@@ -1197,6 +1198,10 @@ noinodes:
softdep_request_cleanup(fs, pvp, cred, FLUSH_INODES_WAIT);
goto retry;
}
+ if (ffs_fsfail_cleanup_locked(ump, 0)) {
+ UFS_UNLOCK(ump);
+ return (ENXIO);
+ }
if (ppsratecheck(&ump->um_last_fullmsg, &ump->um_secs_fullmsg, 1)) {
UFS_UNLOCK(ump);
ffs_fserr(fs, pip->i_number, "out of inodes");
@@ -2230,6 +2235,7 @@ ffs_blkfree_cg(ump, fs, devvp, bno, size, inum, dephd)
struct mount *mp;
struct cg *cgp;
struct buf *bp;
+ daddr_t dbn;
ufs1_daddr_t fragno, cgbno;
int i, blk, frags, bbase, error;
u_int cg;
@@ -2262,8 +2268,23 @@ ffs_blkfree_cg(ump, fs, devvp, bno, size, inum, dephd)
ffs_fserr(fs, inum, "bad block");
return;
}
- if ((error = ffs_getcg(fs, devvp, cg, 0, &bp, &cgp)) != 0)
+ if ((error = ffs_getcg(fs, devvp, cg, GB_CVTENXIO, &bp, &cgp)) != 0) {
+ if (!ffs_fsfail_cleanup(ump, error) ||
+ !MOUNTEDSOFTDEP(UFSTOVFS(ump)) || devvp->v_type != VCHR)
+ return;
+ if (devvp->v_type == VREG)
+ dbn = fragstoblks(fs, cgtod(fs, cg));
+ else
+ dbn = fsbtodb(fs, cgtod(fs, cg));
+ error = getblkx(devvp, dbn, dbn, fs->fs_cgsize, 0, 0, 0, &bp);
+ KASSERT(error == 0, ("getblkx failed"));
+ softdep_setup_blkfree(UFSTOVFS(ump), bp, bno,
+ numfrags(fs, size), dephd);
+ bp->b_flags |= B_RELBUF | B_NOCACHE;
+ bp->b_flags &= ~B_CACHE;
+ bawrite(bp);
return;
+ }
cgbno = dtogd(fs, bno);
blksfree = cg_blksfree(cgp);
UFS_LOCK(ump);
@@ -2783,6 +2804,7 @@ ffs_freefile(ump, fs, devvp, ino, mode, wkhd)
{
struct cg *cgp;
struct buf *bp;
+ daddr_t dbn;
int error;
u_int cg;
u_int8_t *inosused;
@@ -2804,8 +2826,22 @@ ffs_freefile(ump, fs, devvp, ino, mode, wkhd)
if (ino >= fs->fs_ipg * fs->fs_ncg)
panic("ffs_freefile: range: dev = %s, ino = %ju, fs = %s",
devtoname(dev), (uintmax_t)ino, fs->fs_fsmnt);
- if ((error = ffs_getcg(fs, devvp, cg, 0, &bp, &cgp)) != 0)
+ if ((error = ffs_getcg(fs, devvp, cg, GB_CVTENXIO, &bp, &cgp)) != 0) {
+ if (!ffs_fsfail_cleanup(ump, error) ||
+ !MOUNTEDSOFTDEP(UFSTOVFS(ump)) || devvp->v_type != VCHR)
+ return (error);
+ if (devvp->v_type == VREG)
+ dbn = fragstoblks(fs, cgtod(fs, cg));
+ else
+ dbn = fsbtodb(fs, cgtod(fs, cg));
+ error = getblkx(devvp, dbn, dbn, fs->fs_cgsize, 0, 0, 0, &bp);
+ KASSERT(error == 0, ("getblkx failed"));
+ softdep_setup_inofree(UFSTOVFS(ump), bp, ino, wkhd);
+ bp->b_flags |= B_RELBUF | B_NOCACHE;
+ bp->b_flags &= ~B_CACHE;
+ bawrite(bp);
return (error);
+ }
inosused = cg_inosused(cgp);
cgino = ino % fs->fs_ipg;
if (isclr(inosused, cgino)) {
diff --git a/sys/ufs/ffs/ffs_balloc.c b/sys/ufs/ffs/ffs_balloc.c
index 2c0f7795a0a4..8b8d51b98476 100644
--- a/sys/ufs/ffs/ffs_balloc.c
+++ b/sys/ufs/ffs/ffs_balloc.c
@@ -324,7 +324,8 @@ retry:
UFS_UNLOCK(ump);
goto retry;
}
- if (ppsratecheck(&ump->um_last_fullmsg,
+ if (!ffs_fsfail_cleanup_locked(ump, error) &&
+ ppsratecheck(&ump->um_last_fullmsg,
&ump->um_secs_fullmsg, 1)) {
UFS_UNLOCK(ump);
ffs_fserr(fs, ip->i_number, "filesystem full");
@@ -407,7 +408,8 @@ retry:
UFS_UNLOCK(ump);
goto retry;
}
- if (ppsratecheck(&ump->um_last_fullmsg,
+ if (!ffs_fsfail_cleanup_locked(ump, error) &&
+ ppsratecheck(&ump->um_last_fullmsg,
&ump->um_secs_fullmsg, 1)) {
UFS_UNLOCK(ump);
ffs_fserr(fs, ip->i_number, "filesystem full");
@@ -919,7 +921,8 @@ retry:
UFS_UNLOCK(ump);
goto retry;
}
- if (ppsratecheck(&ump->um_last_fullmsg,
+ if (!ffs_fsfail_cleanup_locked(ump, error) &&
+ ppsratecheck(&ump->um_last_fullmsg,
&ump->um_secs_fullmsg, 1)) {
UFS_UNLOCK(ump);
ffs_fserr(fs, ip->i_number, "filesystem full");
@@ -1003,7 +1006,8 @@ retry:
UFS_UNLOCK(ump);
goto retry;
}
- if (ppsratecheck(&ump->um_last_fullmsg,
+ if (!ffs_fsfail_cleanup_locked(ump, error) &&
+ ppsratecheck(&ump->um_last_fullmsg,
&ump->um_secs_fullmsg, 1)) {
UFS_UNLOCK(ump);
ffs_fserr(fs, ip->i_number, "filesystem full");
diff --git a/sys/ufs/ffs/ffs_extern.h b/sys/ufs/ffs/ffs_extern.h
index eff082fc9b45..98aace3a3430 100644
--- a/sys/ufs/ffs/ffs_extern.h
+++ b/sys/ufs/ffs/ffs_extern.h
@@ -116,6 +116,11 @@ int ffs_vfree(struct vnode *, ino_t, int);
vfs_vget_t ffs_vget;
int ffs_vgetf(struct mount *, ino_t, int, struct vnode **, int);
void process_deferred_inactive(struct mount *mp);
+int ffs_fsfail_cleanup(struct ufsmount *, int);
+int ffs_fsfail_cleanup_locked(struct ufsmount *, int);
+int ffs_breadz(struct ufsmount *, struct vnode *, daddr_t, daddr_t, int,
+ daddr_t *, int *, int, struct ucred *, int, void (*)(struct buf *),
+ struct buf **);
/*
* Flags to ffs_vgetf
@@ -162,6 +167,7 @@ void softdep_uninitialize(void);
int softdep_mount(struct vnode *, struct mount *, struct fs *,
struct ucred *);
void softdep_unmount(struct mount *);
+void softdep_handle_error(struct buf *);
int softdep_move_dependencies(struct buf *, struct buf *);
int softdep_flushworklist(struct mount *, int *, struct thread *);
int softdep_flushfiles(struct mount *, int, struct thread *);
diff --git a/sys/ufs/ffs/ffs_inode.c b/sys/ufs/ffs/ffs_inode.c
index 1783053173be..dfe2b9f12965 100644
--- a/sys/ufs/ffs/ffs_inode.c
+++ b/sys/ufs/ffs/ffs_inode.c
@@ -86,6 +86,7 @@ ffs_update(vp, waitfor)
struct fs *fs;
struct buf *bp;
struct inode *ip;
+ daddr_t bn;
int flags, error;
ASSERT_VOP_ELOCKED(vp, "ffs_update");
@@ -112,9 +113,9 @@ ffs_update(vp, waitfor)
if (IS_SNAPSHOT(ip))
flags = GB_LOCK_NOWAIT;
loop:
- error = bread_gb(ITODEVVP(ip),
- fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
- (int) fs->fs_bsize, NOCRED, flags, &bp);
+ bn = fsbtodb(fs, ino_to_fsba(fs, ip->i_number));
+ error = ffs_breadz(VFSTOUFS(vp->v_mount), ITODEVVP(ip), bn, bn,
+ (int) fs->fs_bsize, NULL, NULL, 0, NOCRED, flags, NULL, &bp);
if (error != 0) {
if (error != EBUSY)
return (error);
@@ -163,9 +164,11 @@ loop:
*/
random_harvest_queue(&(ip->i_din2), sizeof(ip->i_din2), RANDOM_FS_ATIME);
}
- if (waitfor)
+ if (waitfor) {
error = bwrite(bp);
- else if (vm_page_count_severe() || buf_dirty_count_severe()) {
+ if (ffs_fsfail_cleanup(VFSTOUFS(vp->v_mount), error))
+ error = 0;
+ } else if (vm_page_count_severe() || buf_dirty_count_severe()) {
bawrite(bp);
error = 0;
} else {
@@ -684,7 +687,7 @@ ffs_indirtrunc(ip, lbn, dbn, lastbn, level, countp)
* of having bread() attempt to calculate it using VOP_BMAP().
*/
vp = ITOV(ip);
- error = breadn_flags(vp, lbn, dbn, (int)fs->fs_bsize, NULL, NULL, 0,
+ error = ffs_breadz(ump, vp, lbn, dbn, (int)fs->fs_bsize, NULL, NULL, 0,
NOCRED, 0, NULL, &bp);
if (error) {
*countp = 0;
diff --git a/sys/ufs/ffs/ffs_softdep.c b/sys/ufs/ffs/ffs_softdep.c
index 8317eec46310..47b0fd6b488c 100644
--- a/sys/ufs/ffs/ffs_softdep.c
+++ b/sys/ufs/ffs/ffs_softdep.c
@@ -2271,6 +2271,7 @@ inodedep_lookup(mp, inum, flags, inodedeppp)
inodedep->id_ino = inum;
inodedep->id_state = ALLCOMPLETE;
inodedep->id_nlinkdelta = 0;
+ inodedep->id_nlinkwrote = -1;
inodedep->id_savedino1 = NULL;
inodedep->id_savedsize = -1;
inodedep->id_savedextsize = -1;
@@ -3606,6 +3607,7 @@ softdep_process_journal(mp, needwk, flags)
jblocks->jb_needseg = 0;
WORKLIST_INSERT(&bp->b_dep, &jseg->js_list);
FREE_LOCK(ump);
+ bp->b_xflags |= BX_CVTENXIO;
pbgetvp(ump->um_devvp, bp);
/*
* We only do the blocking wait once we find the journal
@@ -6334,7 +6336,7 @@ setup_trunc_indir(freeblks, ip, lbn, lastlbn, blkno)
* the on-disk address, so we just pass it to bread() instead of
* having bread() attempt to calculate it using VOP_BMAP().
*/
- error = breadn_flags(ITOV(ip), lbn, blkptrtodb(ump, blkno),
+ error = ffs_breadz(ump, ITOV(ip), lbn, blkptrtodb(ump, blkno),
(int)mp->mnt_stat.f_iosize, NULL, NULL, 0, NOCRED, 0, NULL, &bp);
if (error)
return (error);
@@ -6485,6 +6487,15 @@ complete_trunc_indir(freework)
else
WORKLIST_INSERT(&indirdep->ir_freeblks->fb_freeworkhd,
&freework->fw_list);
+ if (fwn == NULL) {
+ freework->fw_indir = (void *)0x0000deadbeef0000;
+ bp = indirdep->ir_savebp;
+ indirdep->ir_savebp = NULL;
+ free_indirdep(indirdep);
+ FREE_LOCK(ump);
+ brelse(bp);
+ ACQUIRE_LOCK(ump);
+ }
} else {
/* Complete when the real copy is written. */
WORKLIST_INSERT(&bp->b_dep, &freework->fw_list);
@@ -6589,6 +6600,7 @@ softdep_journal_freeblocks(ip, cred, length, flags)
struct buf *bp;
struct vnode *vp;
struct mount *mp;
+ daddr_t dbn;
ufs2_daddr_t extblocks, datablocks;
ufs_lbn_t tmpval, lbn, lastlbn;
int frags, lastoff, iboff, allocblock, needj, error, i;
@@ -6726,8 +6738,9 @@ softdep_journal_freeblocks(ip, cred, length, flags)
*/
ufs_itimes(vp);
ip->i_flag &= ~(IN_LAZYACCESS | IN_LAZYMOD | IN_MODIFIED);
- error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
- (int)fs->fs_bsize, cred, &bp);
+ dbn = fsbtodb(fs, ino_to_fsba(fs, ip->i_number));
+ error = ffs_breadz(ump, ump->um_devvp, dbn, dbn, (int)fs->fs_bsize,
+ NULL, NULL, 0, cred, 0, NULL, &bp);
if (error) {
softdep_error("softdep_journal_freeblocks", error);
return;
@@ -6828,13 +6841,13 @@ softdep_journal_freeblocks(ip, cred, length, flags)
*/
size = sblksize(fs, length, lastlbn);
error = bread(vp, lastlbn, size, cred, &bp);
- if (error) {
+ if (error == 0) {
+ bzero((char *)bp->b_data + lastoff, size - lastoff);
+ bawrite(bp);
+ } else if (!ffs_fsfail_cleanup(ump, error)) {
softdep_error("softdep_journal_freeblks", error);
return;
}
- bzero((char *)bp->b_data + lastoff, size - lastoff);
- bawrite(bp);
-
}
ACQUIRE_LOCK(ump);
inodedep_lookup(mp, ip->i_number, DEPALLOC, &inodedep);
@@ -6945,8 +6958,8 @@ softdep_setup_freeblocks(ip, length, flags)
if ((error = bread(ump->um_devvp,
fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
(int)fs->fs_bsize, NOCRED, &bp)) != 0) {
- brelse(bp);
- softdep_error("softdep_setup_freeblocks", error);
+ if (!ffs_fsfail_cleanup(ump, error))
+ softdep_error("softdep_setup_freeblocks", error);
return;
}
freeblks = newfreeblks(mp, ip);
@@ -8161,7 +8174,7 @@ indir_trunc(freework, dbn, lbn)
ufs_lbn_t lbnadd, nlbn;
u_long key;
int nblocks, ufs1fmt, freedblocks;
- int goingaway, freedeps, needj, level, cnt, i;
+ int goingaway, freedeps, needj, level, cnt, i, error;
freeblks = freework->fw_freeblks;
mp = freeblks->fb_list.wk_mp;
@@ -8199,10 +8212,11 @@ indir_trunc(freework, dbn, lbn)
if (indirdep == NULL || (indirdep->ir_state & GOINGAWAY) == 0)
panic("indir_trunc: Bad indirdep %p from buf %p",
indirdep, bp);
- } else if (bread(freeblks->fb_devvp, dbn, (int)fs->fs_bsize,
- NOCRED, &bp) != 0) {
- brelse(bp);
- return;
+ } else {
+ error = ffs_breadz(ump, freeblks->fb_devvp, dbn, dbn,
+ (int)fs->fs_bsize, NULL, NULL, 0, NOCRED, 0, NULL, &bp);
+ if (error)
+ return;
}
ACQUIRE_LOCK(ump);
/* Protects against a race with complete_trunc_indir(). */
@@ -9700,6 +9714,7 @@ clear_unlinked_inodedep(inodedep)
struct inodedep *idn;
struct fs *fs, *bpfs;
struct buf *bp;
+ daddr_t dbn;
ino_t ino;
ino_t nino;
ino_t pino;
@@ -9753,11 +9768,10 @@ clear_unlinked_inodedep(inodedep)
bp = getblk(ump->um_devvp, btodb(fs->fs_sblockloc),
(int)fs->fs_sbsize, 0, 0, 0);
} else {
- error = bread(ump->um_devvp,
- fsbtodb(fs, ino_to_fsba(fs, pino)),
- (int)fs->fs_bsize, NOCRED, &bp);
- if (error)
- brelse(bp);
+ dbn = fsbtodb(fs, ino_to_fsba(fs, pino));
+ error = ffs_breadz(ump, ump->um_devvp, dbn, dbn,
+ (int)fs->fs_bsize, NULL, NULL, 0, NOCRED, 0, NULL,
+ &bp);
}
ACQUIRE_LOCK(ump);
if (error)
@@ -10578,14 +10592,16 @@ initiate_write_inodeblock_ufs2(inodedep, bp)
if ((adp->ad_state & ATTACHED) == 0)
panic("inodedep %p and adp %p not attached", inodedep, adp);
prevlbn = adp->ad_offset;
- if (adp->ad_offset < UFS_NDADDR &&
+ if (!ffs_fsfail_cleanup(ump, 0) &&
+ adp->ad_offset < UFS_NDADDR &&
dp->di_db[adp->ad_offset] != adp->ad_newblkno)
panic("initiate_write_inodeblock_ufs2: "
"direct pointer #%jd mismatch %jd != %jd",
(intmax_t)adp->ad_offset,
(intmax_t)dp->di_db[adp->ad_offset],
(intmax_t)adp->ad_newblkno);
- if (adp->ad_offset >= UFS_NDADDR &&
+ if (!ffs_fsfail_cleanup(ump, 0) &&
+ adp->ad_offset >= UFS_NDADDR &&
dp->di_ib[adp->ad_offset - UFS_NDADDR] != adp->ad_newblkno)
panic("initiate_write_inodeblock_ufs2: "
"indirect pointer #%jd mismatch %jd != %jd",
@@ -10817,12 +10833,14 @@ softdep_setup_inofree(mp, bp, ino, wkhd)
("softdep_setup_inofree called on non-softdep filesystem"));
ump = VFSTOUFS(mp);
ACQUIRE_LOCK(ump);
- fs = ump->um_fs;
- cgp = (struct cg *)bp->b_data;
- inosused = cg_inosused(cgp);
- if (isset(inosused, ino % fs->fs_ipg))
- panic("softdep_setup_inofree: inode %ju not freed.",
- (uintmax_t)ino);
+ if (!ffs_fsfail_cleanup(ump, 0)) {
+ fs = ump->um_fs;
+ cgp = (struct cg *)bp->b_data;
+ inosused = cg_inosused(cgp);
+ if (isset(inosused, ino % fs->fs_ipg))
+ panic("softdep_setup_inofree: inode %ju not freed.",
+ (uintmax_t)ino);
+ }
if (inodedep_lookup(mp, ino, 0, &inodedep))
panic("softdep_setup_inofree: ino %ju has existing inodedep %p",
(uintmax_t)ino, inodedep);
@@ -11091,6 +11109,26 @@ initiate_write_bmsafemap(bmsafemap, bp)
wk_list);
}
+void
+softdep_handle_error(struct buf *bp)
+{
+ struct ufsmount *ump;
+
+ ump = softdep_bp_to_mp(bp);
+ if (ump == NULL)
+ return;
+
+ if (ffs_fsfail_cleanup(ump, bp->b_error)) {
+ /*
+ * No future writes will succeed, so the on-disk image is safe.
+ * Pretend that this write succeeded so that the softdep state
+ * will be cleaned up naturally.
+ */
+ bp->b_ioflags &= ~BIO_ERROR;
+ bp->b_error = 0;
+ }
+}
+
/*
* This routine is called during the completion interrupt
* service routine for a disk write (from the procedure called
@@ -11117,6 +11155,8 @@ softdep_disk_write_complete(bp)
"with outstanding dependencies for buffer %p", bp));
if (ump == NULL)
return;
+ if ((bp->b_ioflags & BIO_ERROR) != 0)
+ softdep_handle_error(bp);
/*
* If an error occurred while doing the write, then the data
* has not hit the disk and the dependencies cannot be processed.
@@ -12305,6 +12345,13 @@ softdep_load_inodeblock(ip)
FREE_LOCK(ump);
return;
}
+ if (ip->i_nlink != inodedep->id_nlinkwrote &&
+ inodedep->id_nlinkwrote != -1) {
+ KASSERT(ip->i_nlink == 0 &&
+ (ump->um_flags & UM_FSFAIL_CLEANUP) != 0,
+ ("read bad i_nlink value"));
+ ip->i_effnlink = ip->i_nlink = inodedep->id_nlinkwrote;
+ }
ip->i_effnlink -= inodedep->id_nlinkdelta;
KASSERT(ip->i_effnlink >= 0,
("softdep_load_inodeblock: negative i_effnlink"));
@@ -12367,6 +12414,11 @@ again:
panic("softdep_update_inodeblock: bad link count");
return;
}
+ KASSERT(ip->i_nlink >= inodedep->id_nlinkdelta,
+ ("softdep_update_inodeblock inconsistent ip %p i_nlink %d "
+ "inodedep %p id_nlinkdelta %jd",
+ ip, ip->i_nlink, inodedep, (intmax_t)inodedep->id_nlinkdelta));
+ inodedep->id_nlinkwrote = ip->i_nlink;
if (inodedep->id_nlinkdelta != ip->i_nlink - ip->i_effnlink)
panic("softdep_update_inodeblock: bad delta");
/*
@@ -12642,7 +12694,7 @@ restart:
else
brelse(bp);
vput(pvp);
- if (error != 0)
+ if (!ffs_fsfail_cleanup(ump, error))
return (error);
ACQUIRE_LOCK(ump);
if (inodedep_lookup(mp, ip->i_number, 0, &inodedep) == 0)
diff --git a/sys/ufs/ffs/ffs_subr.c b/sys/ufs/ffs/ffs_subr.c
index 8a3a0f57f117..158c09b096d0 100644
--- a/sys/ufs/ffs/ffs_subr.c
+++ b/sys/ufs/ffs/ffs_subr.c
@@ -67,6 +67,7 @@ struct malloc_type;
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/ucred.h>
+#include <sys/taskqueue.h>
#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
@@ -136,7 +137,8 @@ ffs_load_inode(struct buf *bp, struct inode *ip, struct fs *fs, ino_t ino)
return (0);
}
dip2 = ((struct ufs2_dinode *)bp->b_data + ino_to_fsbo(fs, ino));
- if ((error = ffs_verify_dinode_ckhash(fs, dip2)) != 0) {
+ if ((error = ffs_verify_dinode_ckhash(fs, dip2)) != 0 &&
+ !ffs_fsfail_cleanup(ITOUMP(ip), error)) {
printf("%s: inode %jd: check-hash failed\n", fs->fs_fsmnt,
(intmax_t)ino);
return (error);
@@ -202,6 +204,93 @@ ffs_check_blkno(struct mount *mp, ino_t inum, ufs2_daddr_t daddr, int blksize)
UFS_UNLOCK(ump);
return (EINTEGRITY);
}
+
+/*
+ * Initiate a forcible unmount.
+ * Used to unmount filesystems whose underlying media has gone away.
+ */
+static void
+ffs_fsfail_unmount(void *v, int pending)
+{
+ struct fsfail_task *etp;
+ struct mount *mp;
+
+ etp = v;
+
+ /*
+ * Find our mount and get a ref on it, then try to unmount.
+ */
+ mp = vfs_getvfs(&etp->fsid);
+ if (mp != NULL)
+ dounmount(mp, MNT_FORCE, curthread);
+ free(etp, M_UFSMNT);
+}
+
+/*
+ * On first ENXIO error, start a task that forcibly unmounts the filesystem.
+ *
+ * Return true if a cleanup is in progress.
+ */
+int
+ffs_fsfail_cleanup(struct ufsmount *ump, int error)
+{
+ int retval;
+
+ UFS_LOCK(ump);
+ retval = ffs_fsfail_cleanup_locked(ump, error);
+ UFS_UNLOCK(ump);
+ return (retval);
+}
+
+int
+ffs_fsfail_cleanup_locked(struct ufsmount *ump, int error)
+{
+ struct fsfail_task *etp;
+ struct task *tp;
+
+ mtx_assert(UFS_MTX(ump), MA_OWNED);
+ if (error == ENXIO && (ump->um_flags & UM_FSFAIL_CLEANUP) == 0) {
+ ump->um_flags |= UM_FSFAIL_CLEANUP;
+ /*
+ * Queue an async forced unmount.
+ */
+ etp = ump->um_fsfail_task;
+ ump->um_fsfail_task = NULL;
+ if (etp != NULL) {
+ tp = &etp->task;
+ TASK_INIT(tp, 0, ffs_fsfail_unmount, etp);
+ taskqueue_enqueue(taskqueue_thread, tp);
+ printf("UFS: forcibly unmounting %s from %s\n",
+ ump->um_mountp->mnt_stat.f_mntfromname,
+ ump->um_mountp->mnt_stat.f_mntonname);
+ }
+ }
+ return ((ump->um_flags & UM_FSFAIL_CLEANUP) != 0);
+}
+
+/*
+ * Wrapper used during ENXIO cleanup to allocate empty buffers when
+ * the kernel is unable to read the real one. They are needed so that
+ * the soft updates code can use them to unwind its dependencies.
+ */
+int
+ffs_breadz(struct ufsmount *ump, struct vnode *vp, daddr_t lblkno,
+ daddr_t dblkno, int size, daddr_t *rablkno, int *rabsize, int cnt,
+ struct ucred *cred, int flags, void (*ckhashfunc)(struct buf *),
+ struct buf **bpp)
+{
+ int error;
+
+ flags |= GB_CVTENXIO;
+ error = breadn_flags(vp, lblkno, dblkno, size, rablkno, rabsize, cnt,
+ cred, flags, ckhashfunc, bpp);
+ if (error != 0 && ffs_fsfail_cleanup(ump, error)) {
+ error = getblkx(vp, lblkno, dblkno, size, 0, 0, flags, bpp);
+ KASSERT(error == 0, ("getblkx failed"));
+ vfs_bio_bzero_buf(*bpp, 0, size);
+ }
+ return (error);
+}
#endif /* _KERNEL */
/*
diff --git a/sys/ufs/ffs/ffs_vfsops.c b/sys/ufs/ffs/ffs_vfsops.c
index ff8920b2690c..8607ecd6c2d6 100644
--- a/sys/ufs/ffs/ffs_vfsops.c
+++ b/sys/ufs/ffs/ffs_vfsops.c
@@ -58,6 +58,7 @@ __FBSDID("$FreeBSD$");
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/rwlock.h>
+#include <sys/sysctl.h>
#include <sys/vmmeter.h>
#include <security/mac/mac_framework.h>
@@ -148,6 +149,12 @@ static const char *ffs_opts[] = { "acls", "async", "noatime", "noclusterr",
"multilabel", "nfsv4acls", "fsckpid", "snapshot", "nosuid", "suiddir",
"nosymfollow", "sync", "union", "userquota", "untrusted", NULL };
+static int ffs_enxio_enable = 1;
+SYSCTL_DECL(_vfs_ffs);
+SYSCTL_INT(_vfs_ffs, OID_AUTO, enxio_enable, CTLFLAG_RWTUN,
+ &ffs_enxio_enable, 0,
+ "enable mapping of other disk I/O errors to ENXIO");
+
static int
ffs_mount(struct mount *mp)
{
@@ -795,6 +802,7 @@ ffs_mountfs(odevvp, mp, td)
struct g_consumer *cp;
struct mount *nmp;
struct vnode *devvp;
+ struct fsfail_task *etp;
int candelete, canspeedup;
off_t loc;
@@ -1085,6 +1093,9 @@ ffs_mountfs(odevvp, mp, td)
(void) ufs_extattr_autostart(mp, td);
#endif /* !UFS_EXTATTR_AUTOSTART */
#endif /* !UFS_EXTATTR */
+ etp = malloc(sizeof *ump->um_fsfail_task, M_UFSMNT, M_WAITOK | M_ZERO);
+ etp->fsid = mp->mnt_stat.f_fsid;
+ ump->um_fsfail_task = etp;
return (0);
out:
if (fs != NULL) {
@@ -1134,7 +1145,6 @@ ffs_use_bread(void *devfd, off_t loc, void **bufp, int size)
return (0);
}
-#include <sys/sysctl.h>
static int bigcgs = 0;
SYSCTL_INT(_debug, OID_AUTO, bigcgs, CTLFLAG_RW, &bigcgs, 0, "");
@@ -1271,7 +1281,7 @@ ffs_unmount(mp, mntflags)
error = softdep_flushfiles(mp, flags, td);
else
error = ffs_flushfiles(mp, flags, td);
- if (error != 0 && error != ENXIO)
+ if (error != 0 && !ffs_fsfail_cleanup(ump, error))
goto fail;
UFS_LOCK(ump);
@@ -1288,7 +1298,9 @@ ffs_unmount(mp, mntflags)
if (fs->fs_ronly == 0 || ump->um_fsckpid > 0) {
fs->fs_clean = fs->fs_flags & (FS_UNCLEAN|FS_NEEDSFSCK) ? 0 : 1;
error = ffs_sbupdate(ump, MNT_WAIT, 0);
- if (error && error != ENXIO) {
+ if (ffs_fsfail_cleanup(ump, error))
+ error = 0;
+ if (error != 0 && !ffs_fsfail_cleanup(ump, error)) {
fs->fs_clean = 0;
goto fail;
}
@@ -1326,6 +1338,8 @@ ffs_unmount(mp, mntflags)
}
free(fs->fs_csp, M_UFSMNT);
free(fs, M_UFSMNT);
+ if (ump->um_fsfail_task != NULL)
+ free(ump->um_fsfail_task, M_UFSMNT);
free(ump, M_UFSMNT);
mp->mnt_data = NULL;
MNT_ILOCK(mp);
@@ -1640,6 +1654,8 @@ loop:
if (waitfor == MNT_WAIT || rebooting) {
if ((error = softdep_flushworklist(ump->um_mountp, &count, td)))
allerror = error;
+ if (ffs_fsfail_cleanup(ump, allerror))
+ allerror = 0;
/* Flushed work items may create new vnodes to clean */
if (allerror == 0 && count)
goto loop;
@@ -1657,6 +1673,8 @@ loop:
error = ffs_sbupdate(ump, waitfor, 0);
if (error != 0)
allerror = error;
+ if (ffs_fsfail_cleanup(ump, allerror))
+ allerror = 0;
if (allerror == 0 && waitfor == MNT_WAIT)
goto loop;
} else if (suspend != 0) {
@@ -1681,6 +1699,8 @@ loop:
if (fs->fs_fmod != 0 &&
(error = ffs_sbupdate(ump, waitfor, suspended)) != 0)
allerror = error;
+ if (ffs_fsfail_cleanup(ump, allerror))
+ allerror = 0;
return (allerror);
}
@@ -1707,6 +1727,7 @@ ffs_vgetf(mp, ino, flags, vpp, ffs_flags)
struct ufsmount *ump;
struct buf *bp;
struct vnode *vp;
+ daddr_t dbn;
int error;
MPASS((ffs_flags & FFSV_REPLACE) == 0 || (flags & LK_EXCLUSIVE) != 0);
@@ -1796,9 +1817,10 @@ ffs_vgetf(mp, ino, flags, vpp, ffs_flags)
}
/* Read in the disk contents for the inode, copy into the inode. */
- error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
- (int)fs->fs_bsize, NOCRED, &bp);
- if (error) {
+ dbn = fsbtodb(fs, ino_to_fsba(fs, ino));
+ error = ffs_breadz(ump, ump->um_devvp, dbn, dbn, (int)fs->fs_bsize,
+ NULL, NULL, 0, NOCRED, 0, NULL, &bp);
+ if (error != 0) {
/*
* The inode does not contain anything useful, so it would
* be misleading to leave it on its hash chain. With mode
@@ -1957,6 +1979,7 @@ ffs_uninit(vfsp)
ret = ufs_uninit(vfsp);
softdep_uninitialize();
ffs_susp_uninitialize();
+ taskqueue_drain_all(taskqueue_thread);
return (ret);
}
@@ -2039,6 +2062,8 @@ ffs_use_bwrite(void *devfd, off_t loc, void *buf, int size)
* Writing the superblock itself. We need to do special checks for it.
*/
bp = devfdp->sbbp;
+ if (ffs_fsfail_cleanup(ump, devfdp->error))
+ devfdp->error = 0;
if (devfdp->error != 0) {
brelse(bp);
return (devfdp->error);
@@ -2112,6 +2137,11 @@ ffs_backgroundwritedone(struct buf *bp)
struct bufobj *bufobj;
struct buf *origbp;
+#ifdef SOFTUPDATES
+ if (!LIST_EMPTY(&bp->b_dep) && (bp->b_ioflags & BIO_ERROR) != 0)
+ softdep_handle_error(bp);
+#endif
+
/*
* Find the original buffer that we are writing.
*/
@@ -2122,7 +2152,7 @@ ffs_backgroundwritedone(struct buf *bp)
/*
* We should mark the cylinder group buffer origbp as
- * dirty, to not loose the failed write.
+ * dirty, to not lose the failed write.
*/
if ((bp->b_ioflags & BIO_ERROR) != 0)
origbp->b_vflags |= BV_BKGRDERR;
@@ -2393,6 +2423,8 @@ ffs_geom_strategy(struct bufobj *bo, struct buf *bp)
break;
}
}
+ if (bp->b_iocmd != BIO_READ && ffs_enxio_enable)
+ bp->b_xflags |= BX_CVTENXIO;
g_vfs_strategy(bo, bp);
}
diff --git a/sys/ufs/ffs/ffs_vnops.c b/sys/ufs/ffs/ffs_vnops.c
index 8a1d13802e31..7c1df1f50e31 100644
--- a/sys/ufs/ffs/ffs_vnops.c
+++ b/sys/ufs/ffs/ffs_vnops.c
@@ -239,6 +239,8 @@ retry:
}
BO_UNLOCK(bo);
}
+ if (ffs_fsfail_cleanup(VFSTOUFS(vp->v_mount), 0))
+ return (ENXIO);
return (0);
}
@@ -247,6 +249,7 @@ ffs_syncvnode(struct vnode *vp, int waitfor, int flags)
{
struct inode *ip;
struct bufobj *bo;
+ struct ufsmount *ump;
struct buf *bp, *nbp;
ufs_lbn_t lbn;
int error, passes;
@@ -255,14 +258,18 @@ ffs_syncvnode(struct vnode *vp, int waitfor, int flags)
ip = VTOI(vp);
ip->i_flag &= ~IN_NEEDSYNC;
bo = &vp->v_bufobj;
+ ump = VFSTOUFS(vp->v_mount);
/*
* When doing MNT_WAIT we must first flush all dependencies
* on the inode.
*/
if (DOINGSOFTDEP(vp) && waitfor == MNT_WAIT &&
- (error = softdep_sync_metadata(vp)) != 0)
+ (error = softdep_sync_metadata(vp)) != 0) {
+ if (ffs_fsfail_cleanup(ump, error))
+ error = 0;
return (error);
+ }
/*
* Flush all dirty buffers associated with a vnode.
@@ -332,7 +339,10 @@ loop:
}
if (wait) {
bremfree(bp);
- if ((error = bwrite(bp)) != 0)
+ error = bwrite(bp);
+ if (ffs_fsfail_cleanup(ump, error))
+ error = 0;
+ if (error != 0)
return (error);
} else if ((bp->b_flags & B_CLUSTEROK)) {
(void) vfs_bio_awrite(bp);
@@ -901,8 +911,11 @@ ffs_write(ap)
uio->uio_offset -= resid - uio->uio_resid;
uio->uio_resid = resid;
}
- } else if (resid > uio->uio_resid && (ioflag & IO_SYNC))
+ } else if (resid > uio->uio_resid && (ioflag & IO_SYNC)) {
error = ffs_update(vp, 1);
+ if (ffs_fsfail_cleanup(VFSTOUFS(vp->v_mount), error))
+ error = ENXIO;
+ }
return (error);
}
diff --git a/sys/ufs/ffs/softdep.h b/sys/ufs/ffs/softdep.h
index 6fd248eb6387..868ada00f2dc 100644
--- a/sys/ufs/ffs/softdep.h
+++ b/sys/ufs/ffs/softdep.h
@@ -358,6 +358,7 @@ struct inodedep {
struct fs *id_fs; /* associated filesystem */
ino_t id_ino; /* dependent inode */
nlink_t id_nlinkdelta; /* saved effective link count */
+ nlink_t id_nlinkwrote; /* i_nlink that we wrote to disk */
nlink_t id_savednlink; /* Link saved during rollback */
LIST_ENTRY(inodedep) id_deps; /* bmsafemap's list of inodedep's */
struct bmsafemap *id_bmsafemap; /* related bmsafemap (if pending) */
diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c
index 16f2ed3da611..8ef8c78d654f 100644
--- a/sys/ufs/ufs/ufs_vnops.c
+++ b/sys/ufs/ufs/ufs_vnops.c
@@ -1426,6 +1426,7 @@ relock:
if (DOINGSOFTDEP(tvp))
softdep_change_linkcnt(tip);
}
+ goto bad;
}
if (doingdirectory && !DOINGSOFTDEP(tvp)) {
/*
@@ -1523,11 +1524,13 @@ unlockout:
if (error == 0 && endoff != 0) {
error = UFS_TRUNCATE(tdvp, endoff, IO_NORMAL |
(DOINGASYNC(tdvp) ? 0 : IO_SYNC), tcnp->cn_cred);
- if (error != 0)
+ if (error != 0 && !ffs_fsfail_cleanup(VFSTOUFS(mp), error))
vn_printf(tdvp,
"ufs_rename: failed to truncate, error %d\n",
error);
#ifdef UFS_DIRHASH
+ if (error != 0)
+ ufsdirhash_free(tdp);
else if (tdp->i_dirhash != NULL)
ufsdirhash_dirtrunc(tdp, endoff);
#endif
diff --git a/sys/ufs/ufs/ufsmount.h b/sys/ufs/ufs/ufsmount.h
index 2d669338f98c..57e163c11d77 100644
--- a/sys/ufs/ufs/ufsmount.h
+++ b/sys/ufs/ufs/ufsmount.h
@@ -45,6 +45,8 @@ struct ufs_args {
#ifdef _KERNEL
+#include <sys/_task.h>
+
#ifdef MALLOC_DECLARE
MALLOC_DECLARE(M_UFSMNT);
MALLOC_DECLARE(M_TRIM);
@@ -65,6 +67,10 @@ struct inodedep;
TAILQ_HEAD(inodedeplst, inodedep);
LIST_HEAD(bmsafemaphd, bmsafemap);
LIST_HEAD(trimlist_hashhead, ffs_blkfree_trim_params);
+struct fsfail_task {
+ struct task task;
+ fsid_t fsid;
+};
/*
* This structure describes the UFS specific mount structure data.
@@ -112,6 +118,7 @@ struct ufsmount {
struct taskqueue *um_trim_tq; /* (c) trim request queue */
struct trimlist_hashhead *um_trimhash; /* (i) trimlist hash table */
u_long um_trimlisthashsize; /* (i) trim hash table size-1 */
+ struct fsfail_task *um_fsfail_task; /* (i) task for fsfail cleanup*/
/* (c) - below function ptrs */
int (*um_balloc)(struct vnode *, off_t, int, struct ucred *,
int, struct buf **);
@@ -133,7 +140,8 @@ struct ufsmount {
#define UM_CANDELETE 0x00000001 /* devvp supports TRIM */
#define UM_WRITESUSPENDED 0x00000002 /* suspension in progress */
#define UM_CANSPEEDUP 0x00000004 /* devvp supports SPEEDUP */
-
+#define UM_FSFAIL_CLEANUP 0x00000008 /* need cleanup after
+ unrecoverable error */
/*
* function prototypes
*/