diff options
author | Conrad Meyer <cem@FreeBSD.org> | 2019-09-11 21:24:14 +0000 |
---|---|---|
committer | Conrad Meyer <cem@FreeBSD.org> | 2019-09-11 21:24:14 +0000 |
commit | aaa3852435aee8c1b5abfe38ed7d43f9ba57e6dd (patch) | |
tree | c4af6be7d3abd633c1034d04254318177754b071 /sys/fs/msdosfs | |
parent | 8d910a42823bf5d57e859949f355568657cc86dd (diff) | |
download | src-aaa3852435aee8c1b5abfe38ed7d43f9ba57e6dd.tar.gz src-aaa3852435aee8c1b5abfe38ed7d43f9ba57e6dd.zip |
buf: Add B_INVALONERR flag to discard data
Setting the B_INVALONERR flag before a synchronous write causes the buf
cache to forcibly invalidate contents if the write fails (BIO_ERROR).
This is intended to be used to allow layers above the buffer cache to make
more informed decisions about when discarding dirty buffers without
successful write is acceptable.
As a proof of concept, use in msdosfs to handle failures to mark the on-disk
'dirty' bit during rw mount or ro->rw update.
Extending this to other filesystems is left as future work.
PR: 210316
Reviewed by: kib (with objections)
Sponsored by: Dell EMC Isilon
Differential Revision: https://reviews.freebsd.org/D21539
Notes
Notes:
svn path=/head/; revision=352233
Diffstat (limited to 'sys/fs/msdosfs')
-rw-r--r-- | sys/fs/msdosfs/fat.h | 8 | ||||
-rw-r--r-- | sys/fs/msdosfs/msdosfs_fat.c | 32 | ||||
-rw-r--r-- | sys/fs/msdosfs/msdosfs_vfsops.c | 23 |
3 files changed, 51 insertions, 12 deletions
diff --git a/sys/fs/msdosfs/fat.h b/sys/fs/msdosfs/fat.h index 6b280868e5b9..bb0f128eaa87 100644 --- a/sys/fs/msdosfs/fat.h +++ b/sys/fs/msdosfs/fat.h @@ -103,7 +103,13 @@ int fatentry(int function, struct msdosfsmount *pmp, u_long cluster, u_long *old int freeclusterchain(struct msdosfsmount *pmp, u_long startchain); int extendfile(struct denode *dep, u_long count, struct buf **bpp, u_long *ncp, int flags); void fc_purge(struct denode *dep, u_int frcn); -int markvoldirty(struct msdosfsmount *pmp, int dirty); +int markvoldirty_upgrade(struct msdosfsmount *pmp, bool dirty, bool rw_upgrade); + +static inline int +markvoldirty(struct msdosfsmount *pmp, bool dirty) +{ + return (markvoldirty_upgrade(pmp, dirty, false)); +} #endif /* _KERNEL || MAKEFS */ #endif /* !_FS_MSDOSFS_FAT_H_ */ diff --git a/sys/fs/msdosfs/msdosfs_fat.c b/sys/fs/msdosfs/msdosfs_fat.c index 70970592b2b8..2fb9deec6f5c 100644 --- a/sys/fs/msdosfs/msdosfs_fat.c +++ b/sys/fs/msdosfs/msdosfs_fat.c @@ -1117,7 +1117,7 @@ extendfile(struct denode *dep, u_long count, struct buf **bpp, u_long *ncp, * ? (other errors from called routines) */ int -markvoldirty(struct msdosfsmount *pmp, int dirty) +markvoldirty_upgrade(struct msdosfsmount *pmp, bool dirty, bool rw_upgrade) { struct buf *bp; u_long bn, bo, bsize, byteoffset, fatval; @@ -1130,8 +1130,11 @@ markvoldirty(struct msdosfsmount *pmp, int dirty) if (FAT12(pmp)) return (0); - /* Can't change the bit on a read-only filesystem. */ - if (pmp->pm_flags & MSDOSFSMNT_RONLY) + /* + * Can't change the bit on a read-only filesystem, except as part of + * ro->rw upgrade. + */ + if ((pmp->pm_flags & MSDOSFSMNT_RONLY) != 0 && !rw_upgrade) return (EROFS); /* @@ -1167,6 +1170,29 @@ markvoldirty(struct msdosfsmount *pmp, int dirty) putushort(&bp->b_data[bo], fatval); } + /* + * The concern here is that a devvp may be readonly, without reporting + * itself as such through the usual channels. In that case, we'd like + * it if attempting to mount msdosfs rw didn't panic the system. + * + * markvoldirty is invoked as the first write on backing devvps when + * either msdosfs is mounted for the first time, or a ro mount is + * upgraded to rw. + * + * In either event, if a write error occurs dirtying the volume: + * - No user data has been permitted to be written to cache yet. + * - We can abort the high-level operation (mount, or ro->rw) safely. + * - We don't derive any benefit from leaving a zombie dirty buf in + * the cache that can not be cleaned or evicted. + * + * So, mark B_INVALONERR to have bwrite() -> brelse() detect that + * condition and force-invalidate our write to the block if it occurs. + * + * PR 210316 provides more context on the discovery and diagnosis of + * the problem, as well as earlier attempts to solve it. + */ + bp->b_flags |= B_INVALONERR; + /* Write out the modified FAT block synchronously. */ return (bwrite(bp)); } diff --git a/sys/fs/msdosfs/msdosfs_vfsops.c b/sys/fs/msdosfs/msdosfs_vfsops.c index b374522e4d31..e66fba9d5ee8 100644 --- a/sys/fs/msdosfs/msdosfs_vfsops.c +++ b/sys/fs/msdosfs/msdosfs_vfsops.c @@ -311,16 +311,25 @@ msdosfs_mount(struct mount *mp) if (error) return (error); + /* Now that the volume is modifiable, mark it dirty. */ + error = markvoldirty_upgrade(pmp, true, true); + if (error) { + /* + * If dirtying the superblock failed, drop GEOM + * 'w' refs (we're still RO). + */ + g_topology_lock(); + (void)g_access(pmp->pm_cp, 0, -1, 0); + g_topology_unlock(); + + return (error); + } + pmp->pm_fmod = 1; pmp->pm_flags &= ~MSDOSFSMNT_RONLY; MNT_ILOCK(mp); mp->mnt_flag &= ~MNT_RDONLY; MNT_IUNLOCK(mp); - - /* Now that the volume is modifiable, mark it dirty. */ - error = markvoldirty(pmp, 1); - if (error) - return (error); } } /* @@ -701,10 +710,8 @@ mountmsdosfs(struct vnode *devvp, struct mount *mp) if (ronly) pmp->pm_flags |= MSDOSFSMNT_RONLY; else { - if ((error = markvoldirty(pmp, 1)) != 0) { - (void)markvoldirty(pmp, 0); + if ((error = markvoldirty(pmp, 1)) != 0) goto error_exit; - } pmp->pm_fmod = 1; } mp->mnt_data = pmp; |