diff options
author | Kirk McKusick <mckusick@FreeBSD.org> | 2001-05-17 07:24:03 +0000 |
---|---|---|
committer | Kirk McKusick <mckusick@FreeBSD.org> | 2001-05-17 07:24:03 +0000 |
commit | 24a83a4b3f1bcc534cf935941b4d1811072c7167 (patch) | |
tree | d624096ee5804a79001f95795fc1206eba5f2b72 | |
parent | 74808b9ec2a94597f5218501ddabe302771ac991 (diff) | |
download | src-24a83a4b3f1bcc534cf935941b4d1811072c7167.tar.gz src-24a83a4b3f1bcc534cf935941b4d1811072c7167.zip |
When a new block is allocated to a directory, an fsync of a file
whose name is within that block must ensure not only that the block
containing the file name has been written, but also that the on-disk
directory inode references that block. When a new directory block
is created, we allocate a newdirblk structure which is linked to
the associated allocdirect (on its ad_newdirblk list). When the
allocdirect has been satisfied, the newdirblk structure is moved
to the inodedep id_bufwait list of its directory to await the inode
being written. When the inode is written, the directory entries
are fully committed and can be deleted from their pagedep->pd_pendinghd
and inodedep->id_pendinghd lists.
Notes:
svn path=/head/; revision=76724
-rw-r--r-- | sys/ufs/ffs/ffs_softdep.c | 186 | ||||
-rw-r--r-- | sys/ufs/ffs/softdep.h | 59 | ||||
-rw-r--r-- | sys/ufs/ufs/ufs_extern.h | 4 | ||||
-rw-r--r-- | sys/ufs/ufs/ufs_lookup.c | 32 |
4 files changed, 242 insertions, 39 deletions
diff --git a/sys/ufs/ffs/ffs_softdep.c b/sys/ufs/ffs/ffs_softdep.c index d9503010889d..044324ec69f1 100644 --- a/sys/ufs/ffs/ffs_softdep.c +++ b/sys/ufs/ffs/ffs_softdep.c @@ -91,6 +91,7 @@ static MALLOC_DEFINE(M_FREEFILE, "freefile","Inode deallocated"); static MALLOC_DEFINE(M_DIRADD, "diradd","New directory entry"); static MALLOC_DEFINE(M_MKDIR, "mkdir","New directory"); static MALLOC_DEFINE(M_DIRREM, "dirrem","Directory entry deleted"); +static MALLOC_DEFINE(M_NEWDIRBLK, "newdirblk","Unclaimed new directory block"); #define M_SOFTDEP_FLAGS (M_WAITOK | M_USE_RESERVE) @@ -107,7 +108,8 @@ static MALLOC_DEFINE(M_DIRREM, "dirrem","Directory entry deleted"); #define D_DIRADD 10 #define D_MKDIR 11 #define D_DIRREM 12 -#define D_LAST D_DIRREM +#define D_NEWDIRBLK 13 +#define D_LAST D_NEWDIRBLK /* * translate from workitem type to memory type @@ -126,7 +128,8 @@ static struct malloc_type *memtype[] = { M_FREEFILE, M_DIRADD, M_MKDIR, - M_DIRREM + M_DIRREM, + M_NEWDIRBLK }; #define DtoM(type) (memtype[type]) @@ -165,6 +168,7 @@ static struct dirrem *newdirrem __P((struct buf *, struct inode *, struct inode *, int, struct dirrem **)); static void free_diradd __P((struct diradd *)); static void free_allocindir __P((struct allocindir *, struct inodedep *)); +static void free_newdirblk __P((struct newdirblk *)); static int indir_trunc __P((struct inode *, ufs_daddr_t, int, ufs_lbn_t, long *)); static void deallocate_dependencies __P((struct buf *, struct inodedep *)); @@ -1298,6 +1302,7 @@ softdep_setup_allocdirect(ip, lbn, newblkno, oldblkno, newsize, oldsize, bp) adp->ad_newsize = newsize; adp->ad_oldsize = oldsize; adp->ad_state = ATTACHED; + LIST_INIT(&adp->ad_newdirblk); if (newblkno == oldblkno) adp->ad_freefrag = NULL; else @@ -1388,7 +1393,9 @@ allocdirect_merge(adphead, newadp, oldadp) struct allocdirect *newadp; /* allocdirect being added */ struct allocdirect *oldadp; /* existing allocdirect being checked */ { + struct worklist *wk; struct freefrag *freefrag; + 
struct newdirblk *newdirblk; #ifdef DEBUG if (lk.lkt_held == -1) @@ -1398,7 +1405,7 @@ allocdirect_merge(adphead, newadp, oldadp) newadp->ad_oldsize != oldadp->ad_newsize || newadp->ad_lbn >= NDADDR) { FREE_LOCK(&lk); - panic("allocdirect_check: old %d != new %d || lbn %ld >= %d", + panic("allocdirect_merge: old %d != new %d || lbn %ld >= %d", newadp->ad_oldblkno, oldadp->ad_newblkno, newadp->ad_lbn, NDADDR); } @@ -1425,6 +1432,17 @@ allocdirect_merge(adphead, newadp, oldadp) newadp->ad_freefrag = oldadp->ad_freefrag; oldadp->ad_freefrag = freefrag; } + /* + * If we are tracking a new directory-block allocation, + * move it from the old allocdirect to the new allocdirect. + */ + if ((wk = LIST_FIRST(&oldadp->ad_newdirblk)) != NULL) { + newdirblk = WK_NEWDIRBLK(wk); + WORKLIST_REMOVE(&newdirblk->db_list); + if (LIST_FIRST(&oldadp->ad_newdirblk) != NULL) + panic("allocdirect_merge: extra newdirblk"); + WORKLIST_INSERT(&newadp->ad_newdirblk, &newdirblk->db_list); + } free_allocdirect(adphead, oldadp, 0); } @@ -1949,6 +1967,8 @@ free_allocdirect(adphead, adp, delay) struct allocdirect *adp; int delay; { + struct newdirblk *newdirblk; + struct worklist *wk; #ifdef DEBUG if (lk.lkt_held == -1) @@ -1966,10 +1986,57 @@ free_allocdirect(adphead, adp, delay) else add_to_worklist(&adp->ad_freefrag->ff_list); } + if ((wk = LIST_FIRST(&adp->ad_newdirblk)) != NULL) { + newdirblk = WK_NEWDIRBLK(wk); + WORKLIST_REMOVE(&newdirblk->db_list); + if (LIST_FIRST(&adp->ad_newdirblk) != NULL) + panic("free_allocdirect: extra newdirblk"); + if (delay) + WORKLIST_INSERT(&adp->ad_inodedep->id_bufwait, + &newdirblk->db_list); + else + free_newdirblk(newdirblk); + } WORKITEM_FREE(adp, D_ALLOCDIRECT); } /* + * Free a newdirblk. Clear the NEWBLOCK flag on its associated pagedep. + * This routine must be called with splbio interrupts blocked. 
+ */ +static void +free_newdirblk(newdirblk) + struct newdirblk *newdirblk; +{ + struct pagedep *pagedep; + struct diradd *dap; + int i; + +#ifdef DEBUG + if (lk.lkt_held == -1) + panic("free_newdirblk: lock not held"); +#endif + /* + * Free any directory additions that have been committed. + */ + pagedep = newdirblk->db_pagedep; + pagedep->pd_state &= ~NEWBLOCK; + while ((dap = LIST_FIRST(&pagedep->pd_pendinghd)) != NULL) + free_diradd(dap); + /* + * If no dependencies remain, the pagedep will be freed. + */ + for (i = 0; i < DAHASHSZ; i++) + if (LIST_FIRST(&pagedep->pd_diraddhd[i]) != NULL) + break; + if (i == DAHASHSZ) { + LIST_REMOVE(pagedep, pd_hash); + WORKITEM_FREE(pagedep, D_PAGEDEP); + } + WORKITEM_FREE(newdirblk, D_NEWDIRBLK); +} + +/* * Prepare an inode to be freed. The actual free operation is not * done until the zero'ed inode has been written to disk. */ @@ -2302,20 +2369,23 @@ free_allocindir(aip, inodedep) * count has been incremented, but before the directory entry's * pointer to the inode has been set. 
*/ -void -softdep_setup_directory_add(bp, dp, diroffset, newinum, newdirbp) +int +softdep_setup_directory_add(bp, dp, diroffset, newinum, newdirbp, isnewblk) struct buf *bp; /* buffer containing directory block */ struct inode *dp; /* inode for directory */ off_t diroffset; /* offset of new entry in directory */ long newinum; /* inode referenced by new directory entry */ struct buf *newdirbp; /* non-NULL => contents of new mkdir */ + int isnewblk; /* entry is in a newly allocated block */ { int offset; /* offset of new entry within directory block */ ufs_lbn_t lbn; /* block in directory containing new entry */ struct fs *fs; struct diradd *dap; + struct allocdirect *adp; struct pagedep *pagedep; struct inodedep *inodedep; + struct newdirblk *newdirblk = 0; struct mkdir *mkdir1, *mkdir2; /* @@ -2324,7 +2394,7 @@ softdep_setup_directory_add(bp, dp, diroffset, newinum, newdirbp) if (newinum == WINO) { if (newdirbp != NULL) bdwrite(newdirbp); - return; + return (0); } fs = dp->i_fs; @@ -2336,6 +2406,12 @@ softdep_setup_directory_add(bp, dp, diroffset, newinum, newdirbp) dap->da_offset = offset; dap->da_newinum = newinum; dap->da_state = ATTACHED; + if (isnewblk && lbn < NDADDR && fragoff(fs, diroffset) == 0) { + MALLOC(newdirblk, struct newdirblk *, sizeof(struct newdirblk), + M_NEWDIRBLK, M_SOFTDEP_FLAGS); + newdirblk->db_list.wk_type = D_NEWDIRBLK; + newdirblk->db_state = 0; + } if (newdirbp == NULL) { dap->da_state |= DEPCOMPLETE; ACQUIRE_LOCK(&lk); @@ -2364,7 +2440,7 @@ softdep_setup_directory_add(bp, dp, diroffset, newinum, newdirbp) * Dependency on link count increase for parent directory */ ACQUIRE_LOCK(&lk); - if (inodedep_lookup(dp->i_fs, dp->i_number, 0, &inodedep) == 0 + if (inodedep_lookup(fs, dp->i_number, 0, &inodedep) == 0 || (inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE) { dap->da_state &= ~MKDIR_PARENT; WORKITEM_FREE(mkdir2, D_MKDIR); @@ -2391,7 +2467,56 @@ softdep_setup_directory_add(bp, dp, diroffset, newinum, newdirbp) diradd_inode_written(dap, 
inodedep); else WORKLIST_INSERT(&inodedep->id_bufwait, &dap->da_list); + if (isnewblk) { + /* + * Directories growing into indirect blocks are rare + * enough and the frequency of new block allocation + * in those cases even more rare, that we choose not + * to bother tracking them. Rather we simply force the + * new directory entry to disk. + */ + if (lbn >= NDADDR) { + FREE_LOCK(&lk); + /* + * We only have a new allocation when at the + * beginning of a new block, not when we are + * expanding into an existing block. + */ + if (blkoff(fs, diroffset) == 0) + return (1); + return (0); + } + /* + * We only have a new allocation when at the beginning + * of a new fragment, not when we are expanding into an + * existing fragment. Also, there is nothing to do if we + * are already tracking this block. + */ + if (fragoff(fs, diroffset) != 0) { + FREE_LOCK(&lk); + return (0); + } + if ((pagedep->pd_state & NEWBLOCK) != 0) { + WORKITEM_FREE(newdirblk, D_NEWDIRBLK); + FREE_LOCK(&lk); + return (0); + } + /* + * Find our associated allocdirect and have it track us. + */ + if (inodedep_lookup(fs, dp->i_number, 0, &inodedep) == 0) + panic("softdep_setup_directory_add: lost inodedep"); + adp = TAILQ_LAST(&inodedep->id_newinoupdt, allocdirectlst); + if (adp == NULL || adp->ad_lbn != lbn) { + FREE_LOCK(&lk); + panic("softdep_setup_directory_add: lost entry"); + } + pagedep->pd_state |= NEWBLOCK; + newdirblk->db_pagedep = pagedep; + WORKLIST_INSERT(&adp->ad_newdirblk, &newdirblk->db_list); + } FREE_LOCK(&lk); + return (0); } /* @@ -3631,6 +3756,10 @@ handle_written_inodeblock(inodedep, bp) add_to_worklist(wk); continue; + case D_NEWDIRBLK: + free_newdirblk(WK_NEWDIRBLK(wk)); + continue; + default: lk.lkt_held = -1; panic("handle_written_inodeblock: Unknown type %s", @@ -3741,9 +3870,12 @@ handle_written_filepage(pagedep, bp) } /* * Free any directory additions that have been committed. 
+ * If it is a newly allocated block, we have to wait until + * the on-disk directory inode claims the new block. */ - while ((dap = LIST_FIRST(&pagedep->pd_pendinghd)) != NULL) - free_diradd(dap); + if ((pagedep->pd_state & NEWBLOCK) == 0) + while ((dap = LIST_FIRST(&pagedep->pd_pendinghd)) != NULL) + free_diradd(dap); /* * Uncommitted directory entries must be restored. */ @@ -3782,6 +3914,7 @@ handle_written_filepage(pagedep, bp) if ((bp->b_flags & B_DELWRI) == 0) stat_dir_entry++; bdirty(bp); + return (1); } /* * If no dependencies remain, the pagedep will be freed. @@ -3789,16 +3922,10 @@ handle_written_filepage(pagedep, bp) * is written back to disk. */ if (LIST_FIRST(&pagedep->pd_pendinghd) == 0) { - for (i = 0; i < DAHASHSZ; i++) - if (LIST_FIRST(&pagedep->pd_diraddhd[i]) != NULL) - break; - if (i == DAHASHSZ) { - LIST_REMOVE(pagedep, pd_hash); - WORKITEM_FREE(pagedep, D_PAGEDEP); - return (0); - } + LIST_REMOVE(pagedep, pd_hash); + WORKITEM_FREE(pagedep, D_PAGEDEP); } - return (1); + return (0); } /* @@ -4001,8 +4128,8 @@ softdep_fsync(vp) } dap = WK_DIRADD(wk); /* - * Flush our parent if this directory entry - * has a MKDIR_PARENT dependency. + * Flush our parent if this directory entry has a MKDIR_PARENT + * dependency or is contained in a newly allocated block. 
*/ if (dap->da_state & DIRCHG) pagedep = dap->da_previous->dm_pagedep; @@ -4015,7 +4142,11 @@ softdep_fsync(vp) FREE_LOCK(&lk); panic("softdep_fsync: dirty"); } - flushparent = dap->da_state & MKDIR_PARENT; + if ((dap->da_state & MKDIR_PARENT) || + (pagedep->pd_state & NEWBLOCK)) + flushparent = 1; + else + flushparent = 0; /* * If we are being fsync'ed as part of vgone'ing this vnode, * then we will not be able to release and recover the @@ -4039,11 +4170,24 @@ softdep_fsync(vp) vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); if (error != 0) return (error); + /* + * All MKDIR_PARENT dependencies and all the NEWBLOCK pagedeps + * that are contained in direct blocks will be resolved by + * doing a UFS_UPDATE. Pagedeps contained in indirect blocks + * may require a complete sync'ing of the directory. So, we + * try the cheap and fast UFS_UPDATE first, and if that fails, + * then we do the slower VOP_FSYNC of the directory. + */ if (flushparent) { if ((error = UFS_UPDATE(pvp, 1)) != 0) { vput(pvp); return (error); } + if ((pagedep->pd_state & NEWBLOCK) && + (error = VOP_FSYNC(pvp, p->p_ucred, MNT_WAIT, p))) { + vput(pvp); + return (error); + } } /* * Flush directory page containing the inode's name. diff --git a/sys/ufs/ffs/softdep.h b/sys/ufs/ffs/softdep.h index 9a158c5dab19..5610bdcc2fa4 100644 --- a/sys/ufs/ffs/softdep.h +++ b/sys/ufs/ffs/softdep.h @@ -84,20 +84,23 @@ * discarded. The IOSTARTED flag prevents multiple calls to the I/O * start routine from doing multiple rollbacks. The SPACECOUNTED flag * says that the files space has been accounted to the pending free - * space count. The ONWORKLIST flag shows whether the structure is - * currently linked onto a worklist. + * space count. The NEWBLOCK flag marks pagedep structures that have + * just been allocated, so must be claimed by the inode before all + * dependencies are complete. The ONWORKLIST flag shows whether the + * structure is currently linked onto a worklist. 
*/ #define ATTACHED 0x0001 #define UNDONE 0x0002 #define COMPLETE 0x0004 #define DEPCOMPLETE 0x0008 -#define MKDIR_PARENT 0x0010 -#define MKDIR_BODY 0x0020 -#define RMDIR 0x0040 -#define DIRCHG 0x0080 -#define GOINGAWAY 0x0100 -#define IOSTARTED 0x0200 -#define SPACECOUNTED 0x0400 +#define MKDIR_PARENT 0x0010 /* diradd & mkdir only */ +#define MKDIR_BODY 0x0020 /* diradd & mkdir only */ +#define RMDIR 0x0040 /* dirrem only */ +#define DIRCHG 0x0080 /* diradd & dirrem only */ +#define GOINGAWAY 0x0100 /* indirdep only */ +#define IOSTARTED 0x0200 /* inodedep & pagedep only */ +#define SPACECOUNTED 0x0400 /* inodedep only */ +#define NEWBLOCK 0x0800 /* pagedep only */ #define ONWORKLIST 0x8000 #define ALLCOMPLETE (ATTACHED | COMPLETE | DEPCOMPLETE) @@ -142,6 +145,7 @@ struct worklist { #define WK_DIRADD(wk) ((struct diradd *)(wk)) #define WK_MKDIR(wk) ((struct mkdir *)(wk)) #define WK_DIRREM(wk) ((struct dirrem *)(wk)) +#define WK_NEWDIRBLK(wk) ((struct newdirblk *)(wk)) /* * Various types of lists @@ -302,7 +306,17 @@ struct bmsafemap { * be freed once the inode claiming the new block is written to disk. * This ad_fragfree request is attached to the id_inowait list of the * associated inodedep (pointed to by ad_inodedep) for processing after - * the inode is written. + * the inode is written. When a block is allocated to a directory, an + * fsync of a file whose name is within that block must ensure not only + * that the block containing the file name has been written, but also + * that the on-disk inode references that block. When a new directory + * block is created, we allocate a newdirblk structure which is linked + * to the associated allocdirect (on its ad_newdirblk list). When the + * allocdirect has been satisfied, the newdirblk structure is moved to + * the inodedep id_bufwait list of its directory to await the inode + * being written. 
When the inode is written, the directory entries are + * fully committed and can be deleted from their pagedep->id_pendinghd + * and inodedep->id_pendinghd lists. */ struct allocdirect { struct worklist ad_list; /* buffer holding block */ @@ -317,6 +331,7 @@ struct allocdirect { struct buf *ad_buf; /* cylgrp buffer (if pending) */ struct inodedep *ad_inodedep; /* associated inodedep */ struct freefrag *ad_freefrag; /* fragment to be freed (if any) */ + struct workhead ad_newdirblk; /* dir block to notify when written */ }; /* @@ -532,3 +547,27 @@ struct dirrem { }; #define dm_pagedep dm_un.dmu_pagedep #define dm_dirinum dm_un.dmu_dirinum + +/* + * A "newdirblk" structure tracks the progress of a newly allocated + * directory block from its creation until it is claimed by its on-disk + * inode. When a block is allocated to a directory, an fsync of a file + * whose name is within that block must ensure not only that the block + * containing the file name has been written, but also that the on-disk + * inode references that block. When a new directory block is created, + * we allocate a newdirblk structure which is linked to the associated + * allocdirect (on its ad_newdirblk list). When the allocdirect has been + * satisfied, the newdirblk structure is moved to the inodedep id_bufwait + * list of its directory to await the inode being written. When the inode + * is written, the directory entries are fully committed and can be + * deleted from their pagedep->id_pendinghd and inodedep->id_pendinghd + * lists. Note that we could track directory blocks allocated to indirect + * blocks using a similar scheme with the allocindir structures. Rather + * than adding this level of complexity, we simply write those newly + * allocated indirect blocks synchronously as such allocations are rare. 
+ */ +struct newdirblk { + struct worklist db_list; /* id_inowait or pg_newdirblk */ +# define db_state db_list.wk_state /* unused */ + struct pagedep *db_pagedep; /* associated pagedep */ +}; diff --git a/sys/ufs/ufs/ufs_extern.h b/sys/ufs/ufs/ufs_extern.h index e08ee8d70c71..d95013bc4498 100644 --- a/sys/ufs/ufs/ufs_extern.h +++ b/sys/ufs/ufs/ufs_extern.h @@ -95,8 +95,8 @@ int ufs_vinit __P((struct mount *, vop_t **, vop_t **, struct vnode **)); /* * Soft update function prototypes. */ -void softdep_setup_directory_add __P((struct buf *, struct inode *, off_t, - long, struct buf *)); +int softdep_setup_directory_add __P((struct buf *, struct inode *, off_t, + long, struct buf *, int)); void softdep_change_directoryentry_offset __P((struct inode *, caddr_t, caddr_t, caddr_t, int)); void softdep_setup_remove __P((struct buf *,struct inode *, struct inode *, diff --git a/sys/ufs/ufs/ufs_lookup.c b/sys/ufs/ufs/ufs_lookup.c index 5acc85db287e..61a620deaaf3 100644 --- a/sys/ufs/ufs/ufs_lookup.c +++ b/sys/ufs/ufs/ufs_lookup.c @@ -745,10 +745,29 @@ ufs_direnter(dvp, tvp, dirp, cnp, newdirbp) (bp->b_data + blkoff))->d_reclen = DIRBLKSIZ; blkoff += DIRBLKSIZ; } - softdep_setup_directory_add(bp, dp, dp->i_offset, - dirp->d_ino, newdirbp); - bdwrite(bp); - return (UFS_UPDATE(dvp, 0)); + if (softdep_setup_directory_add(bp, dp, dp->i_offset, + dirp->d_ino, newdirbp, 1) == 0) { + bdwrite(bp); + return (UFS_UPDATE(dvp, 0)); + } + /* We have just allocated a directory block in an + * indirect block. Rather than tracking when it gets + * claimed by the inode, we simply do a VOP_FSYNC + * now to ensure that it is there (in case the user + * does a future fsync). Note that we have to unlock + * the inode for the entry that we just entered, as + * the VOP_FSYNC may need to lock other inodes which + * can lead to deadlock if we also hold a lock on + * the newly entered node. 
+ */ + if ((error = BUF_WRITE(bp))) + return (error); + if (tvp != NULL) + VOP_UNLOCK(tvp, 0, p); + error = VOP_FSYNC(dvp, p->p_ucred, MNT_WAIT, p); + if (tvp != NULL) + vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, p); + return (error); } if (DOINGASYNC(dvp)) { bdwrite(bp); @@ -836,8 +855,9 @@ ufs_direnter(dvp, tvp, dirp, cnp, newdirbp) bcopy((caddr_t)dirp, (caddr_t)ep, (u_int)newentrysize); if (DOINGSOFTDEP(dvp)) { - softdep_setup_directory_add(bp, dp, - dp->i_offset + (caddr_t)ep - dirbuf, dirp->d_ino, newdirbp); + (void) softdep_setup_directory_add(bp, dp, + dp->i_offset + (caddr_t)ep - dirbuf, + dirp->d_ino, newdirbp, 0); bdwrite(bp); } else { if (DOINGASYNC(dvp)) { |