aboutsummaryrefslogtreecommitdiff
path: root/sbin/fsck_ffs
diff options
context:
space:
mode:
author: Kirk McKusick <mckusick@FreeBSD.org> 2023-03-30 04:09:39 +0000
committer: Kirk McKusick <mckusick@FreeBSD.org> 2023-03-30 04:13:27 +0000
commit: fe5e6e2cc5d6f2e4121eccdb3a8ceba646aef2c9 (patch)
tree: f74a7144169b6ec5ee7758817e409d55296b031a /sbin/fsck_ffs
parent: 1fb7d2cf999e52e3682174d0c2f20cb3baf414f3 (diff)
download: src-fe5e6e2cc5d6f2e4121eccdb3a8ceba646aef2c9.tar.gz
src-fe5e6e2cc5d6f2e4121eccdb3a8ceba646aef2c9.zip
Improvement in UFS/FFS directory placement when doing mkdir(2).
The algorithm for laying out new directories was devised in the 1980s and markedly improved the performance of the filesystem. In those days large disks had at most 100 cylinder groups and often as few as 10-20. Modern multi-terabyte disks have thousands of cylinder groups. The original algorithm does not handle these large sizes well. This change attempts to expand the scope of the original algorithm to work well with these much larger disks while still retaining the properties of the original algorithm for small disks. The filesystem implementation is divided into policy routines and implementation routines. The policy routines can be changed in any way desired without risk of corrupting the filesystem. The policy requests are handled by the implementation layer. If the policy asks for an available resource, it is granted. But if it asks for an already in-use resource, then the implementation will provide an available one near the request. Thus it is impossible for a policy to double allocate. This change is limited to the policy implementation. This change updates the ffs_dirpref() routine which is responsible for selecting the cylinder group into which a new directory should be placed. If we are near the root of the filesystem we aim to spread new directories out as much as possible. As we descend deeper from the root we cluster them closer together around their parent as we expect them to be more closely interactive. Higher-level directories like usr/src/sys and usr/src/bin should be separated while the directories in these areas are more likely to be accessed together so should be closer. And directories within commands or kernel subsystems should be closer still. We pick a range of cylinder groups around the cylinder group of the directory in which the new directory is being created. The size of the range for our search is based on our depth from the root of our filesystem. We then probe that range based on how many directories are already present.
The first new directory is at 1/2 (middle) of the range; the second is in the first 1/4 of the range, then at 3/4, 1/8, 3/8, 5/8, 7/8, 1/16, 3/16, 5/16, etc. It is desirable to store the depth of a directory in its on-disk inode so that it is available when we need it. We add a new field di_dirdepth to track the depth of each directory. Because there are few spare fields left in the inode, we choose to share an existing field in the inode rather than having one of our own. Specifically we create a union with the di_freelink field. The di_freelink field is used to track inodes that have been unlinked but remain referenced. It is not needed until a rmdir(2) operation has been done on a directory. At that point, the directory has no contents and, even if it is kept active as a current directory, it is no longer able to have any new directories or files created in it. Thus the use of di_dirdepth and di_freelink will never coincide.
Reported by: Timo Voelker
Reviewed by: kib
Tested by: Peter Holm
MFC after: 2 weeks
Sponsored by: The FreeBSD Foundation
Differential Revision: https://reviews.freebsd.org/D39246
Diffstat (limited to 'sbin/fsck_ffs')
-rw-r--r-- sbin/fsck_ffs/dir.c 107
-rw-r--r-- sbin/fsck_ffs/fsck.h 4
-rw-r--r-- sbin/fsck_ffs/inode.c 1
-rw-r--r-- sbin/fsck_ffs/pass1.c 11
-rw-r--r-- sbin/fsck_ffs/pass2.c 13
-rw-r--r-- sbin/fsck_ffs/pass3.c 4
6 files changed, 123 insertions, 17 deletions
diff --git a/sbin/fsck_ffs/dir.c b/sbin/fsck_ffs/dir.c
index 64e477c66ed8..cc5305c390a4 100644
--- a/sbin/fsck_ffs/dir.c
+++ b/sbin/fsck_ffs/dir.c
@@ -88,6 +88,7 @@ propagate(void)
if (inoinfo(inp->i_parent)->ino_state == DFOUND &&
INO_IS_DUNFOUND(inp->i_number)) {
inoinfo(inp->i_number)->ino_state = DFOUND;
+ check_dirdepth(inp);
change++;
}
}
@@ -95,6 +96,90 @@ propagate(void)
}
/*
+ * Check that the recorded depth of the directory is correct.
+ */
+void
+check_dirdepth(struct inoinfo *inp)
+{
+ struct inoinfo *parentinp;
+ struct inode ip;
+ union dinode *dp;
+ int saveresolved;
+ static int updateasked, dirdepthupdate;
+
+ if ((parentinp = getinoinfo(inp->i_parent)) == NULL) {
+ pfatal("check_dirdepth: UNKNOWN PARENT DIR");
+ return;
+ }
+ /*
+ * If depth is correct, nothing to do.
+ */
+ if (parentinp->i_depth + 1 == inp->i_depth)
+ return;
+ /*
+ * Only the root inode should have depth of 0, so if any other
+ * directory has a depth of 0 then this is an old filesystem
+ * that has not been tracking directory depth. Ask just once
+ * whether it should start tracking directory depth.
+ */
+ if (inp->i_depth == 0 && updateasked == 0) {
+ updateasked = 1;
+ if (preen) {
+ pwarn("UPDATING FILESYSTEM TO TRACK DIRECTORY DEPTH");
+ dirdepthupdate = 1;
+ } else {
+ /*
+ * The file system can be marked clean even if
+ * a directory does not have the right depth.
+ * Hence, resolved should not be cleared when
+ * the filesystem does not update directory depths.
+ */
+ saveresolved = resolved;
+ dirdepthupdate =
+ reply("UPDATE FILESYSTEM TO TRACK DIRECTORY DEPTH");
+ resolved = saveresolved;
+ }
+ }
+ /*
+ * If we are not converting, nothing more to do.
+ */
+ if (inp->i_depth == 0 && dirdepthupdate == 0)
+ return;
+ /*
+ * Individual directory at wrong depth. Report it and correct if
+ * in preen mode or ask if in interactive mode. Note that if a
+ * directory is renamed to a new location that is at a different
+ * level in the tree, its depth will be recalculated, but none of
+ * the directories that it contains will be updated. Thus it is
+ * not unexpected to find directories with incorrect depths. No
+ * operational harm will come from this though new directory
+ * placement in the subtree may not be as optimal until the depths
+ * of the affected directories are corrected.
+ *
+ * To avoid much spurious output on otherwise clean filesystems
+ * we only generate detailed output when the debug flag is given.
+ */
+ ginode(inp->i_number, &ip);
+ dp = ip.i_dp;
+ if (inp->i_depth != 0 && debug) {
+ pwarn("DIRECTORY");
+ prtinode(&ip);
+ printf(" DEPTH %d SHOULD BE %d", inp->i_depth,
+ parentinp->i_depth + 1);
+ if (preen == 0 && reply("ADJUST") == 0) {
+ irelse(&ip);
+ return;
+ }
+ if (preen)
+ printf(" (ADJUSTED)\n");
+ }
+ inp->i_depth = parentinp->i_depth + 1;
+ DIP_SET(dp, di_dirdepth, inp->i_depth);
+ inodirty(&ip);
+ irelse(&ip);
+}
+
+/*
* Scan each entry in a directory block.
*/
int
@@ -471,7 +556,7 @@ linkup(ino_t orphan, ino_t parentdir, char *name)
{
struct inode ip;
union dinode *dp;
- int lostdir;
+ int lostdir, depth;
ino_t oldlfdir;
struct inoinfo *inp;
struct inodesc idesc;
@@ -546,7 +631,7 @@ linkup(ino_t orphan, ino_t parentdir, char *name)
irelse(&ip);
return (0);
}
- if ((changeino(UFS_ROOTINO, lfname, lfdir) & ALTERED) == 0) {
+ if ((changeino(UFS_ROOTINO, lfname, lfdir, 1) & ALTERED) == 0) {
pfatal("SORRY. CANNOT CREATE lost+found DIRECTORY\n\n");
irelse(&ip);
return (0);
@@ -575,7 +660,8 @@ linkup(ino_t orphan, ino_t parentdir, char *name)
}
inoinfo(orphan)->ino_linkcnt--;
if (lostdir) {
- if ((changeino(orphan, "..", lfdir) & ALTERED) == 0 &&
+ depth = DIP(dp, di_dirdepth) + 1;
+ if ((changeino(orphan, "..", lfdir, depth) & ALTERED) == 0 &&
parentdir != (ino_t)-1)
(void)makeentry(orphan, lfdir, "..");
DIP_SET(dp, di_nlink, DIP(dp, di_nlink) + 1);
@@ -607,7 +693,7 @@ linkup(ino_t orphan, ino_t parentdir, char *name)
* fix an entry in a directory.
*/
int
-changeino(ino_t dir, const char *name, ino_t newnum)
+changeino(ino_t dir, const char *name, ino_t newnum, int depth)
{
struct inodesc idesc;
struct inode ip;
@@ -621,7 +707,10 @@ changeino(ino_t dir, const char *name, ino_t newnum)
idesc.id_name = strdup(name);
idesc.id_parent = newnum; /* new value for name */
ginode(dir, &ip);
- error = ckinode(ip.i_dp, &idesc);
+ if (((error = ckinode(ip.i_dp, &idesc)) & ALTERED) && newnum != 0) {
+ DIP_SET(ip.i_dp, di_dirdepth, depth);
+ getinoinfo(dir)->i_depth = depth;
+ }
free(idesc.id_name);
irelse(&ip);
return (error);
@@ -815,8 +904,8 @@ allocdir(ino_t parent, ino_t request, int mode)
struct inode ip;
union dinode *dp;
struct bufarea *bp;
- struct inoinfo *inp;
struct dirtemplate *dirp;
+ struct inoinfo *inp, *parentinp;
ino = allocino(request, IFDIR|mode);
if (ino == 0)
@@ -859,6 +948,12 @@ allocdir(ino_t parent, ino_t request, int mode)
inp->i_parent = parent;
inp->i_dotdot = parent;
inp->i_flags |= INFO_NEW;
+ if ((parentinp = getinoinfo(inp->i_parent)) == NULL) {
+ pfatal("allocdir: UNKNOWN PARENT DIR");
+ } else {
+ inp->i_depth = parentinp->i_depth + 1;
+ DIP_SET(dp, di_dirdepth, inp->i_depth);
+ }
inoinfo(ino)->ino_type = DT_DIR;
inoinfo(ino)->ino_state = inoinfo(parent)->ino_state;
if (inoinfo(ino)->ino_state == DSTATE) {
diff --git a/sbin/fsck_ffs/fsck.h b/sbin/fsck_ffs/fsck.h
index 3c5cc957cd4e..43e826e13b77 100644
--- a/sbin/fsck_ffs/fsck.h
+++ b/sbin/fsck_ffs/fsck.h
@@ -309,6 +309,7 @@ struct inoinfo {
ino_t i_parent; /* inode number of parent */
ino_t i_dotdot; /* inode number of `..' */
size_t i_isize; /* size of inode */
+ u_int i_depth; /* depth of directory from root */
u_int i_flags; /* flags, see below */
u_int i_numblks; /* size of block array in bytes */
ufs2_daddr_t i_blks[1]; /* actually longer */
@@ -462,9 +463,10 @@ void catch(int);
void catchquit(int);
void cgdirty(struct bufarea *);
struct bufarea *cglookup(int cg);
-int changeino(ino_t dir, const char *name, ino_t newnum);
+int changeino(ino_t dir, const char *name, ino_t newnum, int depth);
void check_blkcnt(struct inode *ip);
int check_cgmagic(int cg, struct bufarea *cgbp, int requestrebuild);
+void check_dirdepth(struct inoinfo *inp);
int chkrange(ufs2_daddr_t blk, int cnt);
void ckfini(int markclean);
int ckinode(union dinode *dp, struct inodesc *);
diff --git a/sbin/fsck_ffs/inode.c b/sbin/fsck_ffs/inode.c
index 947e5e0cbc08..a46fea0607a0 100644
--- a/sbin/fsck_ffs/inode.c
+++ b/sbin/fsck_ffs/inode.c
@@ -1135,6 +1135,7 @@ cacheino(union dinode *dp, ino_t inumber)
inp->i_dotdot = (ino_t)0;
inp->i_number = inumber;
inp->i_isize = DIP(dp, di_size);
+ inp->i_depth = DIP(dp, di_dirdepth);
inp->i_numblks = blks;
for (i = 0; i < MIN(blks, UFS_NDADDR); i++)
inp->i_blks[i] = DIP(dp, di_db[i]);
diff --git a/sbin/fsck_ffs/pass1.c b/sbin/fsck_ffs/pass1.c
index 5f1ad8ecb686..afe9f7f2b789 100644
--- a/sbin/fsck_ffs/pass1.c
+++ b/sbin/fsck_ffs/pass1.c
@@ -388,14 +388,15 @@ checkinode(ino_t inumber, struct inodesc *idesc, int rebuildcg)
n_files++;
inoinfo(inumber)->ino_linkcnt = DIP(dp, di_nlink);
if (mode == IFDIR) {
- if (DIP(dp, di_size) == 0)
+ if (DIP(dp, di_size) == 0) {
inoinfo(inumber)->ino_state = DCLEAR;
- else if (DIP(dp, di_nlink) <= 0)
+ } else if (DIP(dp, di_nlink) <= 0) {
inoinfo(inumber)->ino_state = DZLINK;
- else
+ } else {
inoinfo(inumber)->ino_state = DSTATE;
- cacheino(dp, inumber);
- countdirs++;
+ cacheino(dp, inumber);
+ countdirs++;
+ }
} else if (DIP(dp, di_nlink) <= 0)
inoinfo(inumber)->ino_state = FZLINK;
else
diff --git a/sbin/fsck_ffs/pass2.c b/sbin/fsck_ffs/pass2.c
index abe14549e6f4..8200209cc03e 100644
--- a/sbin/fsck_ffs/pass2.c
+++ b/sbin/fsck_ffs/pass2.c
@@ -210,8 +210,10 @@ pass2(void)
if (inp->i_parent == 0 || inp->i_isize == 0)
continue;
if (inoinfo(inp->i_parent)->ino_state == DFOUND &&
- INO_IS_DUNFOUND(inp->i_number))
+ INO_IS_DUNFOUND(inp->i_number)) {
inoinfo(inp->i_number)->ino_state = DFOUND;
+ check_dirdepth(inp);
+ }
if (inp->i_dotdot == inp->i_parent ||
inp->i_dotdot == (ino_t)-1)
continue;
@@ -271,7 +273,8 @@ pass2(void)
inoinfo(inp->i_dotdot)->ino_linkcnt++;
inoinfo(inp->i_parent)->ino_linkcnt--;
inp->i_dotdot = inp->i_parent;
- (void)changeino(inp->i_number, "..", inp->i_parent);
+ (void)changeino(inp->i_number, "..", inp->i_parent,
+ getinoinfo(inp->i_parent)->i_depth + 1);
}
/*
* Mark all the directories that can be found from the root.
@@ -548,10 +551,12 @@ again:
case DFOUND:
inp = getinoinfo(dirp->d_ino);
if (idesc->id_entryno > 2) {
- if (inp->i_parent == 0)
+ if (inp->i_parent == 0) {
inp->i_parent = idesc->id_number;
- else if ((n = fix_extraneous(inp, idesc)) == 1)
+ check_dirdepth(inp);
+ } else if ((n = fix_extraneous(inp, idesc))) {
break;
+ }
}
/* FALLTHROUGH */
diff --git a/sbin/fsck_ffs/pass3.c b/sbin/fsck_ffs/pass3.c
index 22cb0393905b..b9d24f18371b 100644
--- a/sbin/fsck_ffs/pass3.c
+++ b/sbin/fsck_ffs/pass3.c
@@ -74,7 +74,7 @@ pass3(void)
if (inp->i_number == UFS_ROOTINO ||
(inp->i_parent != 0 && !S_IS_DUNFOUND(state)))
continue;
- if (state == DCLEAR)
+ if (state == DCLEAR || state == DZLINK)
continue;
/*
* If we are running with soft updates and we come
@@ -102,6 +102,7 @@ pass3(void)
inoinfo(lfdir)->ino_linkcnt--;
}
inoinfo(orphan)->ino_state = DFOUND;
+ check_dirdepth(inp);
propagate();
continue;
}
@@ -127,6 +128,7 @@ pass3(void)
}
irelse(&ip);
inoinfo(orphan)->ino_state = DFOUND;
+ check_dirdepth(inp);
propagate();
}
}