aboutsummaryrefslogtreecommitdiff
path: root/module/zfs/vdev.c
diff options
context:
space:
mode:
authorJohn Poduska <jpoduska@datto.com>2020-05-13 17:54:27 +0000
committerGitHub <noreply@github.com>2020-05-13 17:54:27 +0000
commit41035a049643ff7083a6cb6cd43b8eb70a7d18a1 (patch)
tree121cec91a2b10ca6e9fb7d376a7599d47f4aee8c /module/zfs/vdev.c
parentb29e31d80d6cb78dbd889e9b529333944b4c3ba1 (diff)
downloadsrc-41035a049643ff7083a6cb6cd43b8eb70a7d18a1.tar.gz
src-41035a049643ff7083a6cb6cd43b8eb70a7d18a1.zip
Resilver restarts unnecessarily when it encounters errors
When a resilver finishes, vdev_dtl_reassess is called to hopefully excise DTL_MISSING (amongst other things). If there are errors during the resilver, they are tracked in DTL_SCRUB, as spelled out in the block comment in vdev.c. DTL_SCRUB is in-core only, so it can only be used if the pool was online for the whole resilver. This state is tracked with the spa_scrub_started flag, which only gets set when the scan is initialized. Unfortunately, this flag gets cleared right before vdev_dtl_reassess gets called, so if there are any errors during the scan, DTL_MISSING will never get excised and the resilver will just continually restart. This fix simply moves clearing that flag until after the call to vdev_dtl_reasses. In addition, if a pool is imported and already has scn_errors > 0, this change will restart the resilver immediately instead of doing the rest of the scan and then restarting it from the beginning. On the other hand, if scn_errors == 0 at import, then no errors have been encountered so far, so the spa_scrub_started flag can be safely set. A test has been added to verify that resilver does not restart when relevant DTL's are available. Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: Paul Zuchowski <pzuchowski@datto.com> Signed-off-by: John Poduska <jpoduska@datto.com> Closes #10291
Diffstat (limited to 'module/zfs/vdev.c')
-rw-r--r--module/zfs/vdev.c22
1 files changed, 21 insertions, 1 deletions
diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c
index 3c2135029bd0..923bf2e336a6 100644
--- a/module/zfs/vdev.c
+++ b/module/zfs/vdev.c
@@ -2583,7 +2583,6 @@ vdev_dtl_should_excise(vdev_t *vd)
spa_t *spa = vd->vdev_spa;
dsl_scan_t *scn = spa->spa_dsl_pool->dp_scan;
- ASSERT0(scn->scn_phys.scn_errors);
ASSERT0(vd->vdev_children);
if (vd->vdev_state < VDEV_STATE_DEGRADED)
@@ -2634,6 +2633,7 @@ vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg, int scrub_done)
if (vd->vdev_ops->vdev_op_leaf) {
dsl_scan_t *scn = spa->spa_dsl_pool->dp_scan;
+ boolean_t wasempty = B_TRUE;
mutex_enter(&vd->vdev_dtl_lock);
@@ -2643,6 +2643,18 @@ vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg, int scrub_done)
if (zfs_scan_ignore_errors && scn)
scn->scn_phys.scn_errors = 0;
+ if (scrub_txg != 0 &&
+ !range_tree_is_empty(vd->vdev_dtl[DTL_MISSING])) {
+ wasempty = B_FALSE;
+ zfs_dbgmsg("guid:%llu txg:%llu scrub:%llu started:%d "
+ "dtl:%llu/%llu errors:%llu",
+ (u_longlong_t)vd->vdev_guid, (u_longlong_t)txg,
+ (u_longlong_t)scrub_txg, spa->spa_scrub_started,
+ (u_longlong_t)vdev_dtl_min(vd),
+ (u_longlong_t)vdev_dtl_max(vd),
+ (u_longlong_t)(scn ? scn->scn_phys.scn_errors : 0));
+ }
+
/*
* If we've completed a scan cleanly then determine
* if this vdev should remove any DTLs. We only want to
@@ -2679,6 +2691,14 @@ vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg, int scrub_done)
space_reftree_generate_map(&reftree,
vd->vdev_dtl[DTL_MISSING], 1);
space_reftree_destroy(&reftree);
+
+ if (!range_tree_is_empty(vd->vdev_dtl[DTL_MISSING])) {
+ zfs_dbgmsg("update DTL_MISSING:%llu/%llu",
+ (u_longlong_t)vdev_dtl_min(vd),
+ (u_longlong_t)vdev_dtl_max(vd));
+ } else if (!wasempty) {
+ zfs_dbgmsg("DTL_MISSING is now empty");
+ }
}
range_tree_vacate(vd->vdev_dtl[DTL_PARTIAL], NULL, NULL);
range_tree_walk(vd->vdev_dtl[DTL_MISSING],