Diffstat (limited to 'sys/contrib/openzfs/cmd/zed/agents/zfs_mod.c')
 -rw-r--r--  sys/contrib/openzfs/cmd/zed/agents/zfs_mod.c | 179
 1 file changed, 163 insertions(+), 16 deletions(-)
diff --git a/sys/contrib/openzfs/cmd/zed/agents/zfs_mod.c b/sys/contrib/openzfs/cmd/zed/agents/zfs_mod.c
index c62a976c2e3f..59d8182c0b2e 100644
--- a/sys/contrib/openzfs/cmd/zed/agents/zfs_mod.c
+++ b/sys/contrib/openzfs/cmd/zed/agents/zfs_mod.c
@@ -183,14 +183,14 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
nvlist_t *nvroot, *newvd;
pendingdev_t *device;
uint64_t wholedisk = 0ULL;
- uint64_t offline = 0ULL;
+ uint64_t offline = 0ULL, faulted = 0ULL;
uint64_t guid = 0ULL;
char *physpath = NULL, *new_devid = NULL, *enc_sysfs_path = NULL;
char rawpath[PATH_MAX], fullpath[PATH_MAX];
char devpath[PATH_MAX];
int ret;
- boolean_t is_dm = B_FALSE;
boolean_t is_sd = B_FALSE;
+ boolean_t is_mpath_wholedisk = B_FALSE;
uint_t c;
vdev_stat_t *vs;
@@ -211,15 +211,73 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
&enc_sysfs_path);
(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk);
(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_OFFLINE, &offline);
+ (void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_FAULTED, &faulted);
+
(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_GUID, &guid);
- if (offline)
- return; /* don't intervene if it was taken offline */
+ /*
+ * Special case:
+ *
+ * We've seen cases where a disk won't have a ZPOOL_CONFIG_PHYS_PATH
+ * entry in its config. For example, on this force-faulted disk:
+ *
+ * children[0]:
+ * type: 'disk'
+ * id: 0
+ * guid: 14309659774640089719
+ * path: '/dev/disk/by-vdev/L28'
+ * whole_disk: 0
+ * DTL: 654
+ * create_txg: 4
+ * com.delphix:vdev_zap_leaf: 1161
+ * faulted: 1
+ * aux_state: 'external'
+ * children[1]:
+ * type: 'disk'
+ * id: 1
+ * guid: 16002508084177980912
+ * path: '/dev/disk/by-vdev/L29'
+ * devid: 'dm-uuid-mpath-35000c500a61d68a3'
+ * phys_path: 'L29'
+ * vdev_enc_sysfs_path: '/sys/class/enclosure/0:0:1:0/SLOT 30 32'
+ * whole_disk: 0
+ * DTL: 1028
+ * create_txg: 4
+ * com.delphix:vdev_zap_leaf: 131
+ *
+ * If the disk's path is a /dev/disk/by-vdev/ path, then we can infer
+ * the ZPOOL_CONFIG_PHYS_PATH from the by-vdev disk name.
+ */
+ if (physpath == NULL && path != NULL) {
+ /* If path begins with "/dev/disk/by-vdev/" ... */
+ if (strncmp(path, DEV_BYVDEV_PATH,
+ strlen(DEV_BYVDEV_PATH)) == 0) {
+ /* Set physpath to the char after "/dev/disk/by-vdev/" */
+ physpath = &path[strlen(DEV_BYVDEV_PATH)];
+ }
+ }
+
+ /*
+ * We don't want to autoreplace offlined disks. However, we do want to
+ * replace force-faulted disks (`zpool offline -f`). Force-faulted
+ * disks have both offline=1 and faulted=1 in the nvlist.
+ */
+ if (offline && !faulted) {
+ zed_log_msg(LOG_INFO, "%s: %s is offline, skip autoreplace",
+ __func__, path);
+ return;
+ }
- is_dm = zfs_dev_is_dm(path);
+ is_mpath_wholedisk = is_mpath_whole_disk(path);
zed_log_msg(LOG_INFO, "zfs_process_add: pool '%s' vdev '%s', phys '%s'"
- " wholedisk %d, %s dm (guid %llu)", zpool_get_name(zhp), path,
- physpath ? physpath : "NULL", wholedisk, is_dm ? "is" : "not",
+ " %s blank disk, %s mpath blank disk, %s labeled, enc sysfs '%s', "
+ "(guid %llu)",
+ zpool_get_name(zhp), path,
+ physpath ? physpath : "NULL",
+ wholedisk ? "is" : "not",
+ is_mpath_wholedisk ? "is" : "not",
+ labeled ? "is" : "not",
+ enc_sysfs_path,
(long long unsigned int)guid);
/*
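
The inference added above is just a prefix strip against DEV_BYVDEV_PATH ("/dev/disk/by-vdev/", per the comments in this patch). A minimal standalone sketch of the same logic, with a hard-coded prefix and a hypothetical infer_physpath() helper standing in for the inline code:

#include <stdio.h>
#include <string.h>

#define	BYVDEV_PREFIX	"/dev/disk/by-vdev/"	/* stand-in for DEV_BYVDEV_PATH */

/* Return the by-vdev name (e.g. "L28") if path is a by-vdev path, else NULL. */
static const char *
infer_physpath(const char *path)
{
	if (path != NULL &&
	    strncmp(path, BYVDEV_PREFIX, strlen(BYVDEV_PREFIX)) == 0)
		return (path + strlen(BYVDEV_PREFIX));
	return (NULL);
}

int
main(void)
{
	const char *physpath = infer_physpath("/dev/disk/by-vdev/L28");

	/* Prints "L28", the value the patch would use as ZPOOL_CONFIG_PHYS_PATH. */
	printf("%s\n", physpath != NULL ? physpath : "(none)");
	return (0);
}
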
@@ -253,8 +311,9 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE, &newstate) == 0 &&
(newstate == VDEV_STATE_HEALTHY ||
newstate == VDEV_STATE_DEGRADED)) {
- zed_log_msg(LOG_INFO, " zpool_vdev_online: vdev %s is %s",
- fullpath, (newstate == VDEV_STATE_HEALTHY) ?
+ zed_log_msg(LOG_INFO,
+ " zpool_vdev_online: vdev '%s' ('%s') is "
+ "%s", fullpath, physpath, (newstate == VDEV_STATE_HEALTHY) ?
"HEALTHY" : "DEGRADED");
return;
}
@@ -271,11 +330,12 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
* vdev online to trigger a FMA fault by posting an ereport.
*/
if (!zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOREPLACE, NULL) ||
- !(wholedisk || is_dm) || (physpath == NULL)) {
+ !(wholedisk || is_mpath_wholedisk) || (physpath == NULL)) {
(void) zpool_vdev_online(zhp, fullpath, ZFS_ONLINE_FORCEFAULT,
&newstate);
zed_log_msg(LOG_INFO, "Pool's autoreplace is not enabled or "
- "not a whole disk for '%s'", fullpath);
+ "not a blank disk for '%s' ('%s')", fullpath,
+ physpath);
return;
}
@@ -287,7 +347,7 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
(void) snprintf(rawpath, sizeof (rawpath), "%s%s",
is_sd ? DEV_BYVDEV_PATH : DEV_BYPATH_PATH, physpath);
- if (realpath(rawpath, devpath) == NULL && !is_dm) {
+ if (realpath(rawpath, devpath) == NULL && !is_mpath_wholedisk) {
zed_log_msg(LOG_INFO, " realpath: %s failed (%s)",
rawpath, strerror(errno));
@@ -303,12 +363,14 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
if ((vs->vs_state != VDEV_STATE_DEGRADED) &&
(vs->vs_state != VDEV_STATE_FAULTED) &&
(vs->vs_state != VDEV_STATE_CANT_OPEN)) {
+ zed_log_msg(LOG_INFO, " not autoreplacing since disk isn't in "
+ "a bad state (currently %d)", vs->vs_state);
return;
}
nvlist_lookup_string(vdev, "new_devid", &new_devid);
- if (is_dm) {
+ if (is_mpath_wholedisk) {
/* Don't label device mapper or multipath disks. */
} else if (!labeled) {
/*
@@ -522,8 +584,11 @@ zfs_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *data)
* the dp->dd_compare value.
*/
if (nvlist_lookup_string(nvl, dp->dd_prop, &path) != 0 ||
- strcmp(dp->dd_compare, path) != 0)
+ strcmp(dp->dd_compare, path) != 0) {
+ zed_log_msg(LOG_INFO, " %s: no match (%s != vdev %s)",
+ __func__, dp->dd_compare, path);
return;
+ }
zed_log_msg(LOG_INFO, " zfs_iter_vdev: matched %s on %s",
dp->dd_prop, path);
@@ -571,6 +636,8 @@ zfs_iter_pool(zpool_handle_t *zhp, void *data)
ZPOOL_CONFIG_VDEV_TREE, &nvl);
zfs_iter_vdev(zhp, nvl, data);
}
+ } else {
+ zed_log_msg(LOG_INFO, "%s: no config\n", __func__);
}
/*
@@ -620,6 +687,72 @@ devphys_iter(const char *physical, const char *devid, zfs_process_func_t func,
}
/*
+ * Given a device identifier, find any vdevs with a matching by-vdev
+ * path. Normally we shouldn't need this as the comparison would be
+ * made earlier in the devphys_iter(). For example, if we were replacing
+ * /dev/disk/by-vdev/L28, normally devphys_iter() would match the
+ * ZPOOL_CONFIG_PHYS_PATH of "L28" from the old disk config to "L28"
+ * of the new disk config. However, we've seen cases where
+ * ZPOOL_CONFIG_PHYS_PATH was not in the config for the old disk. Here's
+ * an example of a real 2-disk mirror pool where one disk was force
+ * faulted:
+ *
+ * com.delphix:vdev_zap_top: 129
+ * children[0]:
+ * type: 'disk'
+ * id: 0
+ * guid: 14309659774640089719
+ * path: '/dev/disk/by-vdev/L28'
+ * whole_disk: 0
+ * DTL: 654
+ * create_txg: 4
+ * com.delphix:vdev_zap_leaf: 1161
+ * faulted: 1
+ * aux_state: 'external'
+ * children[1]:
+ * type: 'disk'
+ * id: 1
+ * guid: 16002508084177980912
+ * path: '/dev/disk/by-vdev/L29'
+ * devid: 'dm-uuid-mpath-35000c500a61d68a3'
+ * phys_path: 'L29'
+ * vdev_enc_sysfs_path: '/sys/class/enclosure/0:0:1:0/SLOT 30 32'
+ * whole_disk: 0
+ * DTL: 1028
+ * create_txg: 4
+ * com.delphix:vdev_zap_leaf: 131
+ *
+ * So in the case above, the only thing we could compare is the path.
+ *
+ * We can do this because we assume by-vdev paths are authoritative as physical
+ * paths. We could not assume this for normal paths like /dev/sda, since the
+ * physical location that /dev/sda points to can change over time.
+ */
+static boolean_t
+by_vdev_path_iter(const char *by_vdev_path, const char *devid,
+ zfs_process_func_t func, boolean_t is_slice)
+{
+ dev_data_t data = { 0 };
+
+ data.dd_compare = by_vdev_path;
+ data.dd_func = func;
+ data.dd_prop = ZPOOL_CONFIG_PATH;
+ data.dd_found = B_FALSE;
+ data.dd_islabeled = is_slice;
+ data.dd_new_devid = devid;
+
+ if (strncmp(by_vdev_path, DEV_BYVDEV_PATH,
+ strlen(DEV_BYVDEV_PATH)) != 0) {
+ /* by_vdev_path doesn't start with "/dev/disk/by-vdev/" */
+ return (B_FALSE);
+ }
+
+ (void) zpool_iter(g_zfshdl, zfs_iter_pool, &data);
+
+ return (data.dd_found);
+}
+
+/*
* Given a device identifier, find any vdevs with a matching devid.
* On Linux we can match devid directly which is always a whole disk.
*/
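
Because of the strncmp() guard in by_vdev_path_iter(), a caller can hand it any candidate path and non-by-vdev paths are simply rejected with B_FALSE. A hedged sketch of the call pattern (the literal path and devid are illustrative; zfs_process_add() is the handler defined earlier in this file):

	const char *devid = "dm-uuid-mpath-35000c500a61d68a3";

	if (by_vdev_path_iter("/dev/disk/by-vdev/L28", devid,
	    zfs_process_add, B_FALSE)) {
		/* A vdev whose ZPOOL_CONFIG_PATH matched has been processed. */
	}
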
@@ -683,15 +816,17 @@ guid_iter(uint64_t pool_guid, uint64_t vdev_guid, const char *devid,
static int
zfs_deliver_add(nvlist_t *nvl)
{
- char *devpath = NULL, *devid;
+ char *devpath = NULL, *devid = NULL;
uint64_t pool_guid = 0, vdev_guid = 0;
boolean_t is_slice;
/*
* Expecting a devid string and an optional physical location and guid
*/
- if (nvlist_lookup_string(nvl, DEV_IDENTIFIER, &devid) != 0)
+ if (nvlist_lookup_string(nvl, DEV_IDENTIFIER, &devid) != 0) {
+ zed_log_msg(LOG_INFO, "%s: no dev identifier\n", __func__);
return (-1);
+ }
(void) nvlist_lookup_string(nvl, DEV_PHYS_PATH, &devpath);
(void) nvlist_lookup_uint64(nvl, ZFS_EV_POOL_GUID, &pool_guid);
@@ -707,6 +842,8 @@ zfs_deliver_add(nvlist_t *nvl)
* 1. ZPOOL_CONFIG_DEVID (identifies the unique disk)
* 2. ZPOOL_CONFIG_PHYS_PATH (identifies disk physical location).
* 3. ZPOOL_CONFIG_GUID (identifies unique vdev).
+ * 4. ZPOOL_CONFIG_PATH for /dev/disk/by-vdev devices only (since
+ * by-vdev paths represent physical paths).
*/
if (devid_iter(devid, zfs_process_add, is_slice))
return (0);
@@ -717,6 +854,16 @@ zfs_deliver_add(nvlist_t *nvl)
(void) guid_iter(pool_guid, vdev_guid, devid, zfs_process_add,
is_slice);
+ if (devpath != NULL) {
+ /* Can we match a /dev/disk/by-vdev/ path? */
+ char by_vdev_path[MAXPATHLEN];
+ snprintf(by_vdev_path, sizeof (by_vdev_path),
+ "/dev/disk/by-vdev/%s", devpath);
+ if (by_vdev_path_iter(by_vdev_path, devid, zfs_process_add,
+ is_slice))
+ return (0);
+ }
+
return (0);
}
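
Taken together, zfs_deliver_add() now tries four identifiers in order, falling through on each miss. A condensed restatement of the resulting flow (locals, nvlist lookups, and the exact guard conditions around steps 2 and 3 are elided; the authoritative version is in the hunks above):

	/* 1. devid, 2. physical path, 3. pool/vdev GUID, 4. by-vdev path */
	if (devid_iter(devid, zfs_process_add, is_slice))
		return (0);
	if (devpath != NULL &&
	    devphys_iter(devpath, devid, zfs_process_add, is_slice))
		return (0);
	(void) guid_iter(pool_guid, vdev_guid, devid, zfs_process_add,
	    is_slice);
	if (devpath != NULL) {
		char by_vdev_path[MAXPATHLEN];

		snprintf(by_vdev_path, sizeof (by_vdev_path),
		    "/dev/disk/by-vdev/%s", devpath);
		if (by_vdev_path_iter(by_vdev_path, devid,
		    zfs_process_add, is_slice))
			return (0);
	}
	return (0);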