From d09bbdfce0069d8b92e3060a21403d994ba1fbec Mon Sep 17 00:00:00 2001 From: Don Brady Date: Mon, 9 Sep 2024 19:47:19 +0000 Subject: [PATCH] zed no fault ZED will diagnose a fault on a disk that has exceeded the threshold. It looks like it doesn't correctly handle the situation where one of a raidz vdev's children is undergoing a resilver and the faulted disks exceed the redundancy guarantees. This patch will prevent ZED from issuing any vdev faults and instead will degrade the vdev. Signed-off-by: Don Brady --- cmd/zed/agents/zfs_retire.c | 7 +++++++ cmd/zed/zed_conf.c | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/cmd/zed/agents/zfs_retire.c b/cmd/zed/agents/zfs_retire.c index f4063bea73..fb04792a0c 100644 --- a/cmd/zed/agents/zfs_retire.c +++ b/cmd/zed/agents/zfs_retire.c @@ -493,7 +493,14 @@ zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, * Actively fault the device if needed. */ if (fault_device) +#if 1 + /* + * Klara -- don't issue faults + */ + (void) zpool_vdev_degrade(zhp, vdev_guid, aux); +#else (void) zpool_vdev_fault(zhp, vdev_guid, aux); +#endif if (degrade_device) (void) zpool_vdev_degrade(zhp, vdev_guid, aux); diff --git a/cmd/zed/zed_conf.c b/cmd/zed/zed_conf.c index 59935102f1..db0ede041e 100644 --- a/cmd/zed/zed_conf.c +++ b/cmd/zed/zed_conf.c @@ -183,7 +183,7 @@ _zed_conf_display_license(void) static void _zed_conf_display_version(void) { - printf("%s-%s-%s\n", + printf("%s-%s-%s-skip-faults\n", ZFS_META_NAME, ZFS_META_VERSION, ZFS_META_RELEASE); exit(EXIT_SUCCESS);