diff --git a/include/os/linux/kernel/linux/blkdev_compat.h b/include/os/linux/kernel/linux/blkdev_compat.h index 3276796537..45de1f4993 100644 --- a/include/os/linux/kernel/linux/blkdev_compat.h +++ b/include/os/linux/kernel/linux/blkdev_compat.h @@ -126,7 +126,8 @@ typedef int bvec_iterator_t; #endif static inline void -bio_set_flags_failfast(struct block_device *bdev, int *flags) +bio_set_flags_failfast(struct block_device *bdev, int *flags, bool dev, + bool transport, bool driver) { #ifdef CONFIG_BUG /* @@ -148,7 +149,12 @@ bio_set_flags_failfast(struct block_device *bdev, int *flags) #endif /* BLOCK_EXT_MAJOR */ #endif /* CONFIG_BUG */ - *flags |= REQ_FAILFAST_MASK; + if (dev) + *flags |= REQ_FAILFAST_DEV; + if (transport) + *flags |= REQ_FAILFAST_TRANSPORT; + if (driver) + *flags |= REQ_FAILFAST_DRIVER; } /* diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h index 10a5ec3172..1124604e8c 100644 --- a/include/sys/fs/zfs.h +++ b/include/sys/fs/zfs.h @@ -355,6 +355,7 @@ typedef enum { VDEV_PROP_BYTES_TRIM, VDEV_PROP_REMOVING, VDEV_PROP_ALLOCATING, + VDEV_PROP_FAILFAST, VDEV_NUM_PROPS } vdev_prop_t; diff --git a/include/sys/vdev_impl.h b/include/sys/vdev_impl.h index bfa8fe093d..3f4b78b947 100644 --- a/include/sys/vdev_impl.h +++ b/include/sys/vdev_impl.h @@ -299,6 +299,7 @@ struct vdev { uint64_t vdev_islog; /* is an intent log device */ uint64_t vdev_noalloc; /* device is passivated? */ uint64_t vdev_removing; /* device is being removed? 
*/ + uint64_t vdev_failfast; /* device failfast setting */ boolean_t vdev_ishole; /* is a hole in the namespace */ uint64_t vdev_top_zap; vdev_alloc_bias_t vdev_alloc_bias; /* metaslab allocation bias */ diff --git a/lib/libzfs/libzfs.abi b/lib/libzfs/libzfs.abi index 061a060b66..98873784e7 100644 --- a/lib/libzfs/libzfs.abi +++ b/lib/libzfs/libzfs.abi @@ -3214,7 +3214,8 @@ - + + diff --git a/man/man4/zfs.4 b/man/man4/zfs.4 index ad3d8810e9..98539a6369 100644 --- a/man/man4/zfs.4 +++ b/man/man4/zfs.4 @@ -15,7 +15,7 @@ .\" own identifying information: .\" Portions Copyright [yyyy] [name of copyright owner] .\" -.Dd November 7, 2022 +.Dd November 9, 2022 .Dt ZFS 4 .Os . @@ -1345,6 +1345,19 @@ as fuller devices will tend to be slower than empty devices. Also see .Sy zio_dva_throttle_enabled . . +.It Sy zfs_vdev_failfast_mask Ns = Ns Sy 1 Pq uint +Defines the error types on which the driver should not retry a failed I/O. +The following options may be bitwise-ored together: +.TS +box; +lbz r l l . + Value Name Description +_ + 1 Device No driver retries on device errors. + 2 Transport No driver retries on transport errors. + 4 Driver No driver retries on driver errors. +.TE +. .It Sy zfs_expire_snapshot Ns = Ns Sy 300 Ns s Pq int Time before expiring .Pa .zfs/snapshot . @@ -1364,7 +1377,7 @@ The following flags may be bitwise-ored together: .TS box; lbz r l l . - Value Symbolic Name Description + Value Name Description _ 1 ZFS_DEBUG_DPRINTF Enable dprintf entries in the debug log. * 2 ZFS_DEBUG_DBUF_VERIFY Enable extra dbuf verifications. diff --git a/man/man7/vdevprops.7 b/man/man7/vdevprops.7 index b98bda064c..af5d26f6b4 100644 --- a/man/man7/vdevprops.7 +++ b/man/man7/vdevprops.7 @@ -20,7 +20,7 @@ .\" .\" Copyright (c) 2021 Klara, Inc. .\" -.Dd November 27, 2021 +.Dd October 30, 2022 .Dt VDEVPROPS 7 .Os . @@ -121,6 +121,9 @@ dataset. 
A text comment up to 8192 characters long .It Sy bootsize The amount of space to reserve for the EFI system partition +.It Sy failfast +Whether this device should propagate BIO errors back to ZFS; set to off to +disable failfast. .It Sy path The path to the device for this vdev .It Sy allocating diff --git a/module/os/linux/zfs/vdev_disk.c b/module/os/linux/zfs/vdev_disk.c index 84d191abb9..4f33009f14 100644 --- a/module/os/linux/zfs/vdev_disk.c +++ b/module/os/linux/zfs/vdev_disk.c @@ -74,6 +74,12 @@ typedef struct dio_request { struct bio *dr_bio[0]; /* Attached bio's */ } dio_request_t; +/* + * BIO request failfast mask. + */ + +static unsigned int zfs_vdev_failfast_mask = 1; + static fmode_t vdev_bdev_mode(spa_mode_t spa_mode) { @@ -659,8 +665,11 @@ __vdev_disk_physio(struct block_device *bdev, zio_t *zio, retry: dr = vdev_disk_dio_alloc(bio_count); - if (zio && !(zio->io_flags & (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD))) - bio_set_flags_failfast(bdev, &flags); + if (zio && !(zio->io_flags & (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)) && + zio->io_vd->vdev_failfast == B_TRUE) { + bio_set_flags_failfast(bdev, &flags, zfs_vdev_failfast_mask & 1, + zfs_vdev_failfast_mask & 2, zfs_vdev_failfast_mask & 4); + } dr->dr_zio = zio; @@ -1045,3 +1054,6 @@ param_set_max_auto_ashift(const char *buf, zfs_kernel_param_t *kp) ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, open_timeout_ms, UINT, ZMOD_RW, "Timeout before determining that a device is missing"); + +ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, failfast_mask, UINT, ZMOD_RW, + "Defines failfast mask: 1 - device, 2 - transport, 4 - driver"); diff --git a/module/zcommon/zpool_prop.c b/module/zcommon/zpool_prop.c index 4737bd628d..285b979096 100644 --- a/module/zcommon/zpool_prop.c +++ b/module/zcommon/zpool_prop.c @@ -420,6 +420,9 @@ vdev_prop_init(void) boolean_na_table, sfeatures); /* default index properties */ + zprop_register_index(VDEV_PROP_FAILFAST, "failfast", B_TRUE, + PROP_DEFAULT, ZFS_TYPE_VDEV, "on | off", "FAILFAST", boolean_table, + 
sfeatures); /* hidden properties */ zprop_register_hidden(VDEV_PROP_NAME, "name", PROP_TYPE_STRING, diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c index 8c62112de7..4520ca31b7 100644 --- a/module/zfs/vdev.c +++ b/module/zfs/vdev.c @@ -3563,6 +3563,26 @@ vdev_load(vdev_t *vd) } } + if (vd == vd->vdev_top && vd->vdev_top_zap != 0) { + spa_t *spa = vd->vdev_spa; + uint64_t failfast; + + error = zap_lookup(spa->spa_meta_objset, vd->vdev_top_zap, + vdev_prop_to_name(VDEV_PROP_FAILFAST), sizeof (failfast), + 1, &failfast); + if (error == 0) { + vd->vdev_failfast = failfast & 1; + } else if (error == ENOENT) { + vd->vdev_failfast = vdev_prop_default_numeric( + VDEV_PROP_FAILFAST); + } else { + vdev_dbgmsg(vd, + "vdev_load: zap_lookup(top_zap=%llu) " + "failed [error=%d]", + (u_longlong_t)vd->vdev_top_zap, error); + } + } + /* * Load any rebuild state from the top-level vdev zap. */ @@ -5709,6 +5729,13 @@ vdev_prop_set(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl) else error = spa_vdev_alloc(spa, vdev_guid); break; + case VDEV_PROP_FAILFAST: + if (nvpair_value_uint64(elem, &intval) != 0) { + error = EINVAL; + break; + } + vd->vdev_failfast = intval & 1; + break; default: /* Most processing is done in vdev_props_set_sync */ break; @@ -6019,6 +6046,25 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl) intval = ZPROP_BOOLEAN_NA; } + vdev_prop_add_list(outnvl, propname, strval, + intval, src); + break; + case VDEV_PROP_FAILFAST: + src = ZPROP_SRC_LOCAL; + strval = NULL; + + err = zap_lookup(mos, objid, nvpair_name(elem), + sizeof (uint64_t), 1, &intval); + if (err == ENOENT) { + intval = vdev_prop_default_numeric( + prop); + err = 0; + } else if (err) { + break; + } + if (intval == vdev_prop_default_numeric(prop)) + src = ZPROP_SRC_DEFAULT; + vdev_prop_add_list(outnvl, propname, strval, intval, src); break;