diff --git a/include/sys/fm/fs/zfs.h b/include/sys/fm/fs/zfs.h index d157ea1533..4571939d2d 100644 --- a/include/sys/fm/fs/zfs.h +++ b/include/sys/fm/fs/zfs.h @@ -35,6 +35,7 @@ extern "C" { #define FM_EREPORT_ZFS_CHECKSUM "checksum" #define FM_EREPORT_ZFS_IO "io" #define FM_EREPORT_ZFS_DATA "data" +#define FM_EREPORT_ZFS_DELAY "delay" #define FM_EREPORT_ZFS_CONFIG_SYNC "config.sync" #define FM_EREPORT_ZFS_POOL "zpool" #define FM_EREPORT_ZFS_POOL_DESTROY "zpool.destroy" @@ -81,6 +82,7 @@ extern "C" { #define FM_EREPORT_PAYLOAD_ZFS_ZIO_OFFSET "zio_offset" #define FM_EREPORT_PAYLOAD_ZFS_ZIO_SIZE "zio_size" #define FM_EREPORT_PAYLOAD_ZFS_ZIO_FLAGS "zio_flags" +#define FM_EREPORT_PAYLOAD_ZFS_ZIO_DELAY "zio_delay" #define FM_EREPORT_PAYLOAD_ZFS_PREV_STATE "prev_state" #define FM_EREPORT_PAYLOAD_ZFS_CKSUM_EXPECTED "cksum_expected" #define FM_EREPORT_PAYLOAD_ZFS_CKSUM_ACTUAL "cksum_actual" diff --git a/include/sys/zio.h b/include/sys/zio.h index 97d8ec74d2..a469181745 100644 --- a/include/sys/zio.h +++ b/include/sys/zio.h @@ -116,6 +116,11 @@ enum zio_compress { ZIO_COMPRESS_ON_VALUE == ZIO_COMPRESS_LZJB) || \ (compress) == ZIO_COMPRESS_OFF) +/* + * Default Linux timeout for a sd device. + */ +#define ZIO_DELAY_MAX (30 * MILLISEC) + #define ZIO_FAILURE_MODE_WAIT 0 #define ZIO_FAILURE_MODE_CONTINUE 1 #define ZIO_FAILURE_MODE_PANIC 2 @@ -401,6 +406,7 @@ struct zio { enum zio_flag io_orig_flags; enum zio_stage io_orig_stage; enum zio_stage io_orig_pipeline; + uint64_t io_delay; int io_error; int io_child_error[ZIO_CHILD_TYPES]; uint64_t io_children[ZIO_CHILD_TYPES][ZIO_WAIT_TYPES]; diff --git a/module/zfs/vdev_disk.c b/module/zfs/vdev_disk.c index 863392544f..51062ef235 100644 --- a/module/zfs/vdev_disk.c +++ b/module/zfs/vdev_disk.c @@ -95,10 +95,10 @@ static void vdev_disk_error(zio_t *zio) { #ifdef ZFS_DEBUG - printk("ZFS: zio error=%d type=%d offset=%llu " - "size=%llu flags=%x\n", zio->io_error, zio->io_type, + printk("ZFS: zio error=%d type=%d offset=%llu size=%llu " + "flags=%x delay=%llu\n", zio->io_error, zio->io_type, (u_longlong_t)zio->io_offset, (u_longlong_t)zio->io_size, - zio->io_flags); + zio->io_flags, (u_longlong_t)zio->io_delay); #endif } @@ -242,6 +242,8 @@ vdev_disk_dio_put(dio_request_t *dr) vdev_disk_dio_free(dr); if (zio) { + zio->io_delay = jiffies_to_msecs( + jiffies_64 - zio->io_delay); zio->io_error = error; ASSERT3S(zio->io_error, >=, 0); if (zio->io_error) @@ -403,6 +405,8 @@ retry: /* Extra reference to protect dio_request during submit_bio */ vdev_disk_dio_get(dr); + if (zio) + zio->io_delay = jiffies_64; /* Submit all bio's associated with this dio */ for (i = 0; i < dr->dr_bio_count; i++) @@ -442,6 +446,7 @@ BIO_END_IO_PROTO(vdev_disk_io_flush_completion, bio, size, rc) { zio_t *zio = bio->bi_private; + zio->io_delay = jiffies_to_msecs(jiffies_64 - zio->io_delay); zio->io_error = -rc; if (rc && (rc == -EOPNOTSUPP)) zio->io_vd->vdev_nowritecache = B_TRUE; @@ -472,6 +477,7 @@ vdev_disk_io_flush(struct block_device *bdev, zio_t *zio) bio->bi_end_io = vdev_disk_io_flush_completion; bio->bi_private = zio; bio->bi_bdev = bdev; + zio->io_delay = jiffies_64; submit_bio(WRITE_BARRIER, bio); return 0; diff --git a/module/zfs/zfs_fm.c b/module/zfs/zfs_fm.c index dd15c5d634..74f2756e3b 100644 --- a/module/zfs/zfs_fm.c +++ b/module/zfs/zfs_fm.c @@ -290,6 +290,8 @@ zfs_ereport_start(nvlist_t **ereport_out, nvlist_t **detector_out, DATA_TYPE_INT32, zio->io_error, NULL); fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_ZIO_FLAGS, DATA_TYPE_INT32, zio->io_flags, NULL); + fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_ZIO_DELAY, + DATA_TYPE_UINT64, zio->io_delay, NULL); /* * If the 'size' parameter is non-zero, it indicates this is a diff --git a/module/zfs/zio.c b/module/zfs/zio.c index 70d3addf5c..9f34a23385 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -75,6 +75,7 @@ kmem_cache_t *zio_link_cache; kmem_cache_t *zio_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT]; kmem_cache_t *zio_data_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT]; int zio_bulk_flags = 0; +int zio_delay_max = ZIO_DELAY_MAX; #ifdef _KERNEL extern vmem_t *zio_alloc_arena; @@ -2754,6 +2755,17 @@ zio_done(zio_t *zio) vdev_stat_update(zio, zio->io_size); + /* + * If this I/O is attached to a particular vdev is slow, exeeding + * 30 seconds to complete, post an error described the I/O delay. + * We ignore these errors if the device is currently unavailable. + */ + if (zio->io_delay >= zio_delay_max) { + if (zio->io_vd != NULL && !vdev_is_dead(zio->io_vd)) + zfs_ereport_post(FM_EREPORT_ZFS_DELAY, zio->io_spa, + zio->io_vd, zio, 0, 0); + } + if (zio->io_error) { /* * If this I/O is attached to a particular vdev, @@ -2993,4 +3005,7 @@ EXPORT_SYMBOL(zio_type_name); module_param(zio_bulk_flags, int, 0644); MODULE_PARM_DESC(zio_bulk_flags, "Additional flags to pass to bulk buffers"); + +module_param(zio_delay_max, int, 0644); +MODULE_PARM_DESC(zio_delay_max, "Max zio delay before posting an event (ms)"); #endif