diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c
index 0d2f0b1da5..e652045bbc 100644
--- a/cmd/zdb/zdb.c
+++ b/cmd/zdb/zdb.c
@@ -6474,6 +6474,7 @@ zdb_leak_init(spa_t *spa, zdb_cb_t *zcb)
 	spa->spa_normal_class->mc_ops = &zdb_metaslab_ops;
 	spa->spa_log_class->mc_ops = &zdb_metaslab_ops;
 	spa->spa_embedded_log_class->mc_ops = &zdb_metaslab_ops;
+	spa->spa_special_embedded_log_class->mc_ops = &zdb_metaslab_ops;
 
 	zcb->zcb_vd_obsolete_counts =
 	    umem_zalloc(rvd->vdev_children * sizeof (uint32_t *),
@@ -6614,8 +6615,11 @@ zdb_leak_fini(spa_t *spa, zdb_cb_t *zcb)
 
 		for (uint64_t m = 0; m < vd->vdev_ms_count; m++) {
 			metaslab_t *msp = vd->vdev_ms[m];
-			ASSERT3P(msp->ms_group, ==, (msp->ms_group->mg_class ==
-			    spa_embedded_log_class(spa)) ?
+			ASSERT3P(msp->ms_group, ==, (
+			    (msp->ms_group->mg_class ==
+			    spa_embedded_log_class(spa)) ||
+			    (msp->ms_group->mg_class ==
+			    spa_special_embedded_log_class(spa))) ?
 			    vd->vdev_log_mg : vd->vdev_mg);
 
 			/*
@@ -6846,6 +6850,8 @@ dump_block_stats(spa_t *spa)
 	zcb->zcb_totalasize += metaslab_class_get_alloc(spa_dedup_class(spa));
 	zcb->zcb_totalasize +=
 	    metaslab_class_get_alloc(spa_embedded_log_class(spa));
+	zcb->zcb_totalasize +=
+	    metaslab_class_get_alloc(spa_special_embedded_log_class(spa));
 	zcb->zcb_start = zcb->zcb_lastprint = gethrtime();
 	err = traverse_pool(spa, 0, flags, zdb_blkptr_cb, zcb);
 
@@ -6895,6 +6901,7 @@ dump_block_stats(spa_t *spa)
 	    metaslab_class_get_alloc(spa_log_class(spa)) +
 	    metaslab_class_get_alloc(spa_embedded_log_class(spa)) +
 	    metaslab_class_get_alloc(spa_special_class(spa)) +
+	    metaslab_class_get_alloc(spa_special_embedded_log_class(spa)) +
 	    metaslab_class_get_alloc(spa_dedup_class(spa)) +
 	    get_unflushed_alloc_space(spa);
 	total_found =
@@ -6977,6 +6984,17 @@ dump_block_stats(spa_t *spa)
 		    "Embedded log class", (u_longlong_t)alloc,
 		    100.0 * alloc / space);
 	}
+	if (spa_special_embedded_log_class(spa)->mc_allocator[0].mca_rotor
+	    != NULL) {
+		uint64_t alloc = metaslab_class_get_alloc(
+		    spa_special_embedded_log_class(spa));
+		uint64_t space = metaslab_class_get_space(
+		    spa_special_embedded_log_class(spa));
+
+		(void) printf("\t%-16s %14llu used: %5.2f%%\n",
+		    "Special embedded log class", (u_longlong_t)alloc,
+		    100.0 * alloc / space);
+	}
 
 	for (i = 0; i < NUM_BP_EMBEDDED_TYPES; i++) {
 		if (zcb->zcb_embedded_blocks[i] == 0)
diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h
index e191420f2d..1bdcac2bdc 100644
--- a/include/sys/fs/zfs.h
+++ b/include/sys/fs/zfs.h
@@ -368,6 +368,7 @@ typedef enum {
 	VDEV_PROP_RAIDZ_EXPANDING,
 	VDEV_PROP_SLOW_IO_N,
 	VDEV_PROP_SLOW_IO_T,
+	VDEV_PROP_ELOG,
 	VDEV_NUM_PROPS
 } vdev_prop_t;
 
diff --git a/include/sys/spa.h b/include/sys/spa.h
index 3073c4d1b9..074e408fea 100644
--- a/include/sys/spa.h
+++ b/include/sys/spa.h
@@ -1049,6 +1049,7 @@ extern metaslab_class_t *spa_normal_class(spa_t *spa);
 extern metaslab_class_t *spa_log_class(spa_t *spa);
 extern metaslab_class_t *spa_embedded_log_class(spa_t *spa);
 extern metaslab_class_t *spa_special_class(spa_t *spa);
+extern metaslab_class_t *spa_special_embedded_log_class(spa_t *spa);
 extern metaslab_class_t *spa_dedup_class(spa_t *spa);
 extern metaslab_class_t *spa_preferred_class(spa_t *spa, uint64_t size,
     dmu_object_type_t objtype, uint_t level, uint_t special_smallblk);
diff --git a/include/sys/spa_impl.h b/include/sys/spa_impl.h
index 5605a35b86..067decd833 100644
--- a/include/sys/spa_impl.h
+++ b/include/sys/spa_impl.h
@@ -248,6 +248,7 @@ struct spa {
 	metaslab_class_t *spa_normal_class;	/* normal data class */
 	metaslab_class_t *spa_log_class;	/* intent log data class */
 	metaslab_class_t *spa_embedded_log_class; /* log on normal vdevs */
+	metaslab_class_t *spa_special_embedded_log_class; /* "" special */
 	metaslab_class_t *spa_special_class;	/* special allocation class */
 	metaslab_class_t *spa_dedup_class;	/* dedup allocation class */
 	uint64_t	spa_first_txg;		/* first txg after spa_open() */
diff --git a/include/sys/vdev_impl.h b/include/sys/vdev_impl.h
index 57ff31e89e..2433d0ba92 100644
--- a/include/sys/vdev_impl.h
+++ b/include/sys/vdev_impl.h
@@ -268,6 +268,7 @@ struct vdev {
 	uint64_t	vdev_ms_count;	/* number of metaslabs */
 	metaslab_group_t *vdev_mg;	/* metaslab group */
 	metaslab_group_t *vdev_log_mg;	/* embedded slog metaslab group */
+	boolean_t	use_embedded_log; /* use embedded slog mg */
 	metaslab_t	**vdev_ms;	/* metaslab array */
 	txg_list_t	vdev_ms_list;	/* per-txg dirty metaslab lists */
 	txg_list_t	vdev_dtl_list;	/* per-txg dirty DTL lists */
diff --git a/module/zcommon/zpool_prop.c b/module/zcommon/zpool_prop.c
index e2e3bf5be6..e17c490e23 100644
--- a/module/zcommon/zpool_prop.c
+++ b/module/zcommon/zpool_prop.c
@@ -448,6 +448,9 @@ vdev_prop_init(void)
 	zprop_register_index(VDEV_PROP_RAIDZ_EXPANDING, "raidz_expanding", 0,
 	    PROP_READONLY, ZFS_TYPE_VDEV, "on | off", "RAIDZ_EXPANDING",
 	    boolean_table, sfeatures);
 
 	/* default index properties */
+	zprop_register_index(VDEV_PROP_ELOG, "embedded_log_target", B_TRUE,
+	    PROP_DEFAULT, ZFS_TYPE_VDEV, "on | off", "ELOG",
+	    boolean_table, sfeatures);
 	zprop_register_index(VDEV_PROP_FAILFAST, "failfast", B_TRUE,
diff --git a/module/zfs/metaslab.c b/module/zfs/metaslab.c
index 7170b5eefc..e54564fb13 100644
--- a/module/zfs/metaslab.c
+++ b/module/zfs/metaslab.c
@@ -1225,6 +1225,15 @@ metaslab_group_allocatable(metaslab_group_t *mg, metaslab_group_t *rotor,
 	spa_t *spa = mg->mg_vd->vdev_spa;
 	metaslab_class_t *mc = mg->mg_class;
 
+	/*
+	 * If we're attempting to allocate from an embedded_log class,
+	 * and we have it set to not use that on this vdev, don't.
+	 */
+	if ((mc == spa_special_embedded_log_class(spa) ||
+	    mc == spa_embedded_log_class(spa)) &&
+	    mg->mg_vd->use_embedded_log == B_FALSE) {
+		return (B_FALSE);
+	}
 	/*
 	 * We can only consider skipping this metaslab group if it's
 	 * in the normal metaslab class and there are other metaslab
@@ -5240,6 +5249,18 @@ top:
 			allocatable = vdev_allocatable(vd);
 		}
 
+		/*
+		 * If we're trying a log allocation from an
+		 * embedded_log allocation class, and we
+		 * have turned off allocating those from this vdev,
+		 * don't.
+		 */
+		if ((mc == spa_special_embedded_log_class(spa) ||
+		    mc == spa_embedded_log_class(spa)) &&
+		    ((flags & METASLAB_ZIL) != 0) &&
+		    vd->use_embedded_log == B_FALSE)
+			allocatable = B_FALSE;
+
 		/*
 		 * Determine if the selected metaslab group is eligible
 		 * for allocations. If we're ganging then don't allow
diff --git a/module/zfs/spa.c b/module/zfs/spa.c
index 638572996c..833726c2f1 100644
--- a/module/zfs/spa.c
+++ b/module/zfs/spa.c
@@ -373,11 +373,15 @@ spa_prop_get_config(spa_t *spa, nvlist_t **nvp)
 		alloc += metaslab_class_get_alloc(spa_special_class(spa));
 		alloc += metaslab_class_get_alloc(spa_dedup_class(spa));
 		alloc += metaslab_class_get_alloc(spa_embedded_log_class(spa));
+		alloc += metaslab_class_get_alloc(
+		    spa_special_embedded_log_class(spa));
 
 		size = metaslab_class_get_space(mc);
 		size += metaslab_class_get_space(spa_special_class(spa));
 		size += metaslab_class_get_space(spa_dedup_class(spa));
 		size += metaslab_class_get_space(spa_embedded_log_class(spa));
+		size += metaslab_class_get_space(
+		    spa_special_embedded_log_class(spa));
 
 		spa_prop_add_list(*nvp, ZPOOL_PROP_NAME, spa_name(spa), 0, src);
 		spa_prop_add_list(*nvp, ZPOOL_PROP_SIZE, NULL, size, src);
@@ -1623,6 +1627,7 @@ spa_activate(spa_t *spa, spa_mode_t mode)
 	spa->spa_log_class = metaslab_class_create(spa, msp);
 	spa->spa_embedded_log_class = metaslab_class_create(spa, msp);
 	spa->spa_special_class = metaslab_class_create(spa, msp);
+	spa->spa_special_embedded_log_class = metaslab_class_create(spa, msp);
 	spa->spa_dedup_class = metaslab_class_create(spa, msp);
 
 	/* Try to create a covering process */
@@ -1796,6 +1801,9 @@ spa_deactivate(spa_t *spa)
 	metaslab_class_destroy(spa->spa_special_class);
 	spa->spa_special_class = NULL;
 
+	metaslab_class_destroy(spa->spa_special_embedded_log_class);
+	spa->spa_special_embedded_log_class = NULL;
+
 	metaslab_class_destroy(spa->spa_dedup_class);
 	spa->spa_dedup_class = NULL;
 
@@ -8909,6 +8917,8 @@ spa_async_thread(void *arg)
 		old_space += metaslab_class_get_space(spa_dedup_class(spa));
 		old_space += metaslab_class_get_space(
 		    spa_embedded_log_class(spa));
+		old_space += metaslab_class_get_space(
+		    spa_special_embedded_log_class(spa));
 
 		spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);
 
@@ -8917,6 +8927,8 @@ spa_async_thread(void *arg)
 		new_space += metaslab_class_get_space(spa_dedup_class(spa));
 		new_space += metaslab_class_get_space(
 		    spa_embedded_log_class(spa));
+		new_space += metaslab_class_get_space(
+		    spa_special_embedded_log_class(spa));
 		mutex_exit(&spa_namespace_lock);
 
 		/*
diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c
index d1d41bbe72..870240b724 100644
--- a/module/zfs/spa_misc.c
+++ b/module/zfs/spa_misc.c
@@ -1300,7 +1300,7 @@ spa_vdev_config_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error,
 
 	int config_changed = B_FALSE;
 
-	ASSERT(txg > spa_last_synced_txg(spa));
+	ASSERT3U(txg, >, spa_last_synced_txg(spa));
 
 	spa->spa_pending_vdev = NULL;
 
@@ -1317,11 +1317,13 @@ spa_vdev_config_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error,
 	/*
 	 * Verify the metaslab classes.
 	 */
-	ASSERT(metaslab_class_validate(spa_normal_class(spa)) == 0);
-	ASSERT(metaslab_class_validate(spa_log_class(spa)) == 0);
-	ASSERT(metaslab_class_validate(spa_embedded_log_class(spa)) == 0);
-	ASSERT(metaslab_class_validate(spa_special_class(spa)) == 0);
-	ASSERT(metaslab_class_validate(spa_dedup_class(spa)) == 0);
+	ASSERT3U(metaslab_class_validate(spa_normal_class(spa)), ==, 0);
+	ASSERT3U(metaslab_class_validate(spa_log_class(spa)), ==, 0);
+	ASSERT3U(metaslab_class_validate(spa_embedded_log_class(spa)), ==, 0);
+	ASSERT3U(metaslab_class_validate(spa_special_class(spa)), ==, 0);
+	ASSERT3U(metaslab_class_validate(
+	    spa_special_embedded_log_class(spa)), ==, 0);
+	ASSERT3U(metaslab_class_validate(spa_dedup_class(spa)), ==, 0);
 
 	spa_config_exit(spa, SCL_ALL, spa);
 
@@ -1889,6 +1891,10 @@ spa_get_slop_space(spa_t *spa)
 	    metaslab_class_get_dspace(spa_embedded_log_class(spa));
 	slop -= MIN(embedded_log, slop >> 1);
 
+	uint64_t s_embedded_log =
+	    metaslab_class_get_dspace(spa_special_embedded_log_class(spa));
+	slop -= MIN(s_embedded_log, slop >> 1);
+
 	/*
 	 * Slop space should be at least spa_min_slop, but no more than half
 	 * the entire pool.
@@ -1990,6 +1996,12 @@ spa_special_class(spa_t *spa)
 	return (spa->spa_special_class);
 }
 
+metaslab_class_t *
+spa_special_embedded_log_class(spa_t *spa)
+{
+	return (spa->spa_special_embedded_log_class);
+}
+
 metaslab_class_t *
 spa_dedup_class(spa_t *spa)
 {
diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c
index c74f72159d..9ec2268e32 100644
--- a/module/zfs/vdev.c
+++ b/module/zfs/vdev.c
@@ -270,8 +270,9 @@ vdev_getops(const char *type)
 metaslab_group_t *
 vdev_get_mg(vdev_t *vd, metaslab_class_t *mc)
 {
-	if (mc == spa_embedded_log_class(vd->vdev_spa) &&
-	    vd->vdev_log_mg != NULL)
+	if ((mc == spa_embedded_log_class(vd->vdev_spa) ||
+	    mc == spa_special_embedded_log_class(vd->vdev_spa)) &&
+	    vd->vdev_log_mg != NULL && vd->use_embedded_log == B_TRUE)
 		return (vd->vdev_log_mg);
 	else
 		return (vd->vdev_mg);
@@ -1476,6 +1477,17 @@ vdev_metaslab_group_create(vdev_t *vd)
-		if (!vd->vdev_islog) {
+		/*
+		 * Create exactly one embedded log group per vdev.  Special
+		 * vdevs use the special embedded log class; creating the
+		 * normal group first and then overwriting vdev_log_mg
+		 * would orphan the first group.
+		 */
+		if (vd->vdev_alloc_bias == VDEV_BIAS_SPECIAL) {
+			vd->vdev_log_mg = metaslab_group_create(
+			    spa_special_embedded_log_class(spa), vd, 1);
+			vd->use_embedded_log = B_TRUE;
+		} else if (!vd->vdev_islog) {
 			vd->vdev_log_mg = metaslab_group_create(
 			    spa_embedded_log_class(spa), vd, 1);
+			vd->use_embedded_log = B_TRUE;
 		}
 
 		/*
@@ -1561,7 +1573,8 @@ vdev_metaslab_init(vdev_t *vd, uint64_t txg)
 	 * embedded slog by moving it from the regular to the log metaslab
 	 * group.
 	 */
-	if (vd->vdev_mg->mg_class == spa_normal_class(spa) &&
+	if ((vd->vdev_mg->mg_class == spa_normal_class(spa) ||
+	    vd->vdev_mg->mg_class == spa_special_class(spa)) &&
 	    vd->vdev_ms_count > zfs_embedded_slog_min_ms &&
 	    avl_is_empty(&vd->vdev_log_mg->mg_metaslab_tree)) {
 		uint64_t slog_msid = 0;
@@ -6008,11 +6021,20 @@ vdev_prop_set(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
 			}
 			vd->vdev_slow_io_t = intval;
 			break;
+		case VDEV_PROP_ELOG:
+			if (nvpair_value_uint64(elem, &intval) != 0) {
+				error = EINVAL;
+				break;
+			}
+			/* Consumers compare against B_TRUE/B_FALSE. */
+			vd->use_embedded_log =
+			    (intval != 0) ? B_TRUE : B_FALSE;
+			break;
 		default:
 			/* Most processing is done in vdev_props_set_sync */
 			break;
 		}
 end:
 		if (error != 0) {
 			intval = error;
 			vdev_prop_add_list(outnvl, propname, strval, intval, 0);
@@ -6328,6 +6350,17 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
 				vdev_prop_add_list(outnvl, propname, NULL,
 				    intval, src);
 				break;
+			case VDEV_PROP_ELOG:
+				intval = vd->use_embedded_log;
+
+				if (intval == vdev_prop_default_numeric(prop))
+					src = ZPROP_SRC_DEFAULT;
+				else
+					src = ZPROP_SRC_LOCAL;
+				vdev_prop_add_list(outnvl, propname, NULL,
+				    intval, src);
+				break;
+
 			case VDEV_PROP_FAILFAST:
 				src = ZPROP_SRC_LOCAL;
 				strval = NULL;
diff --git a/module/zfs/zio.c b/module/zfs/zio.c
index d68d5ababe..7753d9d571 100644
--- a/module/zfs/zio.c
+++ b/module/zfs/zio.c
@@ -3857,6 +3857,11 @@ zio_alloc_zil(spa_t *spa, objset_t *os, uint64_t txg, blkptr_t *new_bp,
 	error = metaslab_alloc(spa, spa_log_class(spa), size, new_bp, 1,
 	    txg, NULL, flags, &io_alloc_list, NULL, allocator);
 	*slog = (error == 0);
+	if (error != 0) {
+		error = metaslab_alloc(spa, spa_special_embedded_log_class(spa),
+		    size, new_bp, 1, txg, NULL, flags,
+		    &io_alloc_list, NULL, allocator);
+	}
 	if (error != 0) {
 		error = metaslab_alloc(spa, spa_embedded_log_class(spa),
 		    size, new_bp, 1, txg, NULL, flags,