Refcounted DSL Crypto Key Mappings
Since native ZFS encryption was merged, we have been fighting against a series of bugs that come down to the same problem: Key mappings (which must be present during all I/O operations) are created and destroyed based on dataset ownership, but I/Os have traditionally been allowed to "leak" into the next txg after the dataset is disowned. In the past we have attempted to solve this problem by trying to ensure that datasets are disowned after all I/O is finished by calling txg_wait_synced(), but we have repeatedly found edge cases that need to be squashed and code paths that might incur a high number of txg syncs. This patch attempts to resolve this issue differently, by adding a reference to the key mapping for each txg it is dirtied in. By doing so, we can remove many of the unnecessary calls to txg_wait_synced() we have added in the past and ensure we don't need to deal with this problem in the future. Reviewed-by: Jorgen Lundman <lundman@lundman.net> Reviewed-by: Matthew Ahrens <mahrens@delphix.com> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Tom Caputi <tcaputi@datto.com> Closes #7949
This commit is contained in:
parent
f65fbee1e7
commit
52ce99dd61
|
@ -4116,7 +4116,6 @@ ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id)
|
||||||
ztest_zd_init(zdtmp, NULL, os);
|
ztest_zd_init(zdtmp, NULL, os);
|
||||||
zil_replay(os, zdtmp, ztest_replay_vector);
|
zil_replay(os, zdtmp, ztest_replay_vector);
|
||||||
ztest_zd_fini(zdtmp);
|
ztest_zd_fini(zdtmp);
|
||||||
txg_wait_synced(dmu_objset_pool(os), 0);
|
|
||||||
dmu_objset_disown(os, B_TRUE, FTAG);
|
dmu_objset_disown(os, B_TRUE, FTAG);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -4186,7 +4185,6 @@ ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id)
|
||||||
B_FALSE, B_TRUE, FTAG, &os2));
|
B_FALSE, B_TRUE, FTAG, &os2));
|
||||||
|
|
||||||
zil_close(zilog);
|
zil_close(zilog);
|
||||||
txg_wait_synced(spa_get_dsl(os->os_spa), 0);
|
|
||||||
dmu_objset_disown(os, B_TRUE, FTAG);
|
dmu_objset_disown(os, B_TRUE, FTAG);
|
||||||
ztest_zd_fini(zdtmp);
|
ztest_zd_fini(zdtmp);
|
||||||
out:
|
out:
|
||||||
|
@ -6870,7 +6868,6 @@ ztest_dataset_close(int d)
|
||||||
ztest_ds_t *zd = &ztest_ds[d];
|
ztest_ds_t *zd = &ztest_ds[d];
|
||||||
|
|
||||||
zil_close(zd->zd_zilog);
|
zil_close(zd->zd_zilog);
|
||||||
txg_wait_synced(spa_get_dsl(zd->zd_os->os_spa), 0);
|
|
||||||
dmu_objset_disown(zd->zd_os, B_TRUE, zd);
|
dmu_objset_disown(zd->zd_os, B_TRUE, zd);
|
||||||
|
|
||||||
ztest_zd_fini(zd);
|
ztest_zd_fini(zd);
|
||||||
|
|
|
@ -111,7 +111,7 @@ typedef struct dsl_crypto_key {
|
||||||
/* link on spa_keystore_t:sk_dsl_keys */
|
/* link on spa_keystore_t:sk_dsl_keys */
|
||||||
avl_node_t dck_avl_link;
|
avl_node_t dck_avl_link;
|
||||||
|
|
||||||
/* refcount of dsl_key_mapping_t's holding this key */
|
/* refcount of holders of this key */
|
||||||
zfs_refcount_t dck_holds;
|
zfs_refcount_t dck_holds;
|
||||||
|
|
||||||
/* master key used to derive encryption keys */
|
/* master key used to derive encryption keys */
|
||||||
|
@ -181,10 +181,11 @@ int spa_keystore_load_wkey(const char *dsname, dsl_crypto_params_t *dcp,
|
||||||
int spa_keystore_unload_wkey_impl(spa_t *spa, uint64_t ddobj);
|
int spa_keystore_unload_wkey_impl(spa_t *spa, uint64_t ddobj);
|
||||||
int spa_keystore_unload_wkey(const char *dsname);
|
int spa_keystore_unload_wkey(const char *dsname);
|
||||||
|
|
||||||
int spa_keystore_create_mapping_impl(spa_t *spa, uint64_t dsobj, dsl_dir_t *dd,
|
int spa_keystore_create_mapping(spa_t *spa, struct dsl_dataset *ds, void *tag,
|
||||||
void *tag);
|
dsl_key_mapping_t **km_out);
|
||||||
int spa_keystore_create_mapping(spa_t *spa, struct dsl_dataset *ds, void *tag);
|
|
||||||
int spa_keystore_remove_mapping(spa_t *spa, uint64_t dsobj, void *tag);
|
int spa_keystore_remove_mapping(spa_t *spa, uint64_t dsobj, void *tag);
|
||||||
|
void key_mapping_add_ref(dsl_key_mapping_t *km, void *tag);
|
||||||
|
void key_mapping_rele(spa_t *spa, dsl_key_mapping_t *km, void *tag);
|
||||||
int spa_keystore_lookup_key(spa_t *spa, uint64_t dsobj, void *tag,
|
int spa_keystore_lookup_key(spa_t *spa, uint64_t dsobj, void *tag,
|
||||||
dsl_crypto_key_t **dck_out);
|
dsl_crypto_key_t **dck_out);
|
||||||
|
|
||||||
|
|
|
@ -49,6 +49,7 @@ struct dsl_dataset;
|
||||||
struct dsl_dir;
|
struct dsl_dir;
|
||||||
struct dsl_pool;
|
struct dsl_pool;
|
||||||
struct dsl_crypto_params;
|
struct dsl_crypto_params;
|
||||||
|
struct dsl_key_mapping;
|
||||||
|
|
||||||
#define DS_FLAG_INCONSISTENT (1ULL<<0)
|
#define DS_FLAG_INCONSISTENT (1ULL<<0)
|
||||||
#define DS_IS_INCONSISTENT(ds) \
|
#define DS_IS_INCONSISTENT(ds) \
|
||||||
|
@ -165,6 +166,7 @@ typedef struct dsl_dataset {
|
||||||
uint64_t ds_object;
|
uint64_t ds_object;
|
||||||
uint64_t ds_fsid_guid;
|
uint64_t ds_fsid_guid;
|
||||||
boolean_t ds_is_snapshot;
|
boolean_t ds_is_snapshot;
|
||||||
|
struct dsl_key_mapping *ds_key_mapping;
|
||||||
|
|
||||||
/* only used in syncing context, only valid for non-snapshots: */
|
/* only used in syncing context, only valid for non-snapshots: */
|
||||||
struct dsl_dataset *ds_prev;
|
struct dsl_dataset *ds_prev;
|
||||||
|
@ -305,10 +307,12 @@ int dsl_dataset_hold_flags(struct dsl_pool *dp, const char *name,
|
||||||
ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp);
|
ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp);
|
||||||
boolean_t dsl_dataset_try_add_ref(struct dsl_pool *dp, dsl_dataset_t *ds,
|
boolean_t dsl_dataset_try_add_ref(struct dsl_pool *dp, dsl_dataset_t *ds,
|
||||||
void *tag);
|
void *tag);
|
||||||
|
int dsl_dataset_create_key_mapping(dsl_dataset_t *ds);
|
||||||
int dsl_dataset_hold_obj(struct dsl_pool *dp, uint64_t dsobj, void *tag,
|
int dsl_dataset_hold_obj(struct dsl_pool *dp, uint64_t dsobj, void *tag,
|
||||||
dsl_dataset_t **);
|
dsl_dataset_t **);
|
||||||
int dsl_dataset_hold_obj_flags(struct dsl_pool *dp, uint64_t dsobj,
|
int dsl_dataset_hold_obj_flags(struct dsl_pool *dp, uint64_t dsobj,
|
||||||
ds_hold_flags_t flags, void *tag, dsl_dataset_t **);
|
ds_hold_flags_t flags, void *tag, dsl_dataset_t **);
|
||||||
|
void dsl_dataset_remove_key_mapping(dsl_dataset_t *ds);
|
||||||
void dsl_dataset_rele(dsl_dataset_t *ds, void *tag);
|
void dsl_dataset_rele(dsl_dataset_t *ds, void *tag);
|
||||||
void dsl_dataset_rele_flags(dsl_dataset_t *ds, ds_hold_flags_t flags,
|
void dsl_dataset_rele_flags(dsl_dataset_t *ds, ds_hold_flags_t flags,
|
||||||
void *tag);
|
void *tag);
|
||||||
|
|
|
@ -1157,6 +1157,7 @@ dmu_objset_create_sync(void *arg, dmu_tx_t *tx)
|
||||||
{
|
{
|
||||||
dmu_objset_create_arg_t *doca = arg;
|
dmu_objset_create_arg_t *doca = arg;
|
||||||
dsl_pool_t *dp = dmu_tx_pool(tx);
|
dsl_pool_t *dp = dmu_tx_pool(tx);
|
||||||
|
spa_t *spa = dp->dp_spa;
|
||||||
dsl_dir_t *pdd;
|
dsl_dir_t *pdd;
|
||||||
const char *tail;
|
const char *tail;
|
||||||
dsl_dataset_t *ds;
|
dsl_dataset_t *ds;
|
||||||
|
@ -1174,8 +1175,7 @@ dmu_objset_create_sync(void *arg, dmu_tx_t *tx)
|
||||||
DS_HOLD_FLAG_DECRYPT, FTAG, &ds));
|
DS_HOLD_FLAG_DECRYPT, FTAG, &ds));
|
||||||
rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
|
rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
|
||||||
bp = dsl_dataset_get_blkptr(ds);
|
bp = dsl_dataset_get_blkptr(ds);
|
||||||
os = dmu_objset_create_impl(pdd->dd_pool->dp_spa,
|
os = dmu_objset_create_impl(spa, ds, bp, doca->doca_type, tx);
|
||||||
ds, bp, doca->doca_type, tx);
|
|
||||||
rrw_exit(&ds->ds_bp_rwlock, FTAG);
|
rrw_exit(&ds->ds_bp_rwlock, FTAG);
|
||||||
|
|
||||||
if (doca->doca_userfunc != NULL) {
|
if (doca->doca_userfunc != NULL) {
|
||||||
|
@ -1199,7 +1199,7 @@ dmu_objset_create_sync(void *arg, dmu_tx_t *tx)
|
||||||
ds->ds_owner = FTAG;
|
ds->ds_owner = FTAG;
|
||||||
mutex_exit(&ds->ds_lock);
|
mutex_exit(&ds->ds_lock);
|
||||||
|
|
||||||
rzio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
|
rzio = zio_root(spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
|
||||||
tmpds = txg_list_remove_this(&dp->dp_dirty_datasets, ds,
|
tmpds = txg_list_remove_this(&dp->dp_dirty_datasets, ds,
|
||||||
tx->tx_txg);
|
tx->tx_txg);
|
||||||
if (tmpds != NULL) {
|
if (tmpds != NULL) {
|
||||||
|
@ -1210,8 +1210,12 @@ dmu_objset_create_sync(void *arg, dmu_tx_t *tx)
|
||||||
|
|
||||||
dmu_objset_do_userquota_updates(os, tx);
|
dmu_objset_do_userquota_updates(os, tx);
|
||||||
taskq_wait(dp->dp_sync_taskq);
|
taskq_wait(dp->dp_sync_taskq);
|
||||||
|
if (txg_list_member(&dp->dp_dirty_datasets, ds, tx->tx_txg)) {
|
||||||
|
ASSERT3P(ds->ds_key_mapping, !=, NULL);
|
||||||
|
key_mapping_rele(spa, ds->ds_key_mapping, ds);
|
||||||
|
}
|
||||||
|
|
||||||
rzio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
|
rzio = zio_root(spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
|
||||||
tmpds = txg_list_remove_this(&dp->dp_dirty_datasets, ds,
|
tmpds = txg_list_remove_this(&dp->dp_dirty_datasets, ds,
|
||||||
tx->tx_txg);
|
tx->tx_txg);
|
||||||
if (tmpds != NULL) {
|
if (tmpds != NULL) {
|
||||||
|
@ -1220,8 +1224,11 @@ dmu_objset_create_sync(void *arg, dmu_tx_t *tx)
|
||||||
}
|
}
|
||||||
VERIFY0(zio_wait(rzio));
|
VERIFY0(zio_wait(rzio));
|
||||||
|
|
||||||
if (need_sync_done)
|
if (need_sync_done) {
|
||||||
|
ASSERT3P(ds->ds_key_mapping, !=, NULL);
|
||||||
|
key_mapping_rele(spa, ds->ds_key_mapping, ds);
|
||||||
dsl_dataset_sync_done(ds, tx);
|
dsl_dataset_sync_done(ds, tx);
|
||||||
|
}
|
||||||
|
|
||||||
mutex_enter(&ds->ds_lock);
|
mutex_enter(&ds->ds_lock);
|
||||||
ds->ds_owner = NULL;
|
ds->ds_owner = NULL;
|
||||||
|
@ -1229,7 +1236,7 @@ dmu_objset_create_sync(void *arg, dmu_tx_t *tx)
|
||||||
}
|
}
|
||||||
|
|
||||||
spa_history_log_internal_ds(ds, "create", tx, "");
|
spa_history_log_internal_ds(ds, "create", tx, "");
|
||||||
zvol_create_minors(dp->dp_spa, doca->doca_name, B_TRUE);
|
zvol_create_minors(spa, doca->doca_name, B_TRUE);
|
||||||
|
|
||||||
dsl_dataset_rele_flags(ds, DS_HOLD_FLAG_DECRYPT, FTAG);
|
dsl_dataset_rele_flags(ds, DS_HOLD_FLAG_DECRYPT, FTAG);
|
||||||
dsl_dir_rele(pdd, FTAG);
|
dsl_dir_rele(pdd, FTAG);
|
||||||
|
@ -1702,7 +1709,6 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx)
|
||||||
if (os->os_raw_receive ||
|
if (os->os_raw_receive ||
|
||||||
os->os_next_write_raw[tx->tx_txg & TXG_MASK]) {
|
os->os_next_write_raw[tx->tx_txg & TXG_MASK]) {
|
||||||
ASSERT(os->os_encrypted);
|
ASSERT(os->os_encrypted);
|
||||||
os->os_next_write_raw[tx->tx_txg & TXG_MASK] = B_FALSE;
|
|
||||||
arc_convert_to_raw(os->os_phys_buf,
|
arc_convert_to_raw(os->os_phys_buf,
|
||||||
os->os_dsl_dataset->ds_object, ZFS_HOST_BYTEORDER,
|
os->os_dsl_dataset->ds_object, ZFS_HOST_BYTEORDER,
|
||||||
DMU_OT_OBJSET, NULL, NULL, NULL);
|
DMU_OT_OBJSET, NULL, NULL, NULL);
|
||||||
|
|
|
@ -896,6 +896,20 @@ spa_keystore_unload_wkey(const char *dsname)
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
dsl_dir_t *dd = NULL;
|
dsl_dir_t *dd = NULL;
|
||||||
dsl_pool_t *dp = NULL;
|
dsl_pool_t *dp = NULL;
|
||||||
|
spa_t *spa = NULL;
|
||||||
|
|
||||||
|
ret = spa_open(dsname, &spa, FTAG);
|
||||||
|
if (ret != 0)
|
||||||
|
return (ret);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Wait for any outstanding txg IO to complete, releasing any
|
||||||
|
* remaining references on the wkey.
|
||||||
|
*/
|
||||||
|
if (spa_mode(spa) != FREAD)
|
||||||
|
txg_wait_synced(spa->spa_dsl_pool, 0);
|
||||||
|
|
||||||
|
spa_close(spa, FTAG);
|
||||||
|
|
||||||
/* hold the dsl dir */
|
/* hold the dsl dir */
|
||||||
ret = dsl_pool_hold(dsname, FTAG, &dp);
|
ret = dsl_pool_hold(dsname, FTAG, &dp);
|
||||||
|
@ -935,9 +949,56 @@ error:
|
||||||
return (ret);
|
return (ret);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
key_mapping_add_ref(dsl_key_mapping_t *km, void *tag)
|
||||||
|
{
|
||||||
|
ASSERT3U(zfs_refcount_count(&km->km_refcnt), >=, 1);
|
||||||
|
zfs_refcount_add(&km->km_refcnt, tag);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The locking here is a little tricky to ensure we don't cause unnecessary
|
||||||
|
* performance problems. We want to release a key mapping whenever someone
|
||||||
|
* decrements the refcount to 0, but freeing the mapping requires removing
|
||||||
|
* it from the spa_keystore, which requires holding sk_km_lock as a writer.
|
||||||
|
* Most of the time we don't want to hold this lock as a writer, since the
|
||||||
|
* same lock is held as a reader for each IO that needs to encrypt / decrypt
|
||||||
|
* data for any dataset and in practice we will only actually free the
|
||||||
|
* mapping after unmounting a dataset.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
key_mapping_rele(spa_t *spa, dsl_key_mapping_t *km, void *tag)
|
||||||
|
{
|
||||||
|
ASSERT3U(zfs_refcount_count(&km->km_refcnt), >=, 1);
|
||||||
|
|
||||||
|
if (zfs_refcount_remove(&km->km_refcnt, tag) != 0)
|
||||||
|
return;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We think we are going to need to free the mapping. Add a
|
||||||
|
* reference to prevent most other releasers from thinking
|
||||||
|
* this might be their responsibility. This is inherently
|
||||||
|
* racy, so we will confirm that we are legitimately the
|
||||||
|
* last holder once we have the sk_km_lock as a writer.
|
||||||
|
*/
|
||||||
|
zfs_refcount_add(&km->km_refcnt, FTAG);
|
||||||
|
|
||||||
|
rw_enter(&spa->spa_keystore.sk_km_lock, RW_WRITER);
|
||||||
|
if (zfs_refcount_remove(&km->km_refcnt, FTAG) != 0) {
|
||||||
|
rw_exit(&spa->spa_keystore.sk_km_lock);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
avl_remove(&spa->spa_keystore.sk_key_mappings, km);
|
||||||
|
rw_exit(&spa->spa_keystore.sk_km_lock);
|
||||||
|
|
||||||
|
spa_keystore_dsl_key_rele(spa, km->km_key, km);
|
||||||
|
kmem_free(km, sizeof (dsl_key_mapping_t));
|
||||||
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
spa_keystore_create_mapping_impl(spa_t *spa, uint64_t dsobj,
|
spa_keystore_create_mapping(spa_t *spa, dsl_dataset_t *ds, void *tag,
|
||||||
dsl_dir_t *dd, void *tag)
|
dsl_key_mapping_t **km_out)
|
||||||
{
|
{
|
||||||
int ret;
|
int ret;
|
||||||
avl_index_t where;
|
avl_index_t where;
|
||||||
|
@ -948,14 +1009,17 @@ spa_keystore_create_mapping_impl(spa_t *spa, uint64_t dsobj,
|
||||||
km = kmem_zalloc(sizeof (dsl_key_mapping_t), KM_SLEEP);
|
km = kmem_zalloc(sizeof (dsl_key_mapping_t), KM_SLEEP);
|
||||||
zfs_refcount_create(&km->km_refcnt);
|
zfs_refcount_create(&km->km_refcnt);
|
||||||
|
|
||||||
ret = spa_keystore_dsl_key_hold_dd(spa, dd, km, &km->km_key);
|
ret = spa_keystore_dsl_key_hold_dd(spa, ds->ds_dir, km, &km->km_key);
|
||||||
if (ret != 0) {
|
if (ret != 0) {
|
||||||
zfs_refcount_destroy(&km->km_refcnt);
|
zfs_refcount_destroy(&km->km_refcnt);
|
||||||
kmem_free(km, sizeof (dsl_key_mapping_t));
|
kmem_free(km, sizeof (dsl_key_mapping_t));
|
||||||
|
|
||||||
|
if (km_out != NULL)
|
||||||
|
*km_out = NULL;
|
||||||
return (ret);
|
return (ret);
|
||||||
}
|
}
|
||||||
|
|
||||||
km->km_dsobj = dsobj;
|
km->km_dsobj = ds->ds_object;
|
||||||
|
|
||||||
rw_enter(&spa->spa_keystore.sk_km_lock, RW_WRITER);
|
rw_enter(&spa->spa_keystore.sk_km_lock, RW_WRITER);
|
||||||
|
|
||||||
|
@ -971,9 +1035,13 @@ spa_keystore_create_mapping_impl(spa_t *spa, uint64_t dsobj,
|
||||||
if (found_km != NULL) {
|
if (found_km != NULL) {
|
||||||
should_free = B_TRUE;
|
should_free = B_TRUE;
|
||||||
zfs_refcount_add(&found_km->km_refcnt, tag);
|
zfs_refcount_add(&found_km->km_refcnt, tag);
|
||||||
|
if (km_out != NULL)
|
||||||
|
*km_out = found_km;
|
||||||
} else {
|
} else {
|
||||||
zfs_refcount_add(&km->km_refcnt, tag);
|
zfs_refcount_add(&km->km_refcnt, tag);
|
||||||
avl_insert(&spa->spa_keystore.sk_key_mappings, km, where);
|
avl_insert(&spa->spa_keystore.sk_key_mappings, km, where);
|
||||||
|
if (km_out != NULL)
|
||||||
|
*km_out = km;
|
||||||
}
|
}
|
||||||
|
|
||||||
rw_exit(&spa->spa_keystore.sk_km_lock);
|
rw_exit(&spa->spa_keystore.sk_km_lock);
|
||||||
|
@ -987,25 +1055,17 @@ spa_keystore_create_mapping_impl(spa_t *spa, uint64_t dsobj,
|
||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
|
|
||||||
int
|
|
||||||
spa_keystore_create_mapping(spa_t *spa, dsl_dataset_t *ds, void *tag)
|
|
||||||
{
|
|
||||||
return (spa_keystore_create_mapping_impl(spa, ds->ds_object,
|
|
||||||
ds->ds_dir, tag));
|
|
||||||
}
|
|
||||||
|
|
||||||
int
|
int
|
||||||
spa_keystore_remove_mapping(spa_t *spa, uint64_t dsobj, void *tag)
|
spa_keystore_remove_mapping(spa_t *spa, uint64_t dsobj, void *tag)
|
||||||
{
|
{
|
||||||
int ret;
|
int ret;
|
||||||
dsl_key_mapping_t search_km;
|
dsl_key_mapping_t search_km;
|
||||||
dsl_key_mapping_t *found_km;
|
dsl_key_mapping_t *found_km;
|
||||||
boolean_t should_free = B_FALSE;
|
|
||||||
|
|
||||||
/* init the search key mapping */
|
/* init the search key mapping */
|
||||||
search_km.km_dsobj = dsobj;
|
search_km.km_dsobj = dsobj;
|
||||||
|
|
||||||
rw_enter(&spa->spa_keystore.sk_km_lock, RW_WRITER);
|
rw_enter(&spa->spa_keystore.sk_km_lock, RW_READER);
|
||||||
|
|
||||||
/* find the matching mapping */
|
/* find the matching mapping */
|
||||||
found_km = avl_find(&spa->spa_keystore.sk_key_mappings,
|
found_km = avl_find(&spa->spa_keystore.sk_key_mappings,
|
||||||
|
@ -1015,23 +1075,9 @@ spa_keystore_remove_mapping(spa_t *spa, uint64_t dsobj, void *tag)
|
||||||
goto error_unlock;
|
goto error_unlock;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* Decrement the refcount on the mapping and remove it from the tree if
|
|
||||||
* it is zero. Try to minimize time spent in this lock by deferring
|
|
||||||
* cleanup work.
|
|
||||||
*/
|
|
||||||
if (zfs_refcount_remove(&found_km->km_refcnt, tag) == 0) {
|
|
||||||
should_free = B_TRUE;
|
|
||||||
avl_remove(&spa->spa_keystore.sk_key_mappings, found_km);
|
|
||||||
}
|
|
||||||
|
|
||||||
rw_exit(&spa->spa_keystore.sk_km_lock);
|
rw_exit(&spa->spa_keystore.sk_km_lock);
|
||||||
|
|
||||||
/* destroy the key mapping */
|
key_mapping_rele(spa, found_km, tag);
|
||||||
if (should_free) {
|
|
||||||
spa_keystore_dsl_key_rele(spa, found_km->km_key, found_km);
|
|
||||||
kmem_free(found_km, sizeof (dsl_key_mapping_t));
|
|
||||||
}
|
|
||||||
|
|
||||||
return (0);
|
return (0);
|
||||||
|
|
||||||
|
|
|
@ -438,8 +438,8 @@ dsl_dataset_try_add_ref(dsl_pool_t *dp, dsl_dataset_t *ds, void *tag)
|
||||||
}
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
dsl_dataset_hold_obj_flags(dsl_pool_t *dp, uint64_t dsobj,
|
dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag,
|
||||||
ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp)
|
dsl_dataset_t **dsp)
|
||||||
{
|
{
|
||||||
objset_t *mos = dp->dp_meta_objset;
|
objset_t *mos = dp->dp_meta_objset;
|
||||||
dmu_buf_t *dbuf;
|
dmu_buf_t *dbuf;
|
||||||
|
@ -599,6 +599,7 @@ dsl_dataset_hold_obj_flags(dsl_pool_t *dp, uint64_t dsobj,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ASSERT3P(ds->ds_dbuf, ==, dbuf);
|
ASSERT3P(ds->ds_dbuf, ==, dbuf);
|
||||||
ASSERT3P(dsl_dataset_phys(ds), ==, dbuf->db_data);
|
ASSERT3P(dsl_dataset_phys(ds), ==, dbuf->db_data);
|
||||||
ASSERT(dsl_dataset_phys(ds)->ds_prev_snap_obj != 0 ||
|
ASSERT(dsl_dataset_phys(ds)->ds_prev_snap_obj != 0 ||
|
||||||
|
@ -606,22 +607,40 @@ dsl_dataset_hold_obj_flags(dsl_pool_t *dp, uint64_t dsobj,
|
||||||
dp->dp_origin_snap == NULL || ds == dp->dp_origin_snap);
|
dp->dp_origin_snap == NULL || ds == dp->dp_origin_snap);
|
||||||
*dsp = ds;
|
*dsp = ds;
|
||||||
|
|
||||||
if ((flags & DS_HOLD_FLAG_DECRYPT) && ds->ds_dir->dd_crypto_obj != 0) {
|
|
||||||
err = spa_keystore_create_mapping(dp->dp_spa, ds, ds);
|
|
||||||
if (err != 0) {
|
|
||||||
dsl_dataset_rele(ds, tag);
|
|
||||||
return (SET_ERROR(EACCES));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag,
|
dsl_dataset_create_key_mapping(dsl_dataset_t *ds)
|
||||||
dsl_dataset_t **dsp)
|
|
||||||
{
|
{
|
||||||
return (dsl_dataset_hold_obj_flags(dp, dsobj, 0, tag, dsp));
|
dsl_dir_t *dd = ds->ds_dir;
|
||||||
|
|
||||||
|
if (dd->dd_crypto_obj == 0)
|
||||||
|
return (0);
|
||||||
|
|
||||||
|
return (spa_keystore_create_mapping(dd->dd_pool->dp_spa,
|
||||||
|
ds, ds, &ds->ds_key_mapping));
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
dsl_dataset_hold_obj_flags(dsl_pool_t *dp, uint64_t dsobj,
|
||||||
|
ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp)
|
||||||
|
{
|
||||||
|
int err;
|
||||||
|
|
||||||
|
err = dsl_dataset_hold_obj(dp, dsobj, tag, dsp);
|
||||||
|
if (err != 0)
|
||||||
|
return (err);
|
||||||
|
|
||||||
|
ASSERT3P(*dsp, !=, NULL);
|
||||||
|
|
||||||
|
if (flags & DS_HOLD_FLAG_DECRYPT) {
|
||||||
|
err = dsl_dataset_create_key_mapping(*dsp);
|
||||||
|
if (err != 0)
|
||||||
|
dsl_dataset_rele(*dsp, tag);
|
||||||
|
}
|
||||||
|
|
||||||
|
return (err);
|
||||||
}
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
|
@ -788,21 +807,30 @@ dsl_dataset_namelen(dsl_dataset_t *ds)
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
dsl_dataset_rele_flags(dsl_dataset_t *ds, ds_hold_flags_t flags, void *tag)
|
dsl_dataset_rele(dsl_dataset_t *ds, void *tag)
|
||||||
{
|
{
|
||||||
if (ds->ds_dir != NULL && ds->ds_dir->dd_crypto_obj != 0 &&
|
|
||||||
(flags & DS_HOLD_FLAG_DECRYPT)) {
|
|
||||||
(void) spa_keystore_remove_mapping(ds->ds_dir->dd_pool->dp_spa,
|
|
||||||
ds->ds_object, ds);
|
|
||||||
}
|
|
||||||
|
|
||||||
dmu_buf_rele(ds->ds_dbuf, tag);
|
dmu_buf_rele(ds->ds_dbuf, tag);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
dsl_dataset_rele(dsl_dataset_t *ds, void *tag)
|
dsl_dataset_remove_key_mapping(dsl_dataset_t *ds)
|
||||||
{
|
{
|
||||||
dsl_dataset_rele_flags(ds, 0, tag);
|
dsl_dir_t *dd = ds->ds_dir;
|
||||||
|
|
||||||
|
if (dd == NULL || dd->dd_crypto_obj == 0)
|
||||||
|
return;
|
||||||
|
|
||||||
|
(void) spa_keystore_remove_mapping(dd->dd_pool->dp_spa,
|
||||||
|
ds->ds_object, ds);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
dsl_dataset_rele_flags(dsl_dataset_t *ds, ds_hold_flags_t flags, void *tag)
|
||||||
|
{
|
||||||
|
if (flags & DS_HOLD_FLAG_DECRYPT)
|
||||||
|
dsl_dataset_remove_key_mapping(ds);
|
||||||
|
|
||||||
|
dsl_dataset_rele(ds, tag);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
@ -1154,8 +1182,18 @@ dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx)
|
||||||
|
|
||||||
dp = ds->ds_dir->dd_pool;
|
dp = ds->ds_dir->dd_pool;
|
||||||
if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg)) {
|
if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg)) {
|
||||||
|
objset_t *os = ds->ds_objset;
|
||||||
|
|
||||||
/* up the hold count until we can be written out */
|
/* up the hold count until we can be written out */
|
||||||
dmu_buf_add_ref(ds->ds_dbuf, ds);
|
dmu_buf_add_ref(ds->ds_dbuf, ds);
|
||||||
|
|
||||||
|
/* if this dataset is encrypted, grab a reference to the DCK */
|
||||||
|
if (ds->ds_dir->dd_crypto_obj != 0 &&
|
||||||
|
!os->os_raw_receive &&
|
||||||
|
!os->os_next_write_raw[tx->tx_txg & TXG_MASK]) {
|
||||||
|
ASSERT3P(ds->ds_key_mapping, !=, NULL);
|
||||||
|
key_mapping_add_ref(ds->ds_key_mapping, ds);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1800,6 +1838,11 @@ dsl_dataset_sync_done(dsl_dataset_t *ds, dmu_tx_t *tx)
|
||||||
os->os_synced_dnodes = NULL;
|
os->os_synced_dnodes = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (os->os_encrypted)
|
||||||
|
os->os_next_write_raw[tx->tx_txg & TXG_MASK] = B_FALSE;
|
||||||
|
else
|
||||||
|
ASSERT0(os->os_next_write_raw[tx->tx_txg & TXG_MASK]);
|
||||||
|
|
||||||
ASSERT(!dmu_objset_is_dirty(os, dmu_tx_get_txg(tx)));
|
ASSERT(!dmu_objset_is_dirty(os, dmu_tx_get_txg(tx)));
|
||||||
|
|
||||||
dmu_buf_rele(ds->ds_dbuf, ds);
|
dmu_buf_rele(ds->ds_dbuf, ds);
|
||||||
|
|
|
@ -516,7 +516,8 @@ dsl_pool_create(spa_t *spa, nvlist_t *zplprops, dsl_crypto_params_t *dcp,
|
||||||
obj = dsl_dataset_create_sync_dd(dp->dp_root_dir, NULL, dcp, 0, tx);
|
obj = dsl_dataset_create_sync_dd(dp->dp_root_dir, NULL, dcp, 0, tx);
|
||||||
|
|
||||||
/* create the root objset */
|
/* create the root objset */
|
||||||
VERIFY0(dsl_dataset_hold_obj(dp, obj, FTAG, &ds));
|
VERIFY0(dsl_dataset_hold_obj_flags(dp, obj,
|
||||||
|
DS_HOLD_FLAG_DECRYPT, FTAG, &ds));
|
||||||
#ifdef _KERNEL
|
#ifdef _KERNEL
|
||||||
{
|
{
|
||||||
objset_t *os;
|
objset_t *os;
|
||||||
|
@ -527,7 +528,7 @@ dsl_pool_create(spa_t *spa, nvlist_t *zplprops, dsl_crypto_params_t *dcp,
|
||||||
zfs_create_fs(os, kcred, zplprops, tx);
|
zfs_create_fs(os, kcred, zplprops, tx);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
dsl_dataset_rele(ds, FTAG);
|
dsl_dataset_rele_flags(ds, DS_HOLD_FLAG_DECRYPT, FTAG);
|
||||||
|
|
||||||
dmu_tx_commit(tx);
|
dmu_tx_commit(tx);
|
||||||
|
|
||||||
|
@ -690,9 +691,22 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg)
|
||||||
*/
|
*/
|
||||||
zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
|
zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
|
||||||
while ((ds = txg_list_remove(&dp->dp_dirty_datasets, txg)) != NULL) {
|
while ((ds = txg_list_remove(&dp->dp_dirty_datasets, txg)) != NULL) {
|
||||||
|
objset_t *os = ds->ds_objset;
|
||||||
|
|
||||||
ASSERT(list_link_active(&ds->ds_synced_link));
|
ASSERT(list_link_active(&ds->ds_synced_link));
|
||||||
dmu_buf_rele(ds->ds_dbuf, ds);
|
dmu_buf_rele(ds->ds_dbuf, ds);
|
||||||
dsl_dataset_sync(ds, zio, tx);
|
dsl_dataset_sync(ds, zio, tx);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Release any key mappings created by calls to
|
||||||
|
* dsl_dataset_dirty() from the userquota accounting
|
||||||
|
* code paths.
|
||||||
|
*/
|
||||||
|
if (os->os_encrypted && !os->os_raw_receive &&
|
||||||
|
!os->os_next_write_raw[txg & TXG_MASK]) {
|
||||||
|
ASSERT3P(ds->ds_key_mapping, !=, NULL);
|
||||||
|
key_mapping_rele(dp->dp_spa, ds->ds_key_mapping, ds);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
VERIFY0(zio_wait(zio));
|
VERIFY0(zio_wait(zio));
|
||||||
|
|
||||||
|
@ -702,8 +716,17 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg)
|
||||||
*
|
*
|
||||||
* - move dead blocks from the pending deadlist to the on-disk deadlist
|
* - move dead blocks from the pending deadlist to the on-disk deadlist
|
||||||
* - release hold from dsl_dataset_dirty()
|
* - release hold from dsl_dataset_dirty()
|
||||||
|
* - release key mapping hold from dsl_dataset_dirty()
|
||||||
*/
|
*/
|
||||||
while ((ds = list_remove_head(&synced_datasets)) != NULL) {
|
while ((ds = list_remove_head(&synced_datasets)) != NULL) {
|
||||||
|
objset_t *os = ds->ds_objset;
|
||||||
|
|
||||||
|
if (os->os_encrypted && !os->os_raw_receive &&
|
||||||
|
!os->os_next_write_raw[txg & TXG_MASK]) {
|
||||||
|
ASSERT3P(ds->ds_key_mapping, !=, NULL);
|
||||||
|
key_mapping_rele(dp->dp_spa, ds->ds_key_mapping, ds);
|
||||||
|
}
|
||||||
|
|
||||||
dsl_dataset_sync_done(ds, tx);
|
dsl_dataset_sync_done(ds, tx);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -4995,7 +4995,7 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
|
||||||
uint64_t txg = TXG_INITIAL;
|
uint64_t txg = TXG_INITIAL;
|
||||||
nvlist_t **spares, **l2cache;
|
nvlist_t **spares, **l2cache;
|
||||||
uint_t nspares, nl2cache;
|
uint_t nspares, nl2cache;
|
||||||
uint64_t version, obj, root_dsobj = 0;
|
uint64_t version, obj;
|
||||||
boolean_t has_features;
|
boolean_t has_features;
|
||||||
boolean_t has_encryption;
|
boolean_t has_encryption;
|
||||||
spa_feature_t feat;
|
spa_feature_t feat;
|
||||||
|
@ -5249,27 +5249,11 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
|
||||||
|
|
||||||
dmu_tx_commit(tx);
|
dmu_tx_commit(tx);
|
||||||
|
|
||||||
/*
|
|
||||||
* If the root dataset is encrypted we will need to create key mappings
|
|
||||||
* for the zio layer before we start to write any data to disk and hold
|
|
||||||
* them until after the first txg has been synced. Waiting for the first
|
|
||||||
* transaction to complete also ensures that our bean counters are
|
|
||||||
* appropriately updated.
|
|
||||||
*/
|
|
||||||
if (dp->dp_root_dir->dd_crypto_obj != 0) {
|
|
||||||
root_dsobj = dsl_dir_phys(dp->dp_root_dir)->dd_head_dataset_obj;
|
|
||||||
VERIFY0(spa_keystore_create_mapping_impl(spa, root_dsobj,
|
|
||||||
dp->dp_root_dir, FTAG));
|
|
||||||
}
|
|
||||||
|
|
||||||
spa->spa_sync_on = B_TRUE;
|
spa->spa_sync_on = B_TRUE;
|
||||||
txg_sync_start(dp);
|
txg_sync_start(dp);
|
||||||
mmp_thread_start(spa);
|
mmp_thread_start(spa);
|
||||||
txg_wait_synced(dp, txg);
|
txg_wait_synced(dp, txg);
|
||||||
|
|
||||||
if (dp->dp_root_dir->dd_crypto_obj != 0)
|
|
||||||
VERIFY0(spa_keystore_remove_mapping(spa, root_dsobj, FTAG));
|
|
||||||
|
|
||||||
spa_spawn_aux_threads(spa);
|
spa_spawn_aux_threads(spa);
|
||||||
|
|
||||||
spa_write_cachefile(spa, B_FALSE, B_TRUE);
|
spa_write_cachefile(spa, B_FALSE, B_TRUE);
|
||||||
|
|
|
@ -3236,8 +3236,8 @@ zil_suspend(const char *osname, void **cookiep)
|
||||||
* grabbing a reference to it. If the key isn't loaded we have no
|
* grabbing a reference to it. If the key isn't loaded we have no
|
||||||
* choice but to return an error until the wrapping key is loaded.
|
* choice but to return an error until the wrapping key is loaded.
|
||||||
*/
|
*/
|
||||||
if (os->os_encrypted && spa_keystore_create_mapping(os->os_spa,
|
if (os->os_encrypted &&
|
||||||
dmu_objset_ds(os), FTAG) != 0) {
|
dsl_dataset_create_key_mapping(dmu_objset_ds(os)) != 0) {
|
||||||
zilog->zl_suspend--;
|
zilog->zl_suspend--;
|
||||||
mutex_exit(&zilog->zl_lock);
|
mutex_exit(&zilog->zl_lock);
|
||||||
dsl_dataset_long_rele(dmu_objset_ds(os), suspend_tag);
|
dsl_dataset_long_rele(dmu_objset_ds(os), suspend_tag);
|
||||||
|
@ -3259,9 +3259,10 @@ zil_suspend(const char *osname, void **cookiep)
|
||||||
zil_commit_impl(zilog, 0);
|
zil_commit_impl(zilog, 0);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Now that we've ensured all lwb's are LWB_STATE_DONE, we use
|
* Now that we've ensured all lwb's are LWB_STATE_DONE,
|
||||||
* txg_wait_synced() to ensure the data from the zilog has
|
* txg_wait_synced() will be called from within zil_destroy(),
|
||||||
* migrated to the main pool before calling zil_destroy().
|
* which will ensure the data from the zilog has migrated to the
|
||||||
|
* main pool before it returns.
|
||||||
*/
|
*/
|
||||||
txg_wait_synced(zilog->zl_dmu_pool, 0);
|
txg_wait_synced(zilog->zl_dmu_pool, 0);
|
||||||
|
|
||||||
|
@ -3272,19 +3273,8 @@ zil_suspend(const char *osname, void **cookiep)
|
||||||
cv_broadcast(&zilog->zl_cv_suspend);
|
cv_broadcast(&zilog->zl_cv_suspend);
|
||||||
mutex_exit(&zilog->zl_lock);
|
mutex_exit(&zilog->zl_lock);
|
||||||
|
|
||||||
if (os->os_encrypted) {
|
if (os->os_encrypted)
|
||||||
/*
|
dsl_dataset_remove_key_mapping(dmu_objset_ds(os));
|
||||||
* Encrypted datasets need to wait for all data to be
|
|
||||||
* synced out before removing the mapping.
|
|
||||||
*
|
|
||||||
* XXX: Depending on the number of datasets with
|
|
||||||
* outstanding ZIL data on a given log device, this
|
|
||||||
* might cause spa_offline_log() to take a long time.
|
|
||||||
*/
|
|
||||||
txg_wait_synced(zilog->zl_dmu_pool, zilog->zl_destroy_txg);
|
|
||||||
VERIFY0(spa_keystore_remove_mapping(os->os_spa,
|
|
||||||
dmu_objset_id(os), FTAG));
|
|
||||||
}
|
|
||||||
|
|
||||||
if (cookiep == NULL)
|
if (cookiep == NULL)
|
||||||
zil_resume(os);
|
zil_resume(os);
|
||||||
|
|
Loading…
Reference in New Issue