ZIL claiming should not start user accounting

Currently, ZIL claiming dirties objsets which causes
dsl_pool_sync() to attempt to perform user accounting on
them. This causes problems for encrypted datasets that were
raw received before the system went offline, since they
cannot perform user accounting until they have their keys
loaded; the attempt triggers an ASSERT in zio_encrypt(). Since
encryption was added, the code now depends on the fact that
data should only be written when objsets are owned. This
patch adds a check in dmu_objset_do_userquota_updates()
to ensure that user accounting is only done when the objsets
are actually owned for write. As part of this work, the
zfsvfs and zvol code was updated so that it no longer lies
about owning objsets readonly.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Tom Caputi <tcaputi@datto.com>
Closes #6916
Closes #7163
This commit is contained in:
Tom Caputi 2018-02-20 19:27:31 -05:00 committed by Brian Behlendorf
parent cbce581353
commit 163a8c28dd
5 changed files with 44 additions and 38 deletions

View File

@ -204,7 +204,7 @@ extern boolean_t zfs_id_overobjquota(zfsvfs_t *zfsvfs, uint64_t usedobj,
extern boolean_t zfs_id_overquota(zfsvfs_t *zfsvfs, uint64_t usedobj, extern boolean_t zfs_id_overquota(zfsvfs_t *zfsvfs, uint64_t usedobj,
uint64_t id); uint64_t id);
extern int zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers); extern int zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers);
extern int zfsvfs_create(const char *name, zfsvfs_t **zfvp); extern int zfsvfs_create(const char *name, boolean_t readony, zfsvfs_t **zfvp);
extern int zfsvfs_create_impl(zfsvfs_t **zfvp, zfsvfs_t *zfsvfs, objset_t *os); extern int zfsvfs_create_impl(zfsvfs_t **zfvp, zfsvfs_t *zfsvfs, objset_t *os);
extern void zfsvfs_free(zfsvfs_t *zfsvfs); extern void zfsvfs_free(zfsvfs_t *zfsvfs);
extern int zfs_check_global_label(const char *dsname, const char *hexsl); extern int zfs_check_global_label(const char *dsname, const char *hexsl);

View File

@ -726,9 +726,15 @@ dmu_objset_own(const char *name, dmu_objset_type_t type,
return (err); return (err);
} }
/* user accounting requires the dataset to be decrypted */ /*
* User accounting requires the dataset to be decrypted and rw.
* We also don't begin user accounting during claiming to help
* speed up pool import times and to keep this txg reserved
* completely for recovery work.
*/
if ((dmu_objset_userobjspace_upgradable(*osp) || if ((dmu_objset_userobjspace_upgradable(*osp) ||
dmu_objset_projectquota_upgradable(*osp)) && dmu_objset_projectquota_upgradable(*osp)) &&
!readonly && !dp->dp_spa->spa_claiming &&
(ds->ds_dir->dd_crypto_obj == 0 || decrypt)) (ds->ds_dir->dd_crypto_obj == 0 || decrypt))
dmu_objset_id_quota_upgrade(*osp); dmu_objset_id_quota_upgrade(*osp);
@ -1897,10 +1903,19 @@ dmu_objset_do_userquota_updates(objset_t *os, dmu_tx_t *tx)
if (!dmu_objset_userused_enabled(os)) if (!dmu_objset_userused_enabled(os))
return; return;
/* if this is a raw receive just return and handle accounting later */ /*
* If this is a raw receive just return and handle accounting
* later when we have the keys loaded. We also don't do user
* accounting during claiming since the datasets are not owned
* for the duration of claiming and this txg should only be
* used for recovery.
*/
if (os->os_encrypted && dmu_objset_is_receiving(os)) if (os->os_encrypted && dmu_objset_is_receiving(os))
return; return;
if (tx->tx_txg <= os->os_spa->spa_claim_max_txg)
return;
/* Allocate the user/group/project used objects if necessary. */ /* Allocate the user/group/project used objects if necessary. */
if (DMU_USERUSED_DNODE(os)->dn_type == DMU_OT_NONE) { if (DMU_USERUSED_DNODE(os)->dn_type == DMU_OT_NONE) {
VERIFY0(zap_create_claim(os, VERIFY0(zap_create_claim(os,

View File

@ -1472,7 +1472,7 @@ zfsvfs_hold(const char *name, void *tag, zfsvfs_t **zfvp, boolean_t writer)
int error = 0; int error = 0;
if (getzfsvfs(name, zfvp) != 0) if (getzfsvfs(name, zfvp) != 0)
error = zfsvfs_create(name, zfvp); error = zfsvfs_create(name, B_FALSE, zfvp);
if (error == 0) { if (error == 0) {
rrm_enter(&(*zfvp)->z_teardown_lock, (writer) ? RW_WRITER : rrm_enter(&(*zfvp)->z_teardown_lock, (writer) ? RW_WRITER :
RW_READER, tag); RW_READER, tag);

View File

@ -1136,21 +1136,16 @@ zfsvfs_init(zfsvfs_t *zfsvfs, objset_t *os)
} }
int int
zfsvfs_create(const char *osname, zfsvfs_t **zfvp) zfsvfs_create(const char *osname, boolean_t readonly, zfsvfs_t **zfvp)
{ {
objset_t *os; objset_t *os;
zfsvfs_t *zfsvfs; zfsvfs_t *zfsvfs;
int error; int error;
boolean_t ro = (readonly || (strchr(osname, '@') != NULL));
zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP);
/* error = dmu_objset_own(osname, DMU_OST_ZFS, ro, B_TRUE, zfsvfs, &os);
* We claim to always be readonly so we can open snapshots;
* other ZPL code will prevent us from writing to snapshots.
*/
error = dmu_objset_own(osname, DMU_OST_ZFS, B_TRUE, B_TRUE,
zfsvfs, &os);
if (error != 0) { if (error != 0) {
kmem_free(zfsvfs, sizeof (zfsvfs_t)); kmem_free(zfsvfs, sizeof (zfsvfs_t));
return (error); return (error);
@ -1209,14 +1204,6 @@ zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting)
int error; int error;
boolean_t readonly = zfs_is_readonly(zfsvfs); boolean_t readonly = zfs_is_readonly(zfsvfs);
/*
* Check for a bad on-disk format version now since we
* lied about owning the dataset readonly before.
*/
if (!readonly &&
dmu_objset_incompatible_encryption_version(zfsvfs->z_os))
return (SET_ERROR(EROFS));
error = zfs_register_callbacks(zfsvfs->z_vfs); error = zfs_register_callbacks(zfsvfs->z_vfs);
if (error) if (error)
return (error); return (error);
@ -1786,24 +1773,30 @@ zfs_domount(struct super_block *sb, zfs_mnt_t *zm, int silent)
struct inode *root_inode; struct inode *root_inode;
uint64_t recordsize; uint64_t recordsize;
int error = 0; int error = 0;
zfsvfs_t *zfsvfs; zfsvfs_t *zfsvfs = NULL;
vfs_t *vfs = NULL;
ASSERT(zm); ASSERT(zm);
ASSERT(osname); ASSERT(osname);
error = zfsvfs_create(osname, &zfsvfs); error = zfsvfs_parse_options(zm->mnt_data, &vfs);
if (error) if (error)
return (error); return (error);
error = zfsvfs_parse_options(zm->mnt_data, &zfsvfs->z_vfs); error = zfsvfs_create(osname, vfs->vfs_readonly, &zfsvfs);
if (error) if (error) {
zfsvfs_vfs_free(vfs);
goto out; goto out;
}
if ((error = dsl_prop_get_integer(osname, "recordsize", if ((error = dsl_prop_get_integer(osname, "recordsize",
&recordsize, NULL))) &recordsize, NULL))) {
zfsvfs_vfs_free(vfs);
goto out; goto out;
}
zfsvfs->z_vfs->vfs_data = zfsvfs; vfs->vfs_data = zfsvfs;
zfsvfs->z_vfs = vfs;
zfsvfs->z_sb = sb; zfsvfs->z_sb = sb;
sb->s_fs_info = zfsvfs; sb->s_fs_info = zfsvfs;
sb->s_magic = ZFS_SUPER_MAGIC; sb->s_magic = ZFS_SUPER_MAGIC;
@ -1875,8 +1868,10 @@ zfs_domount(struct super_block *sb, zfs_mnt_t *zm, int silent)
zfsvfs->z_arc_prune = arc_add_prune_callback(zpl_prune_sb, sb); zfsvfs->z_arc_prune = arc_add_prune_callback(zpl_prune_sb, sb);
out: out:
if (error) { if (error) {
if (zfsvfs != NULL) {
dmu_objset_disown(zfsvfs->z_os, B_TRUE, zfsvfs); dmu_objset_disown(zfsvfs->z_os, B_TRUE, zfsvfs);
zfsvfs_free(zfsvfs); zfsvfs_free(zfsvfs);
}
/* /*
* make sure we don't have dangling sb->s_fs_info which * make sure we don't have dangling sb->s_fs_info which
* zfs_preumount will use. * zfs_preumount will use.

View File

@ -1289,10 +1289,11 @@ zvol_resume(zvol_state_t *zv)
} }
static int static int
zvol_first_open(zvol_state_t *zv) zvol_first_open(zvol_state_t *zv, boolean_t readonly)
{ {
objset_t *os; objset_t *os;
int error, locked = 0; int error, locked = 0;
boolean_t ro;
ASSERT(RW_READ_HELD(&zv->zv_suspend_lock)); ASSERT(RW_READ_HELD(&zv->zv_suspend_lock));
ASSERT(MUTEX_HELD(&zv->zv_state_lock)); ASSERT(MUTEX_HELD(&zv->zv_state_lock));
@ -1321,8 +1322,8 @@ zvol_first_open(zvol_state_t *zv)
return (-SET_ERROR(ERESTARTSYS)); return (-SET_ERROR(ERESTARTSYS));
} }
/* lie and say we're read-only */ ro = (readonly || (strchr(zv->zv_name, '@') != NULL));
error = dmu_objset_own(zv->zv_name, DMU_OST_ZVOL, 1, 1, zv, &os); error = dmu_objset_own(zv->zv_name, DMU_OST_ZVOL, ro, B_TRUE, zv, &os);
if (error) if (error)
goto out_mutex; goto out_mutex;
@ -1401,17 +1402,12 @@ zvol_open(struct block_device *bdev, fmode_t flag)
ASSERT(zv->zv_open_count != 0 || RW_READ_HELD(&zv->zv_suspend_lock)); ASSERT(zv->zv_open_count != 0 || RW_READ_HELD(&zv->zv_suspend_lock));
if (zv->zv_open_count == 0) { if (zv->zv_open_count == 0) {
error = zvol_first_open(zv); error = zvol_first_open(zv, !(flag & FMODE_WRITE));
if (error) if (error)
goto out_mutex; goto out_mutex;
} }
/* if ((flag & FMODE_WRITE) && (zv->zv_flags & ZVOL_RDONLY)) {
* Check for a bad on-disk format version now since we
* lied about owning the dataset readonly before.
*/
if ((flag & FMODE_WRITE) && ((zv->zv_flags & ZVOL_RDONLY) ||
dmu_objset_incompatible_encryption_version(zv->zv_objset))) {
error = -EROFS; error = -EROFS;
goto out_open_count; goto out_open_count;
} }