Fix dsl_scan_ds_clone_swapped logic

The logic was incorrect with respect to swapping dataset IDs both in the
on-disk ZAP object and the in-memory queue.

In both cases, if ds1 was already present, then it would be first
replaced with ds2 and then ds2 would be replaced back with ds1.
Also, both cases did not properly handle a situation where both ds1 and
ds2 are already queued.  A duplicate insertion would be attempted and
its failure would result in a panic.

Reviewed-by: Matt Ahrens <matt@delphix.com>
Reviewed-by: Tom Caputi <tcaputi@datto.com>
Signed-off-by: Andriy Gapon <avg@FreeBSD.org>
Closes #9140
Closes #9163
This commit is contained in:
Andriy Gapon 2019-09-18 19:04:45 +03:00 committed by Tony Hutter
parent 63d8f57fe7
commit 12a78fbb4f
1 changed files with 70 additions and 32 deletions

View File

@ -2165,16 +2165,17 @@ ds_clone_swapped_bookmark(dsl_dataset_t *ds1, dsl_dataset_t *ds2,
} }
/* /*
* Called when a parent dataset and its clone are swapped. If we were * Called when an origin dataset and its clone are swapped. If we were
* currently traversing the dataset, we need to switch to traversing the * currently traversing the dataset, we need to switch to traversing the
* newly promoted parent. * newly promoted clone.
*/ */
void void
dsl_scan_ds_clone_swapped(dsl_dataset_t *ds1, dsl_dataset_t *ds2, dmu_tx_t *tx) dsl_scan_ds_clone_swapped(dsl_dataset_t *ds1, dsl_dataset_t *ds2, dmu_tx_t *tx)
{ {
dsl_pool_t *dp = ds1->ds_dir->dd_pool; dsl_pool_t *dp = ds1->ds_dir->dd_pool;
dsl_scan_t *scn = dp->dp_scan; dsl_scan_t *scn = dp->dp_scan;
uint64_t mintxg; uint64_t mintxg1, mintxg2;
boolean_t ds1_queued, ds2_queued;
if (!dsl_scan_is_running(scn)) if (!dsl_scan_is_running(scn))
return; return;
@ -2182,44 +2183,81 @@ dsl_scan_ds_clone_swapped(dsl_dataset_t *ds1, dsl_dataset_t *ds2, dmu_tx_t *tx)
ds_clone_swapped_bookmark(ds1, ds2, &scn->scn_phys.scn_bookmark); ds_clone_swapped_bookmark(ds1, ds2, &scn->scn_phys.scn_bookmark);
ds_clone_swapped_bookmark(ds1, ds2, &scn->scn_phys_cached.scn_bookmark); ds_clone_swapped_bookmark(ds1, ds2, &scn->scn_phys_cached.scn_bookmark);
if (scan_ds_queue_contains(scn, ds1->ds_object, &mintxg)) { /*
scan_ds_queue_remove(scn, ds1->ds_object); * Handle the in-memory scan queue.
scan_ds_queue_insert(scn, ds2->ds_object, mintxg); */
ds1_queued = scan_ds_queue_contains(scn, ds1->ds_object, &mintxg1);
ds2_queued = scan_ds_queue_contains(scn, ds2->ds_object, &mintxg2);
/* Sanity checking. */
if (ds1_queued) {
ASSERT3U(mintxg1, ==, dsl_dataset_phys(ds1)->ds_prev_snap_txg);
ASSERT3U(mintxg1, ==, dsl_dataset_phys(ds2)->ds_prev_snap_txg);
} }
if (scan_ds_queue_contains(scn, ds2->ds_object, &mintxg)) { if (ds2_queued) {
scan_ds_queue_remove(scn, ds2->ds_object); ASSERT3U(mintxg2, ==, dsl_dataset_phys(ds1)->ds_prev_snap_txg);
scan_ds_queue_insert(scn, ds1->ds_object, mintxg); ASSERT3U(mintxg2, ==, dsl_dataset_phys(ds2)->ds_prev_snap_txg);
} }
if (zap_lookup_int_key(dp->dp_meta_objset, scn->scn_phys.scn_queue_obj, if (ds1_queued && ds2_queued) {
ds1->ds_object, &mintxg) == 0) { /*
int err; * If both are queued, we don't need to do anything.
ASSERT3U(mintxg, ==, dsl_dataset_phys(ds1)->ds_prev_snap_txg); * The swapping code below would not handle this case correctly,
ASSERT3U(mintxg, ==, dsl_dataset_phys(ds2)->ds_prev_snap_txg); * since we can't insert ds2 if it is already there. That's
VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset, * because scan_ds_queue_insert() prohibits a duplicate insert
* and panics.
*/
} else if (ds1_queued) {
scan_ds_queue_remove(scn, ds1->ds_object);
scan_ds_queue_insert(scn, ds2->ds_object, mintxg1);
} else if (ds2_queued) {
scan_ds_queue_remove(scn, ds2->ds_object);
scan_ds_queue_insert(scn, ds1->ds_object, mintxg2);
}
/*
* Handle the on-disk scan queue.
* The on-disk state is an out-of-date version of the in-memory state,
* so the in-memory and on-disk values for ds1_queued and ds2_queued may
* be different. Therefore we need to apply the swap logic to the
* on-disk state independently of the in-memory state.
*/
ds1_queued = zap_lookup_int_key(dp->dp_meta_objset,
scn->scn_phys.scn_queue_obj, ds1->ds_object, &mintxg1) == 0;
ds2_queued = zap_lookup_int_key(dp->dp_meta_objset,
scn->scn_phys.scn_queue_obj, ds2->ds_object, &mintxg2) == 0;
/* Sanity checking. */
if (ds1_queued) {
ASSERT3U(mintxg1, ==, dsl_dataset_phys(ds1)->ds_prev_snap_txg);
ASSERT3U(mintxg1, ==, dsl_dataset_phys(ds2)->ds_prev_snap_txg);
}
if (ds2_queued) {
ASSERT3U(mintxg2, ==, dsl_dataset_phys(ds1)->ds_prev_snap_txg);
ASSERT3U(mintxg2, ==, dsl_dataset_phys(ds2)->ds_prev_snap_txg);
}
if (ds1_queued && ds2_queued) {
/*
* If both are queued, we don't need to do anything.
* Alternatively, we could check for EEXIST from
* zap_add_int_key() and back out to the original state, but
* that would be more work than checking for this case upfront.
*/
} else if (ds1_queued) {
VERIFY3S(0, ==, zap_remove_int(dp->dp_meta_objset,
scn->scn_phys.scn_queue_obj, ds1->ds_object, tx)); scn->scn_phys.scn_queue_obj, ds1->ds_object, tx));
err = zap_add_int_key(dp->dp_meta_objset, VERIFY3S(0, ==, zap_add_int_key(dp->dp_meta_objset,
scn->scn_phys.scn_queue_obj, ds2->ds_object, mintxg, tx); scn->scn_phys.scn_queue_obj, ds2->ds_object, mintxg1, tx));
VERIFY(err == 0 || err == EEXIST);
if (err == EEXIST) {
/* Both were there to begin with */
VERIFY(0 == zap_add_int_key(dp->dp_meta_objset,
scn->scn_phys.scn_queue_obj,
ds1->ds_object, mintxg, tx));
}
zfs_dbgmsg("clone_swap ds %llu; in queue; " zfs_dbgmsg("clone_swap ds %llu; in queue; "
"replacing with %llu", "replacing with %llu",
(u_longlong_t)ds1->ds_object, (u_longlong_t)ds1->ds_object,
(u_longlong_t)ds2->ds_object); (u_longlong_t)ds2->ds_object);
} } else if (ds2_queued) {
if (zap_lookup_int_key(dp->dp_meta_objset, scn->scn_phys.scn_queue_obj, VERIFY3S(0, ==, zap_remove_int(dp->dp_meta_objset,
ds2->ds_object, &mintxg) == 0) {
ASSERT3U(mintxg, ==, dsl_dataset_phys(ds1)->ds_prev_snap_txg);
ASSERT3U(mintxg, ==, dsl_dataset_phys(ds2)->ds_prev_snap_txg);
VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset,
scn->scn_phys.scn_queue_obj, ds2->ds_object, tx)); scn->scn_phys.scn_queue_obj, ds2->ds_object, tx));
VERIFY(0 == zap_add_int_key(dp->dp_meta_objset, VERIFY3S(0, ==, zap_add_int_key(dp->dp_meta_objset,
scn->scn_phys.scn_queue_obj, ds1->ds_object, mintxg, tx)); scn->scn_phys.scn_queue_obj, ds1->ds_object, mintxg2, tx));
zfs_dbgmsg("clone_swap ds %llu; in queue; " zfs_dbgmsg("clone_swap ds %llu; in queue; "
"replacing with %llu", "replacing with %llu",
(u_longlong_t)ds2->ds_object, (u_longlong_t)ds2->ds_object,