Fix commit callbacks

The upstream commit cb code had a few bugs:

1) The arguments of the list_move_tail() call in txg_dispatch_callbacks()
were reversed by mistake. This caused the commit callbacks to not be
called at all.

2) ztest had a bug in ztest_dmu_commit_callbacks() where "error" was not
initialized correctly. This seems to have caused the test to always take
the simulated error code path, which made ztest unable to detect whether
commit cbs were being called for transactions that successfully complete.

3) ztest had another bug in ztest_dmu_commit_callbacks() where the commit
cb threshold was not being compared correctly.

4) The commit cb taskq was using 'max_ncpus * 2' as the maxalloc argument
of taskq_create(), which could have caused unnecessary delays in the txg
sync thread.

Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
This commit is contained in:
Ricardo M. Correia 2010-08-26 10:17:18 -07:00 committed by Brian Behlendorf
parent a6098088eb
commit 090ff0929e
2 changed files with 35 additions and 18 deletions

View File

@ -341,6 +341,22 @@ static boolean_t ztest_exiting;
/* Global commit callback list */ /* Global commit callback list */
static ztest_cb_list_t zcl; static ztest_cb_list_t zcl;
/* Commit cb delay */
static uint64_t zc_min_txg_delay = UINT64_MAX;
static int zc_cb_counter = 0;
/*
* Minimum number of commit callbacks that need to be registered for us to check
* whether the minimum txg delay is acceptable.
*/
#define ZTEST_COMMIT_CB_MIN_REG 100
/*
* If a number of txgs equal to this threshold have been created after a commit
* callback has been registered but not called, then we assume there is an
* implementation bug.
*/
#define ZTEST_COMMIT_CB_THRESH (TXG_CONCURRENT_STATES + 1000)
extern uint64_t metaslab_gang_bang; extern uint64_t metaslab_gang_bang;
extern uint64_t metaslab_df_alloc_threshold; extern uint64_t metaslab_df_alloc_threshold;
@ -4092,18 +4108,20 @@ ztest_commit_callback(void *arg, int error)
return; return;
} }
/* Was this callback added to the global callback list? */ ASSERT(data->zcd_added);
if (!data->zcd_added)
goto out;
ASSERT3U(data->zcd_txg, !=, 0); ASSERT3U(data->zcd_txg, !=, 0);
/* Remove our callback from the list */
(void) mutex_lock(&zcl.zcl_callbacks_lock); (void) mutex_lock(&zcl.zcl_callbacks_lock);
/* See if this cb was called more quickly */
if ((synced_txg - data->zcd_txg) < zc_min_txg_delay)
zc_min_txg_delay = synced_txg - data->zcd_txg;
/* Remove our callback from the list */
list_remove(&zcl.zcl_callbacks, data); list_remove(&zcl.zcl_callbacks, data);
(void) mutex_unlock(&zcl.zcl_callbacks_lock); (void) mutex_unlock(&zcl.zcl_callbacks_lock);
out:
umem_free(data, sizeof (ztest_cb_data_t)); umem_free(data, sizeof (ztest_cb_data_t));
} }
@ -4121,13 +4139,6 @@ ztest_create_cb_data(objset_t *os, uint64_t txg)
return (cb_data); return (cb_data);
} }
/*
* If a number of txgs equal to this threshold have been created after a commit
* callback has been registered but not called, then we assume there is an
* implementation bug.
*/
#define ZTEST_COMMIT_CALLBACK_THRESH (TXG_CONCURRENT_STATES + 2)
/* /*
* Commit callback test. * Commit callback test.
*/ */
@ -4139,7 +4150,7 @@ ztest_dmu_commit_callbacks(ztest_ds_t *zd, uint64_t id)
dmu_tx_t *tx; dmu_tx_t *tx;
ztest_cb_data_t *cb_data[3], *tmp_cb; ztest_cb_data_t *cb_data[3], *tmp_cb;
uint64_t old_txg, txg; uint64_t old_txg, txg;
int i, error; int i, error = 0;
ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, 0, 0); ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, 0, 0);
@ -4219,7 +4230,7 @@ ztest_dmu_commit_callbacks(ztest_ds_t *zd, uint64_t id)
*/ */
tmp_cb = list_head(&zcl.zcl_callbacks); tmp_cb = list_head(&zcl.zcl_callbacks);
if (tmp_cb != NULL && if (tmp_cb != NULL &&
tmp_cb->zcd_txg > txg - ZTEST_COMMIT_CALLBACK_THRESH) { tmp_cb->zcd_txg + ZTEST_COMMIT_CB_THRESH < txg) {
fatal(0, "Commit callback threshold exceeded, oldest txg: %" fatal(0, "Commit callback threshold exceeded, oldest txg: %"
PRIu64 ", open txg: %" PRIu64 "\n", tmp_cb->zcd_txg, txg); PRIu64 ", open txg: %" PRIu64 "\n", tmp_cb->zcd_txg, txg);
} }
@ -4250,6 +4261,8 @@ ztest_dmu_commit_callbacks(ztest_ds_t *zd, uint64_t id)
tmp_cb = cb_data[i]; tmp_cb = cb_data[i];
} }
zc_cb_counter += 3;
(void) mutex_unlock(&zcl.zcl_callbacks_lock); (void) mutex_unlock(&zcl.zcl_callbacks_lock);
dmu_tx_commit(tx); dmu_tx_commit(tx);
@ -5256,6 +5269,10 @@ ztest_run(ztest_shared_t *zs)
for (uint64_t object = 1; object < 50; object++) for (uint64_t object = 1; object < 50; object++)
dmu_prefetch(spa->spa_meta_objset, object, 0, 1ULL << 20); dmu_prefetch(spa->spa_meta_objset, object, 0, 1ULL << 20);
/* Verify that at least one commit cb was called in a timely fashion */
if (zc_cb_counter >= ZTEST_COMMIT_CB_MIN_REG)
VERIFY3U(zc_min_txg_delay, ==, 0);
spa_close(spa, FTAG); spa_close(spa, FTAG);
/* /*

View File

@ -335,15 +335,15 @@ txg_dispatch_callbacks(dsl_pool_t *dp, uint64_t txg)
* Commit callback taskq hasn't been created yet. * Commit callback taskq hasn't been created yet.
*/ */
tx->tx_commit_cb_taskq = taskq_create("tx_commit_cb", tx->tx_commit_cb_taskq = taskq_create("tx_commit_cb",
max_ncpus, minclsyspri, max_ncpus, max_ncpus * 2, 100, minclsyspri, max_ncpus, INT_MAX,
TASKQ_PREPOPULATE); TASKQ_THREADS_CPU_PCT | TASKQ_PREPOPULATE);
} }
cb_list = kmem_alloc(sizeof (list_t), KM_SLEEP); cb_list = kmem_alloc(sizeof (list_t), KM_SLEEP);
list_create(cb_list, sizeof (dmu_tx_callback_t), list_create(cb_list, sizeof (dmu_tx_callback_t),
offsetof(dmu_tx_callback_t, dcb_node)); offsetof(dmu_tx_callback_t, dcb_node));
list_move_tail(&tc->tc_callbacks[g], cb_list); list_move_tail(cb_list, &tc->tc_callbacks[g]);
(void) taskq_dispatch(tx->tx_commit_cb_taskq, (task_func_t *) (void) taskq_dispatch(tx->tx_commit_cb_taskq, (task_func_t *)
txg_do_callbacks, cb_list, TQ_SLEEP); txg_do_callbacks, cb_list, TQ_SLEEP);