Call commit callbacks from the tail of the list
Our ZFS-backed Lustre MDT experienced soft lockups under heavy metadata workloads while handling transaction callbacks from osd_zfs. The problem is that ZFS does not take advantage of the fast path in Lustre's transaction callback handling, where Lustre skips the calls to ptlrpc_commit_replies() when it has already seen a higher transaction number. This patch corrects that by calling the commit callbacks from the tail of the list. It also has a positive impact on metadata performance on Lustre with osd_zfs, and includes some cleanup in the headers. A similar issue for ext4/ldiskfs is described at: https://jira.hpdd.intel.com/browse/LU-6527 Reviewed-by: Olaf Faaland <faaland1@llnl.gov> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Li Dongyang <dongyang.li@anu.edu.au> Closes #6986
This commit is contained in:
parent
c2aacf2087
commit
8d82a19def
|
@ -713,11 +713,16 @@ void dmu_tx_mark_netfree(dmu_tx_t *tx);
|
|||
* to stable storage and will also be called if the dmu_tx is aborted.
|
||||
* If there is any error which prevents the transaction from being committed to
|
||||
* disk, the callback will be called with a value of error != 0.
|
||||
*
|
||||
* When multiple callbacks are registered to the transaction, the callbacks
|
||||
* will be called in reverse order to let Lustre, the only user of commit
|
||||
* callback currently, take the fast path of its commit callback handling.
|
||||
*/
|
||||
typedef void dmu_tx_callback_func_t(void *dcb_data, int error);
|
||||
|
||||
void dmu_tx_callback_register(dmu_tx_t *tx, dmu_tx_callback_func_t *dcb_func,
|
||||
void *dcb_data);
|
||||
void dmu_tx_do_callbacks(list_t *cb_list, int error);
|
||||
|
||||
/*
|
||||
* Free up the data blocks for a defined range of a file. If size is
|
||||
|
|
|
@ -145,10 +145,6 @@ uint64_t dmu_tx_get_txg(dmu_tx_t *tx);
|
|||
struct dsl_pool *dmu_tx_pool(dmu_tx_t *tx);
|
||||
void dmu_tx_wait(dmu_tx_t *tx);
|
||||
|
||||
void dmu_tx_callback_register(dmu_tx_t *tx, dmu_tx_callback_func_t *dcb_func,
|
||||
void *dcb_data);
|
||||
void dmu_tx_do_callbacks(list_t *cb_list, int error);
|
||||
|
||||
/*
|
||||
* These routines are defined in dmu_spa.h, and are called by the SPA.
|
||||
*/
|
||||
|
|
|
@ -1200,7 +1200,7 @@ dmu_tx_do_callbacks(list_t *cb_list, int error)
|
|||
{
|
||||
dmu_tx_callback_t *dcb;
|
||||
|
||||
while ((dcb = list_head(cb_list)) != NULL) {
|
||||
while ((dcb = list_tail(cb_list)) != NULL) {
|
||||
list_remove(cb_list, dcb);
|
||||
dcb->dcb_func(dcb->dcb_data, error);
|
||||
kmem_free(dcb, sizeof (dmu_tx_callback_t));
|
||||
|
|
Loading…
Reference in New Issue