Call commit callbacks from the tail of the list
Our ZFS-backed Lustre MDT had soft lockups while under heavy metadata workloads while handling transaction callbacks from osd_zfs. The problem is that ZFS is not taking advantage of the fast path in Lustre's trans callback handling, where Lustre will skip the calls to ptlrpc_commit_replies() when it has already seen a higher transaction number. This patch corrects this; it also has a positive impact on metadata performance on Lustre with osd_zfs, plus some cleanup in the headers. A similar issue for ext4/ldiskfs is described on: https://jira.hpdd.intel.com/browse/LU-6527 Reviewed-by: Olaf Faaland <faaland1@llnl.gov> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Li Dongyang <dongyang.li@anu.edu.au> Closes #6986
This commit is contained in:
parent
44b61ea506
commit
823d48bfb1
|
@ -748,11 +748,16 @@ void dmu_tx_mark_netfree(dmu_tx_t *tx);
|
||||||
* to stable storage and will also be called if the dmu_tx is aborted.
|
* to stable storage and will also be called if the dmu_tx is aborted.
|
||||||
* If there is any error which prevents the transaction from being committed to
|
* If there is any error which prevents the transaction from being committed to
|
||||||
* disk, the callback will be called with a value of error != 0.
|
* disk, the callback will be called with a value of error != 0.
|
||||||
|
*
|
||||||
|
* When multiple callbacks are registered to the transaction, the callbacks
|
||||||
|
* will be called in reverse order to let Lustre, the only user of commit
|
||||||
|
* callback currently, take the fast path of its commit callback handling.
|
||||||
*/
|
*/
|
||||||
typedef void dmu_tx_callback_func_t(void *dcb_data, int error);
|
typedef void dmu_tx_callback_func_t(void *dcb_data, int error);
|
||||||
|
|
||||||
void dmu_tx_callback_register(dmu_tx_t *tx, dmu_tx_callback_func_t *dcb_func,
|
void dmu_tx_callback_register(dmu_tx_t *tx, dmu_tx_callback_func_t *dcb_func,
|
||||||
void *dcb_data);
|
void *dcb_data);
|
||||||
|
void dmu_tx_do_callbacks(list_t *cb_list, int error);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Free up the data blocks for a defined range of a file. If size is
|
* Free up the data blocks for a defined range of a file. If size is
|
||||||
|
|
|
@ -145,10 +145,6 @@ uint64_t dmu_tx_get_txg(dmu_tx_t *tx);
|
||||||
struct dsl_pool *dmu_tx_pool(dmu_tx_t *tx);
|
struct dsl_pool *dmu_tx_pool(dmu_tx_t *tx);
|
||||||
void dmu_tx_wait(dmu_tx_t *tx);
|
void dmu_tx_wait(dmu_tx_t *tx);
|
||||||
|
|
||||||
void dmu_tx_callback_register(dmu_tx_t *tx, dmu_tx_callback_func_t *dcb_func,
|
|
||||||
void *dcb_data);
|
|
||||||
void dmu_tx_do_callbacks(list_t *cb_list, int error);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* These routines are defined in dmu_spa.h, and are called by the SPA.
|
* These routines are defined in dmu_spa.h, and are called by the SPA.
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -1197,7 +1197,7 @@ dmu_tx_do_callbacks(list_t *cb_list, int error)
|
||||||
{
|
{
|
||||||
dmu_tx_callback_t *dcb;
|
dmu_tx_callback_t *dcb;
|
||||||
|
|
||||||
while ((dcb = list_head(cb_list)) != NULL) {
|
while ((dcb = list_tail(cb_list)) != NULL) {
|
||||||
list_remove(cb_list, dcb);
|
list_remove(cb_list, dcb);
|
||||||
dcb->dcb_func(dcb->dcb_data, error);
|
dcb->dcb_func(dcb->dcb_data, error);
|
||||||
kmem_free(dcb, sizeof (dmu_tx_callback_t));
|
kmem_free(dcb, sizeof (dmu_tx_callback_t));
|
||||||
|
|
Loading…
Reference in New Issue