Don't acquire zthr_request_lock in zthr_wakeup
Address a deadlock caused by simultaneous wakeup and cancel on a zthr by remove the hold of zthr_request_lock from zthr_wakeup. This allows thr_wakeup to not block a thread that is in the process of being cancelled. Reviewed-by: Matt Ahrens <mahrens@delphix.com> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: Serapheim Dimitropoulos <serapheim@delphix.com> Signed-off-by: Sara Hartse <sara.hartse@delphix.com> Closes #8333
This commit is contained in:
parent
21e7cf5da8
commit
2747f599ff
|
@ -92,7 +92,16 @@
|
||||||
* user should take this into account when writing a checkfunc.
|
* user should take this into account when writing a checkfunc.
|
||||||
* [see ZTHR state transitions]
|
* [see ZTHR state transitions]
|
||||||
*
|
*
|
||||||
* == ZTHR cancellation
|
* == ZTHR wakeup
|
||||||
|
*
|
||||||
|
* ZTHR wakeup should be used when new work is added for the zthr. The
|
||||||
|
* sleeping zthr will wakeup, see that it has more work to complete
|
||||||
|
* and proceed. This can be invoked from open or syncing context.
|
||||||
|
*
|
||||||
|
* To wakeup a zthr:
|
||||||
|
* zthr_wakeup(zthr_t *t)
|
||||||
|
*
|
||||||
|
* == ZTHR cancellation and resumption
|
||||||
*
|
*
|
||||||
* ZTHR threads must be cancelled when their SPA is being exported
|
* ZTHR threads must be cancelled when their SPA is being exported
|
||||||
* or when they need to be paused so they don't interfere with other
|
* or when they need to be paused so they don't interfere with other
|
||||||
|
@ -104,6 +113,9 @@
|
||||||
* To resume it:
|
* To resume it:
|
||||||
* zthr_resume(zthr_pointer);
|
* zthr_resume(zthr_pointer);
|
||||||
*
|
*
|
||||||
|
* ZTHR cancel and resume should be invoked in open context during the
|
||||||
|
* lifecycle of the pool as it is imported, exported or destroyed.
|
||||||
|
*
|
||||||
* A zthr will implicitly check if it has received a cancellation
|
* A zthr will implicitly check if it has received a cancellation
|
||||||
* signal every time func returns and every time it wakes up [see
|
* signal every time func returns and every time it wakes up [see
|
||||||
* ZTHR state transitions below].
|
* ZTHR state transitions below].
|
||||||
|
@ -161,15 +173,19 @@
|
||||||
*
|
*
|
||||||
* == Implementation of ZTHR requests
|
* == Implementation of ZTHR requests
|
||||||
*
|
*
|
||||||
* ZTHR wakeup, cancel, and resume are requests on a zthr to
|
* ZTHR cancel and resume are requests on a zthr to change its
|
||||||
* change its internal state. Requests on a zthr are serialized
|
* internal state. These requests are serialized using the
|
||||||
* using the zthr_request_lock, while changes in its internal
|
* zthr_request_lock, while changes in its internal state are
|
||||||
* state are protected by the zthr_state_lock. A request will
|
* protected by the zthr_state_lock. A request will first acquire
|
||||||
* first acquire the zthr_request_lock and then immediately
|
* the zthr_request_lock and then immediately acquire the
|
||||||
* acquire the zthr_state_lock. We do this so that incoming
|
* zthr_state_lock. We do this so that incoming requests are
|
||||||
* requests are serialized using the request lock, while still
|
* serialized using the request lock, while still allowing us
|
||||||
* allowing us to use the state lock for thread communication
|
* to use the state lock for thread communication via zthr_cv.
|
||||||
* via zthr_cv.
|
*
|
||||||
|
* ZTHR wakeup broadcasts to zthr_cv, causing sleeping threads
|
||||||
|
* to wakeup. It acquires the zthr_state_lock but not the
|
||||||
|
* zthr_request_lock, so that a wakeup on a zthr in the middle
|
||||||
|
* of being cancelled will not block.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <sys/zfs_context.h>
|
#include <sys/zfs_context.h>
|
||||||
|
@ -287,17 +303,16 @@ zthr_destroy(zthr_t *t)
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Wake up the zthr if it is sleeping. If the thread has been
|
* Wake up the zthr if it is sleeping. If the thread has been cancelled
|
||||||
* cancelled that does nothing.
|
* or is in the process of being cancelled, this is a no-op.
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
zthr_wakeup(zthr_t *t)
|
zthr_wakeup(zthr_t *t)
|
||||||
{
|
{
|
||||||
mutex_enter(&t->zthr_request_lock);
|
|
||||||
mutex_enter(&t->zthr_state_lock);
|
mutex_enter(&t->zthr_state_lock);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* There are 4 states that we can find the zthr when issuing
|
* There are 5 states that we can find the zthr when issuing
|
||||||
* this broadcast:
|
* this broadcast:
|
||||||
*
|
*
|
||||||
* [1] The common case of the thread being asleep, at which
|
* [1] The common case of the thread being asleep, at which
|
||||||
|
@ -310,17 +325,19 @@ zthr_wakeup(zthr_t *t)
|
||||||
* is basically a no-op.
|
* is basically a no-op.
|
||||||
* [4] The thread was just created/resumed, in which case the
|
* [4] The thread was just created/resumed, in which case the
|
||||||
* behavior is similar to [3].
|
* behavior is similar to [3].
|
||||||
|
* [5] The thread is in the middle of being cancelled, which
|
||||||
|
* will be a no-op.
|
||||||
*/
|
*/
|
||||||
cv_broadcast(&t->zthr_cv);
|
cv_broadcast(&t->zthr_cv);
|
||||||
|
|
||||||
mutex_exit(&t->zthr_state_lock);
|
mutex_exit(&t->zthr_state_lock);
|
||||||
mutex_exit(&t->zthr_request_lock);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Sends a cancel request to the zthr and blocks until the zthr is
|
* Sends a cancel request to the zthr and blocks until the zthr is
|
||||||
* cancelled. If the zthr is not running (e.g. has been cancelled
|
* cancelled. If the zthr is not running (e.g. has been cancelled
|
||||||
* already), this is a no-op.
|
* already), this is a no-op. Note that this function should not be
|
||||||
|
* called from syncing context as it could deadlock with the zthr_func.
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
zthr_cancel(zthr_t *t)
|
zthr_cancel(zthr_t *t)
|
||||||
|
@ -363,8 +380,9 @@ zthr_cancel(zthr_t *t)
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Sends a resume request to the supplied zthr. If the zthr is
|
* Sends a resume request to the supplied zthr. If the zthr is already
|
||||||
* already running this is a no-op.
|
* running this is a no-op. Note that this function should not be
|
||||||
|
* called from syncing context as it could deadlock with the zthr_func.
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
zthr_resume(zthr_t *t)
|
zthr_resume(zthr_t *t)
|
||||||
|
|
Loading…
Reference in New Issue