Allow MMP to bypass waiting for other threads
At our site we have seen cases where multi-modifier protection is enabled (multihost=on) on a pool and the pool gets suspended because a single failing disk is responding very slowly. Our pools have 90 disks in them, so we expect disks to fail. The current version of MMP requires that the MMP thread wait for pending writers before it can take the SCL_STATE config lock as a reader. When a disk is responding very slowly, we observed that this wait was long enough to cause the pool to suspend. This change allows the MMP thread to bypass waiting for outstanding write lock requests from other threads, which reduces the chance that the pool gets suspended. Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Herb Wartens <hawartens@gmail.com> Closes #14659
This commit is contained in:
parent
cdbe1d65c4
commit
33075e465f
|
@ -972,6 +972,8 @@ extern int spa_import_progress_set_state(uint64_t pool_guid,
|
||||||
/* Pool configuration locks */
|
/* Pool configuration locks */
|
||||||
extern int spa_config_tryenter(spa_t *spa, int locks, void *tag, krw_t rw);
|
extern int spa_config_tryenter(spa_t *spa, int locks, void *tag, krw_t rw);
|
||||||
extern void spa_config_enter(spa_t *spa, int locks, const void *tag, krw_t rw);
|
extern void spa_config_enter(spa_t *spa, int locks, const void *tag, krw_t rw);
|
||||||
|
extern void spa_config_enter_mmp(spa_t *spa, int locks, const void *tag,
|
||||||
|
krw_t rw);
|
||||||
extern void spa_config_exit(spa_t *spa, int locks, const void *tag);
|
extern void spa_config_exit(spa_t *spa, int locks, const void *tag);
|
||||||
extern int spa_config_held(spa_t *spa, int locks, krw_t rw);
|
extern int spa_config_held(spa_t *spa, int locks, krw_t rw);
|
||||||
|
|
||||||
|
|
|
@ -444,7 +444,7 @@ mmp_write_uberblock(spa_t *spa)
|
||||||
uint64_t offset;
|
uint64_t offset;
|
||||||
|
|
||||||
hrtime_t lock_acquire_time = gethrtime();
|
hrtime_t lock_acquire_time = gethrtime();
|
||||||
spa_config_enter(spa, SCL_STATE, mmp_tag, RW_READER);
|
spa_config_enter_mmp(spa, SCL_STATE, mmp_tag, RW_READER);
|
||||||
lock_acquire_time = gethrtime() - lock_acquire_time;
|
lock_acquire_time = gethrtime() - lock_acquire_time;
|
||||||
if (lock_acquire_time > (MSEC2NSEC(MMP_MIN_INTERVAL) / 10))
|
if (lock_acquire_time > (MSEC2NSEC(MMP_MIN_INTERVAL) / 10))
|
||||||
zfs_dbgmsg("MMP SCL_STATE acquisition pool '%s' took %llu ns "
|
zfs_dbgmsg("MMP SCL_STATE acquisition pool '%s' took %llu ns "
|
||||||
|
|
|
@ -494,8 +494,9 @@ spa_config_tryenter(spa_t *spa, int locks, void *tag, krw_t rw)
|
||||||
return (1);
|
return (1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
static void
|
||||||
spa_config_enter(spa_t *spa, int locks, const void *tag, krw_t rw)
|
spa_config_enter_impl(spa_t *spa, int locks, const void *tag, krw_t rw,
|
||||||
|
int mmp_flag)
|
||||||
{
|
{
|
||||||
(void) tag;
|
(void) tag;
|
||||||
int wlocks_held = 0;
|
int wlocks_held = 0;
|
||||||
|
@ -510,7 +511,8 @@ spa_config_enter(spa_t *spa, int locks, const void *tag, krw_t rw)
|
||||||
continue;
|
continue;
|
||||||
mutex_enter(&scl->scl_lock);
|
mutex_enter(&scl->scl_lock);
|
||||||
if (rw == RW_READER) {
|
if (rw == RW_READER) {
|
||||||
while (scl->scl_writer || scl->scl_write_wanted) {
|
while (scl->scl_writer ||
|
||||||
|
(!mmp_flag && scl->scl_write_wanted)) {
|
||||||
cv_wait(&scl->scl_cv, &scl->scl_lock);
|
cv_wait(&scl->scl_cv, &scl->scl_lock);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
@ -528,6 +530,27 @@ spa_config_enter(spa_t *spa, int locks, const void *tag, krw_t rw)
|
||||||
ASSERT3U(wlocks_held, <=, locks);
|
ASSERT3U(wlocks_held, <=, locks);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
spa_config_enter(spa_t *spa, int locks, const void *tag, krw_t rw)
|
||||||
|
{
|
||||||
|
spa_config_enter_impl(spa, locks, tag, rw, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* spa_config_enter_mmp() allows the MMP thread to cut in front of
|
||||||
|
* outstanding write lock requests. This is needed since the mmp updates are
|
||||||
|
* time sensitive and failure to service them promptly will result in a
|
||||||
|
* suspended pool. This pool suspension has been seen in practice when there is
|
||||||
|
* a single disk in a pool that is responding slowly and presumably about to
|
||||||
|
* fail.
|
||||||
|
*/
|
||||||
|
|
||||||
|
void
|
||||||
|
spa_config_enter_mmp(spa_t *spa, int locks, const void *tag, krw_t rw)
|
||||||
|
{
|
||||||
|
spa_config_enter_impl(spa, locks, tag, rw, 1);
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
spa_config_exit(spa_t *spa, int locks, const void *tag)
|
spa_config_exit(spa_t *spa, int locks, const void *tag)
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in New Issue