Range lock performance improvements

The original range lock implementation had to be modified by commit
8926ab7 because it was unsafe on Linux.  In particular, calling
cv_destroy() immediately after cv_broadcast() is dangerous because
the waiters may still be asleep.  Thus the following cv_destroy()
will free memory which may still be in use.

This was fixed by updating cv_destroy() to block on waiters but
this in turn introduced a deadlock.  The deadlock was resolved
with the use of a taskq to move the offending free outside the
range lock.  This worked well but using the taskq for the free
resulted in a serious performace hit.  This is somewhat ironic
because at the time I felt using the taskq might improve things
by making the free asynchronous.

This patch refines the original fix and moves the free from the
taskq to a private free list.  Then items which must be free'd
are simply inserted in to the list.  When the range lock is dropped
it's safe to free the items.  The list is walked and all rl_t
entries are freed.

This change improves small cached read performance by 26x.  This
was expected because for small reads the number of locking calls
goes up significantly.  More surprisingly this change significantly
improves large cache read performance.  This probably attributable
to better cpu/memory locality.  Very likely the same processor
which allocated the memory is now freeing it.

bs	ext3	zfs	zfs+fix		faster
----------------------------------------------
512     435     3       79      	26x
1k      820     7       160     	22x
2k      1536    14      305     	21x
4k      2764    28      572     	20x
8k      3788    50      1024    	20x
16k     4300    86      1843    	21x
32k     4505    138     2560    	18x
64k     5324    252     3891    	15x
128k    5427    276     4710    	17x
256k    5427    413     5017    	12x
512k    5427    497     5324    	10x
1m      5427    521     5632    	10x

Closes #142
This commit is contained in:
Brian Behlendorf 2011-03-08 12:17:35 -08:00
parent 126400a1ca
commit 450dc149bd
2 changed files with 16 additions and 6 deletions

View File

@ -54,6 +54,7 @@ typedef struct rl {
uint8_t r_proxy; /* acting for original range */ uint8_t r_proxy; /* acting for original range */
uint8_t r_write_wanted; /* writer wants to lock this range */ uint8_t r_write_wanted; /* writer wants to lock this range */
uint8_t r_read_wanted; /* reader wants to lock this range */ uint8_t r_read_wanted; /* reader wants to lock this range */
list_node_t rl_node; /* used for deferred release */
} rl_t; } rl_t;
/* /*

View File

@ -471,7 +471,7 @@ zfs_range_free(void *arg)
* Unlock a reader lock * Unlock a reader lock
*/ */
static void static void
zfs_range_unlock_reader(znode_t *zp, rl_t *remove) zfs_range_unlock_reader(znode_t *zp, rl_t *remove, list_t *free_list)
{ {
avl_tree_t *tree = &zp->z_range_avl; avl_tree_t *tree = &zp->z_range_avl;
rl_t *rl, *next = NULL; rl_t *rl, *next = NULL;
@ -493,7 +493,7 @@ zfs_range_unlock_reader(znode_t *zp, rl_t *remove)
if (remove->r_read_wanted) if (remove->r_read_wanted)
cv_broadcast(&remove->r_rd_cv); cv_broadcast(&remove->r_rd_cv);
taskq_dispatch(system_taskq, zfs_range_free, remove, 0); list_insert_tail(free_list, remove);
} else { } else {
ASSERT3U(remove->r_cnt, ==, 0); ASSERT3U(remove->r_cnt, ==, 0);
ASSERT3U(remove->r_write_wanted, ==, 0); ASSERT3U(remove->r_write_wanted, ==, 0);
@ -526,8 +526,7 @@ zfs_range_unlock_reader(znode_t *zp, rl_t *remove)
if (rl->r_read_wanted) if (rl->r_read_wanted)
cv_broadcast(&rl->r_rd_cv); cv_broadcast(&rl->r_rd_cv);
taskq_dispatch(system_taskq, list_insert_tail(free_list, rl);
zfs_range_free, rl, 0);
} }
} }
@ -543,10 +542,13 @@ void
zfs_range_unlock(rl_t *rl) zfs_range_unlock(rl_t *rl)
{ {
znode_t *zp = rl->r_zp; znode_t *zp = rl->r_zp;
list_t free_list;
rl_t *free_rl;
ASSERT(rl->r_type == RL_WRITER || rl->r_type == RL_READER); ASSERT(rl->r_type == RL_WRITER || rl->r_type == RL_READER);
ASSERT(rl->r_cnt == 1 || rl->r_cnt == 0); ASSERT(rl->r_cnt == 1 || rl->r_cnt == 0);
ASSERT(!rl->r_proxy); ASSERT(!rl->r_proxy);
list_create(&free_list, sizeof(rl_t), offsetof(rl_t, rl_node));
mutex_enter(&zp->z_range_lock); mutex_enter(&zp->z_range_lock);
if (rl->r_type == RL_WRITER) { if (rl->r_type == RL_WRITER) {
@ -559,14 +561,21 @@ zfs_range_unlock(rl_t *rl)
if (rl->r_read_wanted) if (rl->r_read_wanted)
cv_broadcast(&rl->r_rd_cv); cv_broadcast(&rl->r_rd_cv);
taskq_dispatch(system_taskq, zfs_range_free, rl, 0); list_insert_tail(&free_list, rl);
} else { } else {
/* /*
* lock may be shared, let zfs_range_unlock_reader() * lock may be shared, let zfs_range_unlock_reader()
* release the zp->z_range_lock lock and free the rl_t * release the zp->z_range_lock lock and free the rl_t
*/ */
zfs_range_unlock_reader(zp, rl); zfs_range_unlock_reader(zp, rl, &free_list);
} }
while ((free_rl = list_head(&free_list)) != NULL) {
list_remove(&free_list, free_rl);
zfs_range_free(free_rl);
}
list_destroy(&free_list);
} }
/* /*