btree: Implement faster binary search algorithm
This implements a binary search algorithm for B-Trees that reduces branching to the absolute minimum necessary for a binary search algorithm. It also enables the compiler to inline the comparator to ensure that the only slowdown when doing binary search is from waiting for memory accesses. Additionally, it instructs the compiler to unroll the loop, which gives an additional 40% improvement with Clang and 8% improvement with GCC. Consumers must opt into using the faster algorithm. At present, only B-Trees used inside kernel code have been modified to use the faster algorithm. Micro-benchmarks suggest that this can improve binary search performance by up to 3.5 times when compiling with Clang 16 and up to 1.9 times when compiling with GCC 12.2. Reviewed-by: Alexander Motin <mav@FreeBSD.org> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Richard Yao <richard.yao@alumni.stonybrook.edu> Closes #14866
This commit is contained in:
parent
bb736d98d1
commit
677c6f8457
|
@ -326,7 +326,7 @@ sublivelist_verify_func(void *args, dsl_deadlist_entry_t *dle)
|
||||||
int err;
|
int err;
|
||||||
struct sublivelist_verify *sv = args;
|
struct sublivelist_verify *sv = args;
|
||||||
|
|
||||||
zfs_btree_create(&sv->sv_pair, sublivelist_block_refcnt_compare,
|
zfs_btree_create(&sv->sv_pair, sublivelist_block_refcnt_compare, NULL,
|
||||||
sizeof (sublivelist_verify_block_refcnt_t));
|
sizeof (sublivelist_verify_block_refcnt_t));
|
||||||
|
|
||||||
err = bpobj_iterate_nofree(&dle->dle_bpobj, sublivelist_verify_blkptr,
|
err = bpobj_iterate_nofree(&dle->dle_bpobj, sublivelist_verify_blkptr,
|
||||||
|
@ -390,7 +390,7 @@ sublivelist_verify_lightweight(void *args, dsl_deadlist_entry_t *dle)
|
||||||
{
|
{
|
||||||
(void) args;
|
(void) args;
|
||||||
sublivelist_verify_t sv;
|
sublivelist_verify_t sv;
|
||||||
zfs_btree_create(&sv.sv_leftover, livelist_block_compare,
|
zfs_btree_create(&sv.sv_leftover, livelist_block_compare, NULL,
|
||||||
sizeof (sublivelist_verify_block_t));
|
sizeof (sublivelist_verify_block_t));
|
||||||
int err = sublivelist_verify_func(&sv, dle);
|
int err = sublivelist_verify_func(&sv, dle);
|
||||||
zfs_btree_clear(&sv.sv_leftover);
|
zfs_btree_clear(&sv.sv_leftover);
|
||||||
|
@ -682,7 +682,7 @@ livelist_metaslab_validate(spa_t *spa)
|
||||||
(void) printf("Verifying deleted livelist entries\n");
|
(void) printf("Verifying deleted livelist entries\n");
|
||||||
|
|
||||||
sublivelist_verify_t sv;
|
sublivelist_verify_t sv;
|
||||||
zfs_btree_create(&sv.sv_leftover, livelist_block_compare,
|
zfs_btree_create(&sv.sv_leftover, livelist_block_compare, NULL,
|
||||||
sizeof (sublivelist_verify_block_t));
|
sizeof (sublivelist_verify_block_t));
|
||||||
iterate_deleted_livelists(spa, livelist_verify, &sv);
|
iterate_deleted_livelists(spa, livelist_verify, &sv);
|
||||||
|
|
||||||
|
@ -716,7 +716,7 @@ livelist_metaslab_validate(spa_t *spa)
|
||||||
mv.mv_start = m->ms_start;
|
mv.mv_start = m->ms_start;
|
||||||
mv.mv_end = m->ms_start + m->ms_size;
|
mv.mv_end = m->ms_start + m->ms_size;
|
||||||
zfs_btree_create(&mv.mv_livelist_allocs,
|
zfs_btree_create(&mv.mv_livelist_allocs,
|
||||||
livelist_block_compare,
|
livelist_block_compare, NULL,
|
||||||
sizeof (sublivelist_verify_block_t));
|
sizeof (sublivelist_verify_block_t));
|
||||||
|
|
||||||
mv_populate_livelist_allocs(&mv, &sv);
|
mv_populate_livelist_allocs(&mv, &sv);
|
||||||
|
|
|
@ -105,8 +105,13 @@ typedef struct zfs_btree_index {
|
||||||
boolean_t bti_before;
|
boolean_t bti_before;
|
||||||
} zfs_btree_index_t;
|
} zfs_btree_index_t;
|
||||||
|
|
||||||
typedef struct btree {
|
typedef struct btree zfs_btree_t;
|
||||||
|
typedef void * (*bt_find_in_buf_f) (zfs_btree_t *, uint8_t *, uint32_t,
|
||||||
|
const void *, zfs_btree_index_t *);
|
||||||
|
|
||||||
|
struct btree {
|
||||||
int (*bt_compar) (const void *, const void *);
|
int (*bt_compar) (const void *, const void *);
|
||||||
|
bt_find_in_buf_f bt_find_in_buf;
|
||||||
size_t bt_elem_size;
|
size_t bt_elem_size;
|
||||||
size_t bt_leaf_size;
|
size_t bt_leaf_size;
|
||||||
uint32_t bt_leaf_cap;
|
uint32_t bt_leaf_cap;
|
||||||
|
@ -115,7 +120,54 @@ typedef struct btree {
|
||||||
uint64_t bt_num_nodes;
|
uint64_t bt_num_nodes;
|
||||||
zfs_btree_hdr_t *bt_root;
|
zfs_btree_hdr_t *bt_root;
|
||||||
zfs_btree_leaf_t *bt_bulk; // non-null if bulk loading
|
zfs_btree_leaf_t *bt_bulk; // non-null if bulk loading
|
||||||
} zfs_btree_t;
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Implementation of Shar's algorithm designed to accelerate binary search by
|
||||||
|
* eliminating impossible to predict branches.
|
||||||
|
*
|
||||||
|
* For optimality, this should be used to generate the search function in the
|
||||||
|
* same file as the comparator and the comparator should be marked
|
||||||
|
* `__attribute__((always_inline)) inline` so that the compiler will inline it.
|
||||||
|
*
|
||||||
|
* Arguments are:
|
||||||
|
*
|
||||||
|
* NAME - The function name for this instance of the search function. Use it
|
||||||
|
* in a subsequent call to zfs_btree_create().
|
||||||
|
* T - The element type stored inside the B-Tree.
|
||||||
|
* COMP - A comparator to compare two nodes, it must return exactly: -1, 0,
|
||||||
|
* or +1: -1 for <, 0 for ==, and +1 for >. For trivial comparisons,
|
||||||
|
* TREE_CMP() from avl.h can be used in a boilerplate function.
|
||||||
|
*/
|
||||||
|
/* BEGIN CSTYLED */
|
||||||
|
#define ZFS_BTREE_FIND_IN_BUF_FUNC(NAME, T, COMP) \
|
||||||
|
_Pragma("GCC diagnostic push") \
|
||||||
|
_Pragma("GCC diagnostic ignored \"-Wunknown-pragmas\"") \
|
||||||
|
static void * \
|
||||||
|
NAME(zfs_btree_t *tree, uint8_t *buf, uint32_t nelems, \
|
||||||
|
const void *value, zfs_btree_index_t *where) \
|
||||||
|
{ \
|
||||||
|
T *i = (T *)buf; \
|
||||||
|
(void) tree; \
|
||||||
|
_Pragma("GCC unroll 9") \
|
||||||
|
while (nelems > 1) { \
|
||||||
|
uint32_t half = nelems / 2; \
|
||||||
|
nelems -= half; \
|
||||||
|
i += (COMP(&i[half - 1], value) < 0) * half; \
|
||||||
|
} \
|
||||||
|
\
|
||||||
|
int comp = COMP(i, value); \
|
||||||
|
where->bti_offset = (i - (T *)buf) + (comp < 0); \
|
||||||
|
where->bti_before = (comp != 0); \
|
||||||
|
\
|
||||||
|
if (comp == 0) { \
|
||||||
|
return (i); \
|
||||||
|
} \
|
||||||
|
\
|
||||||
|
return (NULL); \
|
||||||
|
} \
|
||||||
|
_Pragma("GCC diagnostic pop")
|
||||||
|
/* END CSTYLED */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Allocate and deallocate caches for btree nodes.
|
* Allocate and deallocate caches for btree nodes.
|
||||||
|
@ -129,13 +181,19 @@ void zfs_btree_fini(void);
|
||||||
* tree - the tree to be initialized
|
* tree - the tree to be initialized
|
||||||
* compar - function to compare two nodes, it must return exactly: -1, 0, or +1
|
* compar - function to compare two nodes, it must return exactly: -1, 0, or +1
|
||||||
* -1 for <, 0 for ==, and +1 for >
|
* -1 for <, 0 for ==, and +1 for >
|
||||||
|
* find - optional function to accelerate searches inside B-Tree nodes
|
||||||
|
* through Shar's algorithm and comparator inlining. Setting this to
|
||||||
|
* NULL will use a generic function. The function should be created
|
||||||
|
* using ZFS_BTREE_FIND_IN_BUF_FUNC() in the same file as compar.
|
||||||
|
* compar should be marked `__attribute__((always_inline)) inline` or
|
||||||
|
* performance is unlikely to improve very much.
|
||||||
* size - the value of sizeof(struct my_type)
|
* size - the value of sizeof(struct my_type)
|
||||||
* lsize - custom leaf size
|
* lsize - custom leaf size
|
||||||
*/
|
*/
|
||||||
void zfs_btree_create(zfs_btree_t *, int (*) (const void *, const void *),
|
void zfs_btree_create(zfs_btree_t *, int (*) (const void *, const void *),
|
||||||
size_t);
|
bt_find_in_buf_f, size_t);
|
||||||
void zfs_btree_create_custom(zfs_btree_t *, int (*)(const void *, const void *),
|
void zfs_btree_create_custom(zfs_btree_t *, int (*)(const void *, const void *),
|
||||||
size_t, size_t);
|
bt_find_in_buf_f, size_t, size_t);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Find a node with a matching value in the tree. Returns the matching node
|
* Find a node with a matching value in the tree. Returns the matching node
|
||||||
|
|
|
@ -34,6 +34,20 @@ ifeq ($(CONFIG_KASAN),y)
|
||||||
ZFS_MODULE_CFLAGS += -Wno-error=frame-larger-than=
|
ZFS_MODULE_CFLAGS += -Wno-error=frame-larger-than=
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
# Generated binary search code is particularly bad with Clang's x86 CMOV
# conversion optimization (-x86-cmov-converter).
|
||||||
|
# Oddly, range_tree.c is not affected when unrolling is not done and dsl_scan.c
|
||||||
|
# is not affected when unrolling is done.
|
||||||
|
# Disable it until the following upstream issue is resolved:
|
||||||
|
# https://github.com/llvm/llvm-project/issues/62790
|
||||||
|
ifeq ($(CONFIG_X86),y)
|
||||||
|
ifeq ($(CONFIG_CC_IS_CLANG),y)
|
||||||
|
CFLAGS_zfs/dsl_scan.o += -mllvm -x86-cmov-converter=false
|
||||||
|
CFLAGS_zfs/metaslab.o += -mllvm -x86-cmov-converter=false
|
||||||
|
CFLAGS_zfs/range_tree.o += -mllvm -x86-cmov-converter=false
|
||||||
|
CFLAGS_zfs/zap_micro.o += -mllvm -x86-cmov-converter=false
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
ifneq ($(KBUILD_EXTMOD),)
|
ifneq ($(KBUILD_EXTMOD),)
|
||||||
@CONFIG_QAT_TRUE@ZFS_MODULE_CFLAGS += -I@QAT_SRC@/include
|
@CONFIG_QAT_TRUE@ZFS_MODULE_CFLAGS += -I@QAT_SRC@/include
|
||||||
@CONFIG_QAT_TRUE@KBUILD_EXTRA_SYMBOLS += @QAT_SYMBOLS@
|
@CONFIG_QAT_TRUE@KBUILD_EXTRA_SYMBOLS += @QAT_SYMBOLS@
|
||||||
|
|
|
@ -400,6 +400,20 @@ beforeinstall:
|
||||||
|
|
||||||
.include <bsd.kmod.mk>
|
.include <bsd.kmod.mk>
|
||||||
|
|
||||||
|
# Generated binary search code is particularly bad with Clang's x86 CMOV
# conversion optimization (-x86-cmov-converter).
|
||||||
|
# Oddly, range_tree.c is not affected when unrolling is not done and dsl_scan.c
|
||||||
|
# is not affected when unrolling is done.
|
||||||
|
# Disable it until the following upstream issue is resolved:
|
||||||
|
# https://github.com/llvm/llvm-project/issues/62790
|
||||||
|
.if ${CC} == "clang"
|
||||||
|
.if ${MACHINE_ARCH} == "i386" || ${MACHINE_ARCH} == "amd64"
|
||||||
|
CFLAGS.dsl_scan.c= -mllvm -x86-cmov-converter=false
|
||||||
|
CFLAGS.metaslab.c= -mllvm -x86-cmov-converter=false
|
||||||
|
CFLAGS.range_tree.c= -mllvm -x86-cmov-converter=false
|
||||||
|
CFLAGS.zap_micro.c= -mllvm -x86-cmov-converter=false
|
||||||
|
.endif
|
||||||
|
.endif
|
||||||
|
|
||||||
CFLAGS.sysctl_os.c= -include ../zfs_config.h
|
CFLAGS.sysctl_os.c= -include ../zfs_config.h
|
||||||
CFLAGS.xxhash.c+= -include ${SYSDIR}/sys/_null.h
|
CFLAGS.xxhash.c+= -include ${SYSDIR}/sys/_null.h
|
||||||
|
|
||||||
|
|
|
@ -193,14 +193,20 @@ zfs_btree_leaf_free(zfs_btree_t *tree, void *ptr)
|
||||||
|
|
||||||
void
|
void
|
||||||
zfs_btree_create(zfs_btree_t *tree, int (*compar) (const void *, const void *),
|
zfs_btree_create(zfs_btree_t *tree, int (*compar) (const void *, const void *),
|
||||||
size_t size)
|
bt_find_in_buf_f bt_find_in_buf, size_t size)
|
||||||
{
|
{
|
||||||
zfs_btree_create_custom(tree, compar, size, BTREE_LEAF_SIZE);
|
zfs_btree_create_custom(tree, compar, bt_find_in_buf, size,
|
||||||
|
BTREE_LEAF_SIZE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void *
|
||||||
|
zfs_btree_find_in_buf(zfs_btree_t *tree, uint8_t *buf, uint32_t nelems,
|
||||||
|
const void *value, zfs_btree_index_t *where);
|
||||||
|
|
||||||
void
|
void
|
||||||
zfs_btree_create_custom(zfs_btree_t *tree,
|
zfs_btree_create_custom(zfs_btree_t *tree,
|
||||||
int (*compar) (const void *, const void *),
|
int (*compar) (const void *, const void *),
|
||||||
|
bt_find_in_buf_f bt_find_in_buf,
|
||||||
size_t size, size_t lsize)
|
size_t size, size_t lsize)
|
||||||
{
|
{
|
||||||
size_t esize = lsize - offsetof(zfs_btree_leaf_t, btl_elems);
|
size_t esize = lsize - offsetof(zfs_btree_leaf_t, btl_elems);
|
||||||
|
@ -208,6 +214,8 @@ zfs_btree_create_custom(zfs_btree_t *tree,
|
||||||
ASSERT3U(size, <=, esize / 2);
|
ASSERT3U(size, <=, esize / 2);
|
||||||
memset(tree, 0, sizeof (*tree));
|
memset(tree, 0, sizeof (*tree));
|
||||||
tree->bt_compar = compar;
|
tree->bt_compar = compar;
|
||||||
|
tree->bt_find_in_buf = (bt_find_in_buf == NULL) ?
|
||||||
|
zfs_btree_find_in_buf : bt_find_in_buf;
|
||||||
tree->bt_elem_size = size;
|
tree->bt_elem_size = size;
|
||||||
tree->bt_leaf_size = lsize;
|
tree->bt_leaf_size = lsize;
|
||||||
tree->bt_leaf_cap = P2ALIGN(esize / size, 2);
|
tree->bt_leaf_cap = P2ALIGN(esize / size, 2);
|
||||||
|
@ -303,7 +311,7 @@ zfs_btree_find(zfs_btree_t *tree, const void *value, zfs_btree_index_t *where)
|
||||||
* element in the last leaf, it's in the last leaf or
|
* element in the last leaf, it's in the last leaf or
|
||||||
* it's not in the tree.
|
* it's not in the tree.
|
||||||
*/
|
*/
|
||||||
void *d = zfs_btree_find_in_buf(tree,
|
void *d = tree->bt_find_in_buf(tree,
|
||||||
last_leaf->btl_elems +
|
last_leaf->btl_elems +
|
||||||
last_leaf->btl_hdr.bth_first * size,
|
last_leaf->btl_hdr.bth_first * size,
|
||||||
last_leaf->btl_hdr.bth_count, value, &idx);
|
last_leaf->btl_hdr.bth_count, value, &idx);
|
||||||
|
@ -327,7 +335,7 @@ zfs_btree_find(zfs_btree_t *tree, const void *value, zfs_btree_index_t *where)
|
||||||
for (node = (zfs_btree_core_t *)tree->bt_root; depth < tree->bt_height;
|
for (node = (zfs_btree_core_t *)tree->bt_root; depth < tree->bt_height;
|
||||||
node = (zfs_btree_core_t *)node->btc_children[child], depth++) {
|
node = (zfs_btree_core_t *)node->btc_children[child], depth++) {
|
||||||
ASSERT3P(node, !=, NULL);
|
ASSERT3P(node, !=, NULL);
|
||||||
void *d = zfs_btree_find_in_buf(tree, node->btc_elems,
|
void *d = tree->bt_find_in_buf(tree, node->btc_elems,
|
||||||
node->btc_hdr.bth_count, value, &idx);
|
node->btc_hdr.bth_count, value, &idx);
|
||||||
EQUIV(d != NULL, !idx.bti_before);
|
EQUIV(d != NULL, !idx.bti_before);
|
||||||
if (d != NULL) {
|
if (d != NULL) {
|
||||||
|
@ -347,7 +355,7 @@ zfs_btree_find(zfs_btree_t *tree, const void *value, zfs_btree_index_t *where)
|
||||||
*/
|
*/
|
||||||
zfs_btree_leaf_t *leaf = (depth == 0 ?
|
zfs_btree_leaf_t *leaf = (depth == 0 ?
|
||||||
(zfs_btree_leaf_t *)tree->bt_root : (zfs_btree_leaf_t *)node);
|
(zfs_btree_leaf_t *)tree->bt_root : (zfs_btree_leaf_t *)node);
|
||||||
void *d = zfs_btree_find_in_buf(tree, leaf->btl_elems +
|
void *d = tree->bt_find_in_buf(tree, leaf->btl_elems +
|
||||||
leaf->btl_hdr.bth_first * size,
|
leaf->btl_hdr.bth_first * size,
|
||||||
leaf->btl_hdr.bth_count, value, &idx);
|
leaf->btl_hdr.bth_count, value, &idx);
|
||||||
|
|
||||||
|
@ -671,7 +679,7 @@ zfs_btree_insert_into_parent(zfs_btree_t *tree, zfs_btree_hdr_t *old_node,
|
||||||
zfs_btree_hdr_t *par_hdr = &parent->btc_hdr;
|
zfs_btree_hdr_t *par_hdr = &parent->btc_hdr;
|
||||||
zfs_btree_index_t idx;
|
zfs_btree_index_t idx;
|
||||||
ASSERT(zfs_btree_is_core(par_hdr));
|
ASSERT(zfs_btree_is_core(par_hdr));
|
||||||
VERIFY3P(zfs_btree_find_in_buf(tree, parent->btc_elems,
|
VERIFY3P(tree->bt_find_in_buf(tree, parent->btc_elems,
|
||||||
par_hdr->bth_count, buf, &idx), ==, NULL);
|
par_hdr->bth_count, buf, &idx), ==, NULL);
|
||||||
ASSERT(idx.bti_before);
|
ASSERT(idx.bti_before);
|
||||||
uint32_t offset = idx.bti_offset;
|
uint32_t offset = idx.bti_offset;
|
||||||
|
@ -897,7 +905,7 @@ zfs_btree_find_parent_idx(zfs_btree_t *tree, zfs_btree_hdr_t *hdr)
|
||||||
}
|
}
|
||||||
zfs_btree_index_t idx;
|
zfs_btree_index_t idx;
|
||||||
zfs_btree_core_t *parent = hdr->bth_parent;
|
zfs_btree_core_t *parent = hdr->bth_parent;
|
||||||
VERIFY3P(zfs_btree_find_in_buf(tree, parent->btc_elems,
|
VERIFY3P(tree->bt_find_in_buf(tree, parent->btc_elems,
|
||||||
parent->btc_hdr.bth_count, buf, &idx), ==, NULL);
|
parent->btc_hdr.bth_count, buf, &idx), ==, NULL);
|
||||||
ASSERT(idx.bti_before);
|
ASSERT(idx.bti_before);
|
||||||
ASSERT3U(idx.bti_offset, <=, parent->btc_hdr.bth_count);
|
ASSERT3U(idx.bti_offset, <=, parent->btc_hdr.bth_count);
|
||||||
|
|
|
@ -4877,6 +4877,7 @@ scan_exec_io(dsl_pool_t *dp, const blkptr_t *bp, int zio_flags,
|
||||||
* with single operation. Plus it makes scrubs more sequential and reduces
|
* with single operation. Plus it makes scrubs more sequential and reduces
|
||||||
* chances that minor extent change move it within the B-tree.
|
* chances that minor extent change move it within the B-tree.
|
||||||
*/
|
*/
|
||||||
|
__attribute__((always_inline)) inline
|
||||||
static int
|
static int
|
||||||
ext_size_compare(const void *x, const void *y)
|
ext_size_compare(const void *x, const void *y)
|
||||||
{
|
{
|
||||||
|
@ -4885,13 +4886,17 @@ ext_size_compare(const void *x, const void *y)
|
||||||
return (TREE_CMP(*a, *b));
|
return (TREE_CMP(*a, *b));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ZFS_BTREE_FIND_IN_BUF_FUNC(ext_size_find_in_buf, uint64_t,
|
||||||
|
ext_size_compare)
|
||||||
|
|
||||||
static void
|
static void
|
||||||
ext_size_create(range_tree_t *rt, void *arg)
|
ext_size_create(range_tree_t *rt, void *arg)
|
||||||
{
|
{
|
||||||
(void) rt;
|
(void) rt;
|
||||||
zfs_btree_t *size_tree = arg;
|
zfs_btree_t *size_tree = arg;
|
||||||
|
|
||||||
zfs_btree_create(size_tree, ext_size_compare, sizeof (uint64_t));
|
zfs_btree_create(size_tree, ext_size_compare, ext_size_find_in_buf,
|
||||||
|
sizeof (uint64_t));
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
|
|
@ -1342,6 +1342,7 @@ metaslab_group_allocatable(metaslab_group_t *mg, metaslab_group_t *rotor,
|
||||||
* Comparison function for the private size-ordered tree using 32-bit
|
* Comparison function for the private size-ordered tree using 32-bit
|
||||||
* ranges. Tree is sorted by size, larger sizes at the end of the tree.
|
* ranges. Tree is sorted by size, larger sizes at the end of the tree.
|
||||||
*/
|
*/
|
||||||
|
__attribute__((always_inline)) inline
|
||||||
static int
|
static int
|
||||||
metaslab_rangesize32_compare(const void *x1, const void *x2)
|
metaslab_rangesize32_compare(const void *x1, const void *x2)
|
||||||
{
|
{
|
||||||
|
@ -1352,16 +1353,15 @@ metaslab_rangesize32_compare(const void *x1, const void *x2)
|
||||||
uint64_t rs_size2 = r2->rs_end - r2->rs_start;
|
uint64_t rs_size2 = r2->rs_end - r2->rs_start;
|
||||||
|
|
||||||
int cmp = TREE_CMP(rs_size1, rs_size2);
|
int cmp = TREE_CMP(rs_size1, rs_size2);
|
||||||
if (likely(cmp))
|
|
||||||
return (cmp);
|
|
||||||
|
|
||||||
return (TREE_CMP(r1->rs_start, r2->rs_start));
|
return (cmp + !cmp * TREE_CMP(r1->rs_start, r2->rs_start));
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Comparison function for the private size-ordered tree using 64-bit
|
* Comparison function for the private size-ordered tree using 64-bit
|
||||||
* ranges. Tree is sorted by size, larger sizes at the end of the tree.
|
* ranges. Tree is sorted by size, larger sizes at the end of the tree.
|
||||||
*/
|
*/
|
||||||
|
__attribute__((always_inline)) inline
|
||||||
static int
|
static int
|
||||||
metaslab_rangesize64_compare(const void *x1, const void *x2)
|
metaslab_rangesize64_compare(const void *x1, const void *x2)
|
||||||
{
|
{
|
||||||
|
@ -1372,11 +1372,10 @@ metaslab_rangesize64_compare(const void *x1, const void *x2)
|
||||||
uint64_t rs_size2 = r2->rs_end - r2->rs_start;
|
uint64_t rs_size2 = r2->rs_end - r2->rs_start;
|
||||||
|
|
||||||
int cmp = TREE_CMP(rs_size1, rs_size2);
|
int cmp = TREE_CMP(rs_size1, rs_size2);
|
||||||
if (likely(cmp))
|
|
||||||
return (cmp);
|
|
||||||
|
|
||||||
return (TREE_CMP(r1->rs_start, r2->rs_start));
|
return (cmp + !cmp * TREE_CMP(r1->rs_start, r2->rs_start));
|
||||||
}
|
}
|
||||||
|
|
||||||
typedef struct metaslab_rt_arg {
|
typedef struct metaslab_rt_arg {
|
||||||
zfs_btree_t *mra_bt;
|
zfs_btree_t *mra_bt;
|
||||||
uint32_t mra_floor_shift;
|
uint32_t mra_floor_shift;
|
||||||
|
@ -1412,6 +1411,13 @@ metaslab_size_tree_full_load(range_tree_t *rt)
|
||||||
range_tree_walk(rt, metaslab_size_sorted_add, &arg);
|
range_tree_walk(rt, metaslab_size_sorted_add, &arg);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
ZFS_BTREE_FIND_IN_BUF_FUNC(metaslab_rt_find_rangesize32_in_buf,
|
||||||
|
range_seg32_t, metaslab_rangesize32_compare)
|
||||||
|
|
||||||
|
ZFS_BTREE_FIND_IN_BUF_FUNC(metaslab_rt_find_rangesize64_in_buf,
|
||||||
|
range_seg64_t, metaslab_rangesize64_compare)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Create any block allocator specific components. The current allocators
|
* Create any block allocator specific components. The current allocators
|
||||||
* rely on using both a size-ordered range_tree_t and an array of uint64_t's.
|
* rely on using both a size-ordered range_tree_t and an array of uint64_t's.
|
||||||
|
@ -1424,19 +1430,22 @@ metaslab_rt_create(range_tree_t *rt, void *arg)
|
||||||
|
|
||||||
size_t size;
|
size_t size;
|
||||||
int (*compare) (const void *, const void *);
|
int (*compare) (const void *, const void *);
|
||||||
|
bt_find_in_buf_f bt_find;
|
||||||
switch (rt->rt_type) {
|
switch (rt->rt_type) {
|
||||||
case RANGE_SEG32:
|
case RANGE_SEG32:
|
||||||
size = sizeof (range_seg32_t);
|
size = sizeof (range_seg32_t);
|
||||||
compare = metaslab_rangesize32_compare;
|
compare = metaslab_rangesize32_compare;
|
||||||
|
bt_find = metaslab_rt_find_rangesize32_in_buf;
|
||||||
break;
|
break;
|
||||||
case RANGE_SEG64:
|
case RANGE_SEG64:
|
||||||
size = sizeof (range_seg64_t);
|
size = sizeof (range_seg64_t);
|
||||||
compare = metaslab_rangesize64_compare;
|
compare = metaslab_rangesize64_compare;
|
||||||
|
bt_find = metaslab_rt_find_rangesize64_in_buf;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
panic("Invalid range seg type %d", rt->rt_type);
|
panic("Invalid range seg type %d", rt->rt_type);
|
||||||
}
|
}
|
||||||
zfs_btree_create(size_tree, compare, size);
|
zfs_btree_create(size_tree, compare, bt_find, size);
|
||||||
mrap->mra_floor_shift = metaslab_by_size_min_shift;
|
mrap->mra_floor_shift = metaslab_by_size_min_shift;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -151,6 +151,7 @@ range_tree_stat_decr(range_tree_t *rt, range_seg_t *rs)
|
||||||
rt->rt_histogram[idx]--;
|
rt->rt_histogram[idx]--;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
__attribute__((always_inline)) inline
|
||||||
static int
|
static int
|
||||||
range_tree_seg32_compare(const void *x1, const void *x2)
|
range_tree_seg32_compare(const void *x1, const void *x2)
|
||||||
{
|
{
|
||||||
|
@ -163,6 +164,7 @@ range_tree_seg32_compare(const void *x1, const void *x2)
|
||||||
return ((r1->rs_start >= r2->rs_end) - (r1->rs_end <= r2->rs_start));
|
return ((r1->rs_start >= r2->rs_end) - (r1->rs_end <= r2->rs_start));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
__attribute__((always_inline)) inline
|
||||||
static int
|
static int
|
||||||
range_tree_seg64_compare(const void *x1, const void *x2)
|
range_tree_seg64_compare(const void *x1, const void *x2)
|
||||||
{
|
{
|
||||||
|
@ -175,6 +177,7 @@ range_tree_seg64_compare(const void *x1, const void *x2)
|
||||||
return ((r1->rs_start >= r2->rs_end) - (r1->rs_end <= r2->rs_start));
|
return ((r1->rs_start >= r2->rs_end) - (r1->rs_end <= r2->rs_start));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
__attribute__((always_inline)) inline
|
||||||
static int
|
static int
|
||||||
range_tree_seg_gap_compare(const void *x1, const void *x2)
|
range_tree_seg_gap_compare(const void *x1, const void *x2)
|
||||||
{
|
{
|
||||||
|
@ -187,6 +190,15 @@ range_tree_seg_gap_compare(const void *x1, const void *x2)
|
||||||
return ((r1->rs_start >= r2->rs_end) - (r1->rs_end <= r2->rs_start));
|
return ((r1->rs_start >= r2->rs_end) - (r1->rs_end <= r2->rs_start));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ZFS_BTREE_FIND_IN_BUF_FUNC(range_tree_seg32_find_in_buf, range_seg32_t,
|
||||||
|
range_tree_seg32_compare)
|
||||||
|
|
||||||
|
ZFS_BTREE_FIND_IN_BUF_FUNC(range_tree_seg64_find_in_buf, range_seg64_t,
|
||||||
|
range_tree_seg64_compare)
|
||||||
|
|
||||||
|
ZFS_BTREE_FIND_IN_BUF_FUNC(range_tree_seg_gap_find_in_buf, range_seg_gap_t,
|
||||||
|
range_tree_seg_gap_compare)
|
||||||
|
|
||||||
range_tree_t *
|
range_tree_t *
|
||||||
range_tree_create_gap(const range_tree_ops_t *ops, range_seg_type_t type,
|
range_tree_create_gap(const range_tree_ops_t *ops, range_seg_type_t type,
|
||||||
void *arg, uint64_t start, uint64_t shift, uint64_t gap)
|
void *arg, uint64_t start, uint64_t shift, uint64_t gap)
|
||||||
|
@ -197,23 +209,27 @@ range_tree_create_gap(const range_tree_ops_t *ops, range_seg_type_t type,
|
||||||
ASSERT3U(type, <=, RANGE_SEG_NUM_TYPES);
|
ASSERT3U(type, <=, RANGE_SEG_NUM_TYPES);
|
||||||
size_t size;
|
size_t size;
|
||||||
int (*compare) (const void *, const void *);
|
int (*compare) (const void *, const void *);
|
||||||
|
bt_find_in_buf_f bt_find;
|
||||||
switch (type) {
|
switch (type) {
|
||||||
case RANGE_SEG32:
|
case RANGE_SEG32:
|
||||||
size = sizeof (range_seg32_t);
|
size = sizeof (range_seg32_t);
|
||||||
compare = range_tree_seg32_compare;
|
compare = range_tree_seg32_compare;
|
||||||
|
bt_find = range_tree_seg32_find_in_buf;
|
||||||
break;
|
break;
|
||||||
case RANGE_SEG64:
|
case RANGE_SEG64:
|
||||||
size = sizeof (range_seg64_t);
|
size = sizeof (range_seg64_t);
|
||||||
compare = range_tree_seg64_compare;
|
compare = range_tree_seg64_compare;
|
||||||
|
bt_find = range_tree_seg64_find_in_buf;
|
||||||
break;
|
break;
|
||||||
case RANGE_SEG_GAP:
|
case RANGE_SEG_GAP:
|
||||||
size = sizeof (range_seg_gap_t);
|
size = sizeof (range_seg_gap_t);
|
||||||
compare = range_tree_seg_gap_compare;
|
compare = range_tree_seg_gap_compare;
|
||||||
|
bt_find = range_tree_seg_gap_find_in_buf;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
panic("Invalid range seg type %d", type);
|
panic("Invalid range seg type %d", type);
|
||||||
}
|
}
|
||||||
zfs_btree_create(&rt->rt_root, compare, size);
|
zfs_btree_create(&rt->rt_root, compare, bt_find, size);
|
||||||
|
|
||||||
rt->rt_ops = ops;
|
rt->rt_ops = ops;
|
||||||
rt->rt_gap = gap;
|
rt->rt_gap = gap;
|
||||||
|
|
|
@ -285,6 +285,7 @@ zap_byteswap(void *buf, size_t size)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
__attribute__((always_inline)) inline
|
||||||
static int
|
static int
|
||||||
mze_compare(const void *arg1, const void *arg2)
|
mze_compare(const void *arg1, const void *arg2)
|
||||||
{
|
{
|
||||||
|
@ -295,6 +296,9 @@ mze_compare(const void *arg1, const void *arg2)
|
||||||
(uint64_t)(mze2->mze_hash) << 32 | mze2->mze_cd));
|
(uint64_t)(mze2->mze_hash) << 32 | mze2->mze_cd));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ZFS_BTREE_FIND_IN_BUF_FUNC(mze_find_in_buf, mzap_ent_t,
|
||||||
|
mze_compare)
|
||||||
|
|
||||||
static void
|
static void
|
||||||
mze_insert(zap_t *zap, uint16_t chunkid, uint64_t hash)
|
mze_insert(zap_t *zap, uint16_t chunkid, uint64_t hash)
|
||||||
{
|
{
|
||||||
|
@ -461,7 +465,7 @@ mzap_open(objset_t *os, uint64_t obj, dmu_buf_t *db)
|
||||||
* 62 entries before we have to add 2KB B-tree core node.
|
* 62 entries before we have to add 2KB B-tree core node.
|
||||||
*/
|
*/
|
||||||
zfs_btree_create_custom(&zap->zap_m.zap_tree, mze_compare,
|
zfs_btree_create_custom(&zap->zap_m.zap_tree, mze_compare,
|
||||||
sizeof (mzap_ent_t), 512);
|
mze_find_in_buf, sizeof (mzap_ent_t), 512);
|
||||||
|
|
||||||
zap_name_t *zn = zap_name_alloc(zap);
|
zap_name_t *zn = zap_name_alloc(zap);
|
||||||
for (uint16_t i = 0; i < zap->zap_m.zap_num_chunks; i++) {
|
for (uint16_t i = 0; i < zap->zap_m.zap_num_chunks; i++) {
|
||||||
|
|
|
@ -501,7 +501,7 @@ main(int argc, char *argv[])
|
||||||
srandom(seed);
|
srandom(seed);
|
||||||
|
|
||||||
zfs_btree_init();
|
zfs_btree_init();
|
||||||
zfs_btree_create(&bt, zfs_btree_compare, sizeof (uint64_t));
|
zfs_btree_create(&bt, zfs_btree_compare, NULL, sizeof (uint64_t));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This runs the named negative test. None of them should
|
* This runs the named negative test. None of them should
|
||||||
|
|
Loading…
Reference in New Issue