Several B-tree optimizations
- Introduce a first-element offset (bth_first) within a leaf. This reduces
the average memmove() size by ~50% when adding or removing elements: if the
added/removed element is in the first half of the leaf, we can shift the
elements before it and adjust bth_first instead of moving the larger tail
after it (see the sketch after the commit metadata below).
- Use memcpy() instead of memmove() when we know there is no overlap.
- Switch from uint64_t to uint32_t. This does not limit anything, but
32-bit arches should appreciate it greatly in hot paths.
- Store the leaf capacity in struct btree to avoid 64-bit divisions.
- Adjust zfs_btree_insert_into_leaf() to always produce balanced leaves
after splitting, no matter where the new element was inserted. Not that we
care about it much, but it should also allow B-trees with as few as two
elements per leaf, down from the previous minimum of four.

When scrubbing a pool of 12 SSDs storing 1.5TB of 4KB zvol blocks, this
reduces the share of scan-thread time spent in memmove() from 13.7% to 5.7%
and the total scrub time by ~15 seconds out of 9 minutes. It should also
reduce spacemap load times, but I haven't measured that.

Reviewed-by: Paul Dagnelie <pcd@delphix.com>
Signed-off-by: Alexander Motin <mav@FreeBSD.org>
Sponsored-By: iXsystems, Inc.
Closes #13582
commit dc91a6a660
parent a861aa2b9e
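A minimal sketch of the first two bullets, with hypothetical names and none
of the splitting or core-node handling of the real zfs_btree_insert_into_leaf():

#include <stdint.h>
#include <string.h>

typedef struct leaf {
	uint32_t first;		/* offset of the first valid element */
	uint32_t count;		/* number of valid elements */
	uint8_t	elems[];	/* capacity * esize bytes of storage */
} leaf_t;

/* Insert one element at logical offset off (0 .. count), assuming room. */
void
leaf_insert(leaf_t *l, const void *value, uint32_t off, size_t esize)
{
	uint8_t *start = l->elems + (size_t)l->first * esize;

	if (off <= l->count / 2 && l->first > 0) {
		/*
		 * New element lands in the first half: shift the off
		 * preceding elements one slot left and claim the slot
		 * freed by decrementing first.  On average this moves
		 * about half as many bytes as always shifting the tail.
		 */
		memmove(start - esize, start, (size_t)off * esize);
		l->first--;
		start -= esize;
	} else {
		/* Second half: shift the count - off following elements. */
		memmove(start + ((size_t)off + 1) * esize,
		    start + (size_t)off * esize,
		    (size_t)(l->count - off) * esize);
	}
	/* The target slot cannot overlap value, so plain memcpy suffices. */
	memcpy(start + (size_t)off * esize, value, esize);
	l->count++;
}

The final memcpy() is the second bullet in miniature: copies whose ranges
provably cannot overlap skip memmove()'s overlap handling.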
@@ -72,7 +72,11 @@ extern kmem_cache_t *zfs_btree_leaf_cache;
 
 typedef struct zfs_btree_hdr {
 	struct zfs_btree_core *bth_parent;
-	boolean_t bth_core;
+	/*
+	 * Set to -1 to indicate core nodes. Other values represent first
+	 * valid element offset for leaf nodes.
+	 */
+	uint32_t bth_first;
 	/*
 	 * For both leaf and core nodes, represents the number of elements in
 	 * the node. For core nodes, they will have bth_count + 1 children.
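The hunk above folds the old boolean_t bth_core into bth_first by reserving
-1 as a core-node sentinel. The matching predicate lives in the suppressed
part of the diff; a sketch of how it presumably looks (the helper name is
illustrative):

#include <stdint.h>

struct zfs_btree_core;

typedef struct zfs_btree_hdr {
	struct zfs_btree_core *bth_parent;
	uint32_t bth_first;	/* (uint32_t)-1 marks a core node */
	uint32_t bth_count;
} zfs_btree_hdr_t;

/* A core node carries the sentinel; leaves store a real element offset. */
static inline int
btree_is_core(const zfs_btree_hdr_t *hdr)
{
	return (hdr->bth_first == (uint32_t)-1);
}

Reusing the offset field keeps the header no larger than before while adding
the information the first optimization needs.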
@@ -91,9 +95,12 @@ typedef struct zfs_btree_leaf {
 	uint8_t btl_elems[];
 } zfs_btree_leaf_t;
 
+#define	BTREE_LEAF_ESIZE	(BTREE_LEAF_SIZE - \
+	offsetof(zfs_btree_leaf_t, btl_elems))
+
 typedef struct zfs_btree_index {
 	zfs_btree_hdr_t *bti_node;
-	uint64_t bti_offset;
+	uint32_t bti_offset;
 	/*
 	 * True if the location is before the list offset, false if it's at
 	 * the listed offset.
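The new BTREE_LEAF_ESIZE macro pairs with the bt_leaf_cap field added in the
next hunk: the per-leaf element capacity can be computed once at tree
creation instead of redoing a 64-bit division on every hot-path bounds
check. A sketch under assumed values (BTREE_LEAF_SIZE and the rounding rule
are not shown in this excerpt):

#include <stddef.h>
#include <stdint.h>

typedef struct zfs_btree_hdr {
	struct zfs_btree_core *bth_parent;
	uint32_t bth_first;
	uint32_t bth_count;
} zfs_btree_hdr_t;

typedef struct zfs_btree_leaf {
	zfs_btree_hdr_t btl_hdr;
	uint8_t btl_elems[];
} zfs_btree_leaf_t;

#define	BTREE_LEAF_SIZE		4096	/* assumed leaf allocation size */
#define	BTREE_LEAF_ESIZE	(BTREE_LEAF_SIZE - \
	offsetof(zfs_btree_leaf_t, btl_elems))

/* Computed once at tree creation and cached in bt_leaf_cap. */
uint32_t
leaf_capacity(size_t elem_size)
{
	/* Round down to even so a full leaf can split into equal halves. */
	return ((uint32_t)((BTREE_LEAF_ESIZE / elem_size) & ~(size_t)1));
}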
@@ -105,6 +112,7 @@ typedef struct btree {
 	zfs_btree_hdr_t *bt_root;
 	int64_t bt_height;
 	size_t bt_elem_size;
+	uint32_t bt_leaf_cap;
 	uint64_t bt_num_elems;
 	uint64_t bt_num_nodes;
 	zfs_btree_leaf_t *bt_bulk; // non-null if bulk loading
(File diff suppressed because it is too large.)
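The bulk of the change lives in that suppressed diff. One more piece worth
sketching is the last bullet: the split sizes produced by
zfs_btree_insert_into_leaf() no longer depend on where the new element was
inserted (the arithmetic is illustrative, not the actual code):

#include <stdint.h>

/*
 * When a full leaf of cap elements takes one more at logical offset off,
 * treat all cap + 1 logical elements as one sequence and cut it in half;
 * off only determines which side physically receives the new element,
 * not the resulting sizes, so the leaves stay balanced and cap can be
 * as small as 2.
 */
void
split_counts(uint32_t cap, uint32_t off, uint32_t *stay, uint32_t *move)
{
	uint32_t left = (cap + 1) / 2;	/* logical size of the left leaf */

	if (off < left)
		*stay = left - 1;	/* existing elements staying left */
	else
		*stay = left;
	*move = cap - *stay;		/* existing elements moving right */
}

For cap = 2 every offset yields leaves of one and two elements, which is why
the minimum leaf capacity can drop from four to two.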