Add ZAP shrinking support
This allows ZAPs to shrink. When there are two empty sibling leafs, one of them is collapsed and its storage space is reused. This improved performance on directories that at one time contained a large number of files, but many or all of those files have since been deleted. This also applies to all other types of ZAPs as well. Sponsored-by: iXsystems, Inc. Sponsored-by: Klara, Inc. Signed-off-by: Alexander Stetsenko <alex.stetsenko@klarasystems.com>
This commit is contained in:
parent
d7605ae77b
commit
94c323bf39
|
@ -16,7 +16,7 @@
|
||||||
.\" own identifying information:
|
.\" own identifying information:
|
||||||
.\" Portions Copyright [yyyy] [name of copyright owner]
|
.\" Portions Copyright [yyyy] [name of copyright owner]
|
||||||
.\"
|
.\"
|
||||||
.Dd January 9, 2024
|
.Dd February 14, 2024
|
||||||
.Dt ZFS 4
|
.Dt ZFS 4
|
||||||
.Os
|
.Os
|
||||||
.
|
.
|
||||||
|
@ -564,9 +564,8 @@ However, this is limited by
|
||||||
Maximum micro ZAP size.
|
Maximum micro ZAP size.
|
||||||
A micro ZAP is upgraded to a fat ZAP, once it grows beyond the specified size.
|
A micro ZAP is upgraded to a fat ZAP, once it grows beyond the specified size.
|
||||||
.
|
.
|
||||||
.It Sy zfetch_hole_shift Ns = Ns Sy 2 Pq uint
|
.It Sy zap_shrink_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int
|
||||||
Log2 fraction of holes in speculative prefetch stream allowed for it to
|
If set, adjacent empty ZAP blocks will be collapsed, reducing disk space.
|
||||||
proceed.
|
|
||||||
.
|
.
|
||||||
.It Sy zfetch_min_distance Ns = Ns Sy 4194304 Ns B Po 4 MiB Pc Pq uint
|
.It Sy zfetch_min_distance Ns = Ns Sy 4194304 Ns B Po 4 MiB Pc Pq uint
|
||||||
Min bytes to prefetch per stream.
|
Min bytes to prefetch per stream.
|
||||||
|
|
385
module/zfs/zap.c
385
module/zfs/zap.c
|
@ -22,6 +22,8 @@
|
||||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||||
* Copyright (c) 2012, 2018 by Delphix. All rights reserved.
|
* Copyright (c) 2012, 2018 by Delphix. All rights reserved.
|
||||||
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
|
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
|
||||||
|
* Copyright 2023 Alexander Stetsenko <alex.stetsenko@gmail.com>
|
||||||
|
* Copyright (c) 2023, Klara Inc.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -41,6 +43,7 @@
|
||||||
|
|
||||||
#include <sys/spa.h>
|
#include <sys/spa.h>
|
||||||
#include <sys/dmu.h>
|
#include <sys/dmu.h>
|
||||||
|
#include <sys/dnode.h>
|
||||||
#include <sys/zfs_context.h>
|
#include <sys/zfs_context.h>
|
||||||
#include <sys/zfs_znode.h>
|
#include <sys/zfs_znode.h>
|
||||||
#include <sys/fs/zfs.h>
|
#include <sys/fs/zfs.h>
|
||||||
|
@ -78,9 +81,16 @@
|
||||||
*/
|
*/
|
||||||
static int zap_iterate_prefetch = B_TRUE;
|
static int zap_iterate_prefetch = B_TRUE;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Enable ZAP shrinking. When enabled, empty sibling leaf blocks will be
|
||||||
|
* collapsed into a single block.
|
||||||
|
*/
|
||||||
|
int zap_shrink_enabled = B_TRUE;
|
||||||
|
|
||||||
int fzap_default_block_shift = 14; /* 16k blocksize */
|
int fzap_default_block_shift = 14; /* 16k blocksize */
|
||||||
|
|
||||||
static uint64_t zap_allocate_blocks(zap_t *zap, int nblocks);
|
static uint64_t zap_allocate_blocks(zap_t *zap, int nblocks);
|
||||||
|
static int zap_shrink(zap_name_t *zn, zap_leaf_t *l, dmu_tx_t *tx);
|
||||||
|
|
||||||
void
|
void
|
||||||
fzap_byteswap(void *vbuf, size_t size)
|
fzap_byteswap(void *vbuf, size_t size)
|
||||||
|
@ -586,6 +596,118 @@ zap_set_idx_to_blk(zap_t *zap, uint64_t idx, uint64_t blk, dmu_tx_t *tx)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
zap_set_idx_range_to_blk(zap_t *zap, uint64_t idx, uint64_t nptrs, uint64_t blk,
|
||||||
|
dmu_tx_t *tx)
|
||||||
|
{
|
||||||
|
int bs = FZAP_BLOCK_SHIFT(zap);
|
||||||
|
int epb = bs >> 3; /* entries per block */
|
||||||
|
int err = 0;
|
||||||
|
|
||||||
|
ASSERT(tx != NULL);
|
||||||
|
ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Check for i/o errors
|
||||||
|
*/
|
||||||
|
for (int i = 0; i < nptrs; i += epb) {
|
||||||
|
uint64_t blk;
|
||||||
|
err = zap_idx_to_blk(zap, idx + i, &blk);
|
||||||
|
if (err != 0) {
|
||||||
|
return (err);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < nptrs; i++) {
|
||||||
|
err = zap_set_idx_to_blk(zap, idx + i, blk, tx);
|
||||||
|
ASSERT0(err); /* we checked for i/o errors above */
|
||||||
|
if (err != 0)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
return (err);
|
||||||
|
}
|
||||||
|
|
||||||
|
#define ZAP_PREFIX_HASH(pref, pref_len) ((pref) << (64 - (pref_len)))
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This checks whether a leaf with prefix/len exists and returns its blkid.
|
||||||
|
*
|
||||||
|
* The prefix/len correspond to a distinct range of entries in ptrtbl.
|
||||||
|
* If all range entries contain the same value (blkid) and only the range
|
||||||
|
* entries contain this blkid, then there exists a leaf with this blkid and
|
||||||
|
* given prefix/len.
|
||||||
|
*
|
||||||
|
* We don't have to check all entries in the range. Instead, we can check only
|
||||||
|
* the first and the last one. If both contain the same blkid, then we check
|
||||||
|
* the neighbor entries (entry before the first and entry after the last).
|
||||||
|
*
|
||||||
|
* A leaf with prefix/len exists if
|
||||||
|
* (first == last AND before-first != blkid AND after-last != blkid).
|
||||||
|
*/
|
||||||
|
static uint64_t
|
||||||
|
zap_check_leaf_by_ptrtbl(zap_t *zap, uint64_t prefix, uint64_t prefix_len)
|
||||||
|
{
|
||||||
|
ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
|
||||||
|
|
||||||
|
uint64_t h = ZAP_PREFIX_HASH(prefix, prefix_len);
|
||||||
|
uint64_t idx = ZAP_HASH_IDX(h, zap_f_phys(zap)->zap_ptrtbl.zt_shift);
|
||||||
|
uint64_t pref_diff = zap_f_phys(zap)->zap_ptrtbl.zt_shift - prefix_len;
|
||||||
|
uint64_t nptrs = (1 << pref_diff);
|
||||||
|
int slbit = prefix & 1;
|
||||||
|
uint64_t first;
|
||||||
|
uint64_t last;
|
||||||
|
|
||||||
|
ASSERT3U(idx+nptrs, <=, (1UL << zap_f_phys(zap)->zap_ptrtbl.zt_shift));
|
||||||
|
|
||||||
|
if (zap_idx_to_blk(zap, idx, &first) != 0)
|
||||||
|
return (0);
|
||||||
|
|
||||||
|
if (zap_idx_to_blk(zap, idx + nptrs - 1, &last) != 0)
|
||||||
|
return (0);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Check the last possible sibling entry. If the first entry and the
|
||||||
|
* last one differs it is not a sibling.
|
||||||
|
*/
|
||||||
|
if (first != last)
|
||||||
|
return (0);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If there are entries after the last one, check it as well.
|
||||||
|
* It should not be the same as the last entry, otherwise it is
|
||||||
|
* not a sibling.
|
||||||
|
*/
|
||||||
|
/*
|
||||||
|
* Check the entry before the first one.
|
||||||
|
*/
|
||||||
|
if (slbit == 0 && idx > 0) {
|
||||||
|
uint64_t before_first;
|
||||||
|
|
||||||
|
if (zap_idx_to_blk(zap, idx - 1, &before_first) != 0)
|
||||||
|
return (0);
|
||||||
|
|
||||||
|
if (before_first == first)
|
||||||
|
return (0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Check the entry after the last one.
|
||||||
|
*/
|
||||||
|
if (slbit == 1 &&
|
||||||
|
idx + nptrs < (1UL << zap_f_phys(zap)->zap_ptrtbl.zt_shift)) {
|
||||||
|
uint64_t after_last;
|
||||||
|
|
||||||
|
if (zap_idx_to_blk(zap, idx + nptrs, &after_last) != 0)
|
||||||
|
return (0);
|
||||||
|
|
||||||
|
if (last == after_last)
|
||||||
|
return (0);
|
||||||
|
}
|
||||||
|
|
||||||
|
return (first);
|
||||||
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
zap_deref_leaf(zap_t *zap, uint64_t h, dmu_tx_t *tx, krw_t lt, zap_leaf_t **lp)
|
zap_deref_leaf(zap_t *zap, uint64_t h, dmu_tx_t *tx, krw_t lt, zap_leaf_t **lp)
|
||||||
{
|
{
|
||||||
|
@ -958,6 +1080,10 @@ fzap_remove(zap_name_t *zn, dmu_tx_t *tx)
|
||||||
if (err == 0) {
|
if (err == 0) {
|
||||||
zap_entry_remove(&zeh);
|
zap_entry_remove(&zeh);
|
||||||
zap_increment_num_entries(zn->zn_zap, -1, tx);
|
zap_increment_num_entries(zn->zn_zap, -1, tx);
|
||||||
|
|
||||||
|
if (zap_leaf_phys(l)->l_hdr.lh_nentries == 0 &&
|
||||||
|
zap_shrink_enabled)
|
||||||
|
return (zap_shrink(zn, l, tx));
|
||||||
}
|
}
|
||||||
zap_put_leaf(l);
|
zap_put_leaf(l);
|
||||||
return (err);
|
return (err);
|
||||||
|
@ -1222,13 +1348,19 @@ fzap_cursor_retrieve(zap_t *zap, zap_cursor_t *zc, zap_attribute_t *za)
|
||||||
ZIO_PRIORITY_ASYNC_READ);
|
ZIO_PRIORITY_ASYNC_READ);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (zc->zc_leaf &&
|
if (zc->zc_leaf) {
|
||||||
(ZAP_HASH_IDX(zc->zc_hash,
|
|
||||||
zap_leaf_phys(zc->zc_leaf)->l_hdr.lh_prefix_len) !=
|
|
||||||
zap_leaf_phys(zc->zc_leaf)->l_hdr.lh_prefix)) {
|
|
||||||
rw_enter(&zc->zc_leaf->l_rwlock, RW_READER);
|
rw_enter(&zc->zc_leaf->l_rwlock, RW_READER);
|
||||||
zap_put_leaf(zc->zc_leaf);
|
|
||||||
zc->zc_leaf = NULL;
|
/*
|
||||||
|
* The leaf was either shrunk or split.
|
||||||
|
*/
|
||||||
|
if ((zap_leaf_phys(zc->zc_leaf)->l_hdr.lh_block_type == 0) ||
|
||||||
|
(ZAP_HASH_IDX(zc->zc_hash,
|
||||||
|
zap_leaf_phys(zc->zc_leaf)->l_hdr.lh_prefix_len) !=
|
||||||
|
zap_leaf_phys(zc->zc_leaf)->l_hdr.lh_prefix)) {
|
||||||
|
zap_put_leaf(zc->zc_leaf);
|
||||||
|
zc->zc_leaf = NULL;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
again:
|
again:
|
||||||
|
@ -1237,8 +1369,6 @@ again:
|
||||||
&zc->zc_leaf);
|
&zc->zc_leaf);
|
||||||
if (err != 0)
|
if (err != 0)
|
||||||
return (err);
|
return (err);
|
||||||
} else {
|
|
||||||
rw_enter(&zc->zc_leaf->l_rwlock, RW_READER);
|
|
||||||
}
|
}
|
||||||
l = zc->zc_leaf;
|
l = zc->zc_leaf;
|
||||||
|
|
||||||
|
@ -1367,6 +1497,245 @@ fzap_get_stats(zap_t *zap, zap_stats_t *zs)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Find last allocated block and update freeblk.
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
zap_trunc(zap_t *zap)
|
||||||
|
{
|
||||||
|
uint64_t nentries;
|
||||||
|
uint64_t lastblk;
|
||||||
|
|
||||||
|
ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
|
||||||
|
|
||||||
|
if (zap_f_phys(zap)->zap_ptrtbl.zt_blk > 0) {
|
||||||
|
/* External ptrtbl */
|
||||||
|
nentries = (1 << zap_f_phys(zap)->zap_ptrtbl.zt_shift);
|
||||||
|
lastblk = zap_f_phys(zap)->zap_ptrtbl.zt_blk +
|
||||||
|
zap_f_phys(zap)->zap_ptrtbl.zt_numblks - 1;
|
||||||
|
} else {
|
||||||
|
/* Embedded ptrtbl */
|
||||||
|
nentries = (1 << ZAP_EMBEDDED_PTRTBL_SHIFT(zap));
|
||||||
|
lastblk = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (uint64_t idx = 0; idx < nentries; idx++) {
|
||||||
|
uint64_t blk;
|
||||||
|
zap_idx_to_blk(zap, idx, &blk);
|
||||||
|
if (blk > lastblk)
|
||||||
|
lastblk = blk;
|
||||||
|
}
|
||||||
|
|
||||||
|
ASSERT3U(lastblk, <, zap_f_phys(zap)->zap_freeblk);
|
||||||
|
|
||||||
|
zap_f_phys(zap)->zap_freeblk = lastblk + 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* ZAP shrinking algorithm.
|
||||||
|
*
|
||||||
|
* We shrink ZAP recuresively removing empty leaves. We can remove an empty leaf
|
||||||
|
* only if it has a sibling. Sibling leaves have the same prefix length and
|
||||||
|
* their prefixes differ only by the least significant (sibling) bit. We require
|
||||||
|
* both siblings to be empty. This eliminates a need to rehash the non-empty
|
||||||
|
* remaining leaf. When we have removed one of two empty sibling, we set ptrtbl
|
||||||
|
* entries of the removed leaf to point out to the remaining leaf. Prefix length
|
||||||
|
* of the remaining leaf is decremented. As a result, it has a new prefix and it
|
||||||
|
* might have a new sibling. So, we repeat the process.
|
||||||
|
*
|
||||||
|
* Steps:
|
||||||
|
* 1. Check if a sibling leaf (sl) exists and it is empty.
|
||||||
|
* 2. Release the leaf (l) if it has the sibling bit (slbit) equal to 1.
|
||||||
|
* 3. Release the sibling (sl) to derefer it again with WRITER lock.
|
||||||
|
* 4. Upgrade zapdir lock to WRITER (once).
|
||||||
|
* 5. Derefer released leaves again.
|
||||||
|
* 6. If it is needed, recheck whether both leaves are still siblings and empty.
|
||||||
|
* 7. Set ptrtbl pointers of the removed leaf (slbit 1) to point out to blkid of
|
||||||
|
* the remaining leaf (slbit 0).
|
||||||
|
* 8. Free disk block of the removed leaf (dmu_free_range).
|
||||||
|
* 9. Decrement prefix_len of the remaining leaf.
|
||||||
|
* 10. Repeat the steps.
|
||||||
|
*/
|
||||||
|
static int
|
||||||
|
zap_shrink(zap_name_t *zn, zap_leaf_t *l, dmu_tx_t *tx)
|
||||||
|
{
|
||||||
|
zap_t *zap = zn->zn_zap;
|
||||||
|
int64_t zt_shift = zap_f_phys(zap)->zap_ptrtbl.zt_shift;
|
||||||
|
uint64_t hash = zn->zn_hash;
|
||||||
|
uint64_t prefix = zap_leaf_phys(l)->l_hdr.lh_prefix;
|
||||||
|
uint64_t prefix_len = zap_leaf_phys(l)->l_hdr.lh_prefix_len;
|
||||||
|
boolean_t trunc = 0;
|
||||||
|
int nshrunk = 0;
|
||||||
|
int err = 0;
|
||||||
|
|
||||||
|
ASSERT3U(zap_leaf_phys(l)->l_hdr.lh_nentries, ==, 0);
|
||||||
|
ASSERT3U(prefix_len, <=, zap_f_phys(zap)->zap_ptrtbl.zt_shift);
|
||||||
|
ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
|
||||||
|
ASSERT3U(ZAP_HASH_IDX(hash, prefix_len), ==, prefix);
|
||||||
|
|
||||||
|
boolean_t writer = B_FALSE;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* To avoid deadlock always deref leaves in the same order -
|
||||||
|
* sibling 0 first, then sibling 1.
|
||||||
|
*/
|
||||||
|
while (prefix_len) {
|
||||||
|
zap_leaf_t *sl;
|
||||||
|
int64_t prefix_diff = zt_shift - prefix_len;
|
||||||
|
uint64_t sl_prefix = prefix ^ 1;
|
||||||
|
uint64_t sl_hash = ZAP_PREFIX_HASH(sl_prefix, prefix_len);
|
||||||
|
int slbit = prefix & 1;
|
||||||
|
|
||||||
|
ASSERT3U(zt_shift, ==, zap_f_phys(zap)->zap_ptrtbl.zt_shift);
|
||||||
|
ASSERT3S(zap_leaf_phys(l)->l_hdr.lh_nentries, ==, 0);
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Check if there is a sibling by reading prttbl ptrs.
|
||||||
|
*/
|
||||||
|
if (zap_check_leaf_by_ptrtbl(zap, sl_prefix, prefix_len) == 0)
|
||||||
|
break;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* sibling 1, unlock it - we haven't yet dereferenced sibling 0.
|
||||||
|
*/
|
||||||
|
if (slbit == 1) {
|
||||||
|
zap_put_leaf(l);
|
||||||
|
l = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Dereference sibling leaf and check if it is empty.
|
||||||
|
*/
|
||||||
|
if ((err = zap_deref_leaf(zap, sl_hash, tx, RW_READER,
|
||||||
|
&sl)) != 0)
|
||||||
|
break;
|
||||||
|
|
||||||
|
ASSERT3U(ZAP_HASH_IDX(sl_hash, prefix_len), ==, sl_prefix);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Check if we have a sibling and it is empty.
|
||||||
|
*/
|
||||||
|
if (zap_leaf_phys(sl)->l_hdr.lh_prefix_len != prefix_len ||
|
||||||
|
zap_leaf_phys(sl)->l_hdr.lh_nentries != 0) {
|
||||||
|
zap_put_leaf(sl);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
zap_put_leaf(sl);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If there two empty sibling, we have work to do, so
|
||||||
|
* we need to lock ZAP ptrtbl as WRITER.
|
||||||
|
*/
|
||||||
|
if (!writer && (writer = zap_tryupgradedir(zap, tx)) == 0) {
|
||||||
|
/* We failed to upgrade */
|
||||||
|
if (l != NULL) {
|
||||||
|
zap_put_leaf(l);
|
||||||
|
l = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Usually, the right way to upgrade from a READER lock
|
||||||
|
* to a WRITER lock is to call zap_unlockdir() and
|
||||||
|
* zap_lockdir(), but we do not have a tag. Instead,
|
||||||
|
* we do it in more sophisticated way.
|
||||||
|
*/
|
||||||
|
rw_exit(&zap->zap_rwlock);
|
||||||
|
rw_enter(&zap->zap_rwlock, RW_WRITER);
|
||||||
|
dmu_buf_will_dirty(zap->zap_dbuf, tx);
|
||||||
|
|
||||||
|
writer = B_TRUE;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Here we have WRITER lock for ptrtbl.
|
||||||
|
* Now, we need a WRITER lock for both siblings leaves.
|
||||||
|
* Also, we have to recheck if the leaves are still siblings
|
||||||
|
* and still empty.
|
||||||
|
*/
|
||||||
|
if (l == NULL) {
|
||||||
|
/* sibling 0 */
|
||||||
|
if ((err = zap_deref_leaf(zap, (slbit ? sl_hash : hash),
|
||||||
|
tx, RW_WRITER, &l)) != 0)
|
||||||
|
break;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The leaf isn't empty anymore or
|
||||||
|
* it was shrunk/split while our locks were down.
|
||||||
|
*/
|
||||||
|
if (zap_leaf_phys(l)->l_hdr.lh_nentries != 0 ||
|
||||||
|
zap_leaf_phys(l)->l_hdr.lh_prefix_len != prefix_len)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* sibling 1 */
|
||||||
|
if ((err = zap_deref_leaf(zap, (slbit ? hash : sl_hash), tx,
|
||||||
|
RW_WRITER, &sl)) != 0)
|
||||||
|
break;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The leaf isn't empty anymore or
|
||||||
|
* it was shrunk/split while our locks were down.
|
||||||
|
*/
|
||||||
|
if (zap_leaf_phys(sl)->l_hdr.lh_nentries != 0 ||
|
||||||
|
zap_leaf_phys(sl)->l_hdr.lh_prefix_len != prefix_len) {
|
||||||
|
zap_put_leaf(sl);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* If we have gotten here, we have a leaf to collapse */
|
||||||
|
uint64_t idx = (slbit ? prefix : sl_prefix) << prefix_diff;
|
||||||
|
uint64_t nptrs = (1ULL << prefix_diff);
|
||||||
|
uint64_t sl_blkid = sl->l_blkid;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Set ptrtbl entries to point out to the slibling 0 blkid
|
||||||
|
*/
|
||||||
|
if ((err = zap_set_idx_range_to_blk(zap, idx, nptrs, l->l_blkid,
|
||||||
|
tx)) != 0) {
|
||||||
|
zap_put_leaf(sl);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Free sibling 1 disk block.
|
||||||
|
*/
|
||||||
|
int bs = FZAP_BLOCK_SHIFT(zap);
|
||||||
|
if (sl_blkid == zap_f_phys(zap)->zap_freeblk - 1)
|
||||||
|
trunc = B_TRUE;
|
||||||
|
|
||||||
|
(void) dmu_free_range(zap->zap_objset, zap->zap_object,
|
||||||
|
sl_blkid << bs, 1 << bs, tx);
|
||||||
|
zap_put_leaf(sl);
|
||||||
|
|
||||||
|
zap_f_phys(zap)->zap_num_leafs--;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Update prefix and prefix_len.
|
||||||
|
*/
|
||||||
|
zap_leaf_phys(l)->l_hdr.lh_prefix >>= 1;
|
||||||
|
zap_leaf_phys(l)->l_hdr.lh_prefix_len--;
|
||||||
|
|
||||||
|
prefix = zap_leaf_phys(l)->l_hdr.lh_prefix;
|
||||||
|
prefix_len = zap_leaf_phys(l)->l_hdr.lh_prefix_len;
|
||||||
|
|
||||||
|
nshrunk++;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (trunc)
|
||||||
|
zap_trunc(zap);
|
||||||
|
|
||||||
|
if (l != NULL)
|
||||||
|
zap_put_leaf(l);
|
||||||
|
|
||||||
|
return (err);
|
||||||
|
}
|
||||||
|
|
||||||
/* CSTYLED */
|
/* CSTYLED */
|
||||||
ZFS_MODULE_PARAM(zfs, , zap_iterate_prefetch, INT, ZMOD_RW,
|
ZFS_MODULE_PARAM(zfs, , zap_iterate_prefetch, INT, ZMOD_RW,
|
||||||
"When iterating ZAP object, prefetch it");
|
"When iterating ZAP object, prefetch it");
|
||||||
|
|
||||||
|
/* CSTYLED */
|
||||||
|
ZFS_MODULE_PARAM(zfs, , zap_shrink_enabled, INT, ZMOD_RW,
|
||||||
|
"Enable ZAP shrinking");
|
||||||
|
|
Loading…
Reference in New Issue