ZFS Version 2.2.2
Merge tag 'zfs-2.2.2' into truenas/zfs-2.2.2-test

ZFS Version 2.2.2

Signed-off-by: Ameer Hamza <ahamza@ixsystems.com>
commit d762e7eabe

META (2 changes)
@@ -1,7 +1,7 @@
 Meta: 1
 Name: zfs
 Branch: 1.0
-Version: 2.2.1
+Version: 2.2.2
 Release: 1
 Release-Tags: relext
 License: CDDL

@@ -32,4 +32,4 @@ For more details see the NOTICE, LICENSE and COPYRIGHT files; `UCRL-CODE-235197`
 
 # Supported Kernels
 * The `META` file contains the officially recognized supported Linux kernel versions.
-* Supported FreeBSD versions are any supported branches and releases starting from 12.2-RELEASE.
+* Supported FreeBSD versions are any supported branches and releases starting from 12.4-RELEASE.

cmd/zdb/zdb.c (129 changes)
@@ -34,6 +34,7 @@
  * Copyright (c) 2021 Allan Jude
  * Copyright (c) 2021 Toomas Soome <tsoome@me.com>
  * Copyright (c) 2023, Klara Inc.
+ * Copyright (c) 2023, Rob Norris <robn@despairlabs.com>
  */
 
 #include <stdio.h>

@@ -80,6 +81,7 @@
 #include <sys/dsl_scan.h>
 #include <sys/btree.h>
 #include <sys/brt.h>
+#include <sys/brt_impl.h>
 #include <zfs_comutil.h>
 #include <sys/zstd/zstd.h>
 

@@ -899,6 +901,8 @@ usage(void)
     "don't print label contents\n");
 (void) fprintf(stderr, " -t --txg=INTEGER "
     "highest txg to use when searching for uberblocks\n");
+(void) fprintf(stderr, " -T --brt-stats "
+    "BRT statistics\n");
 (void) fprintf(stderr, " -u --uberblock "
     "uberblock\n");
 (void) fprintf(stderr, " -U --cachefile=PATH "

@@ -999,6 +1003,15 @@ zdb_nicenum(uint64_t num, char *buf, size_t buflen)
     nicenum(num, buf, buflen);
 }
 
+static void
+zdb_nicebytes(uint64_t bytes, char *buf, size_t buflen)
+{
+    if (dump_opt['P'])
+        (void) snprintf(buf, buflen, "%llu", (longlong_t)bytes);
+    else
+        zfs_nicebytes(bytes, buf, buflen);
+}
+
 static const char histo_stars[] = "****************************************";
 static const uint64_t histo_width = sizeof (histo_stars) - 1;
 

@@ -2081,6 +2094,76 @@ dump_all_ddts(spa_t *spa)
     dump_dedup_ratio(&dds_total);
 }
 
+static void
+dump_brt(spa_t *spa)
+{
+    if (!spa_feature_is_enabled(spa, SPA_FEATURE_BLOCK_CLONING)) {
+        printf("BRT: unsupported on this pool\n");
+        return;
+    }
+
+    if (!spa_feature_is_active(spa, SPA_FEATURE_BLOCK_CLONING)) {
+        printf("BRT: empty\n");
+        return;
+    }
+
+    brt_t *brt = spa->spa_brt;
+    VERIFY(brt);
+
+    char count[32], used[32], saved[32];
+    zdb_nicebytes(brt_get_used(spa), used, sizeof (used));
+    zdb_nicebytes(brt_get_saved(spa), saved, sizeof (saved));
+    uint64_t ratio = brt_get_ratio(spa);
+    printf("BRT: used %s; saved %s; ratio %llu.%02llux\n", used, saved,
+        (u_longlong_t)(ratio / 100), (u_longlong_t)(ratio % 100));
+
+    if (dump_opt['T'] < 2)
+        return;
+
+    for (uint64_t vdevid = 0; vdevid < brt->brt_nvdevs; vdevid++) {
+        brt_vdev_t *brtvd = &brt->brt_vdevs[vdevid];
+        if (brtvd == NULL)
+            continue;
+
+        if (!brtvd->bv_initiated) {
+            printf("BRT: vdev %" PRIu64 ": empty\n", vdevid);
+            continue;
+        }
+
+        zdb_nicenum(brtvd->bv_totalcount, count, sizeof (count));
+        zdb_nicebytes(brtvd->bv_usedspace, used, sizeof (used));
+        zdb_nicebytes(brtvd->bv_savedspace, saved, sizeof (saved));
+        printf("BRT: vdev %" PRIu64 ": refcnt %s; used %s; saved %s\n",
+            vdevid, count, used, saved);
+    }
+
+    if (dump_opt['T'] < 3)
+        return;
+
+    char dva[64];
+    printf("\n%-16s %-10s\n", "DVA", "REFCNT");
+
+    for (uint64_t vdevid = 0; vdevid < brt->brt_nvdevs; vdevid++) {
+        brt_vdev_t *brtvd = &brt->brt_vdevs[vdevid];
+        if (brtvd == NULL || !brtvd->bv_initiated)
+            continue;
+
+        zap_cursor_t zc;
+        zap_attribute_t za;
+        for (zap_cursor_init(&zc, brt->brt_mos, brtvd->bv_mos_entries);
+            zap_cursor_retrieve(&zc, &za) == 0;
+            zap_cursor_advance(&zc)) {
+            uint64_t offset = *(uint64_t *)za.za_name;
+            uint64_t refcnt = za.za_first_integer;
+
+            snprintf(dva, sizeof (dva), "%" PRIu64 ":%llx", vdevid,
+                (u_longlong_t)offset);
+            printf("%-16s %-10llu\n", dva, (u_longlong_t)refcnt);
+        }
+        zap_cursor_fini(&zc);
+    }
+}
+
 static void
 dump_dtl_seg(void *arg, uint64_t start, uint64_t size)
 {

@@ -8093,6 +8176,9 @@ dump_zpool(spa_t *spa)
     if (dump_opt['D'])
         dump_all_ddts(spa);
 
+    if (dump_opt['T'])
+        dump_brt(spa);
+
     if (dump_opt['d'] > 2 || dump_opt['m'])
         dump_metaslabs(spa);
     if (dump_opt['M'])

@@ -8879,6 +8965,7 @@ main(int argc, char **argv)
     {"io-stats", no_argument, NULL, 's'},
     {"simulate-dedup", no_argument, NULL, 'S'},
     {"txg", required_argument, NULL, 't'},
+    {"brt-stats", no_argument, NULL, 'T'},
     {"uberblock", no_argument, NULL, 'u'},
     {"cachefile", required_argument, NULL, 'U'},
     {"verbose", no_argument, NULL, 'v'},

@@ -8892,7 +8979,7 @@ main(int argc, char **argv)
     };
 
     while ((c = getopt_long(argc, argv,
-        "AbBcCdDeEFGhiI:kK:lLmMNo:Op:PqrRsSt:uU:vVx:XYyZ",
+        "AbBcCdDeEFGhiI:kK:lLmMNo:Op:PqrRsSt:TuU:vVx:XYyZ",
         long_options, NULL)) != -1) {
         switch (c) {
         case 'b':

@@ -8914,6 +9001,7 @@ main(int argc, char **argv)
         case 'R':
         case 's':
         case 'S':
+        case 'T':
         case 'u':
         case 'y':
         case 'Z':

@@ -9076,22 +9164,6 @@ main(int argc, char **argv)
     if (dump_opt['l'])
         return (dump_label(argv[0]));
 
-    if (dump_opt['O']) {
-        if (argc != 2)
-            usage();
-        dump_opt['v'] = verbose + 3;
-        return (dump_path(argv[0], argv[1], NULL));
-    }
-    if (dump_opt['r']) {
-        target_is_spa = B_FALSE;
-        if (argc != 3)
-            usage();
-        dump_opt['v'] = verbose;
-        error = dump_path(argv[0], argv[1], &object);
-        if (error != 0)
-            fatal("internal error: %s", strerror(error));
-    }
-
     if (dump_opt['X'] || dump_opt['F'])
         rewind = ZPOOL_DO_REWIND |
             (dump_opt['X'] ? ZPOOL_EXTREME_REWIND : 0);

@@ -9192,6 +9264,29 @@ main(int argc, char **argv)
         searchdirs = NULL;
     }
 
+    /*
+     * We need to make sure to process -O option or call
+     * dump_path after the -e option has been processed,
+     * which imports the pool to the namespace if it's
+     * not in the cachefile.
+     */
+    if (dump_opt['O']) {
+        if (argc != 2)
+            usage();
+        dump_opt['v'] = verbose + 3;
+        return (dump_path(argv[0], argv[1], NULL));
+    }
+
+    if (dump_opt['r']) {
+        target_is_spa = B_FALSE;
+        if (argc != 3)
+            usage();
+        dump_opt['v'] = verbose;
+        error = dump_path(argv[0], argv[1], &object);
+        if (error != 0)
+            fatal("internal error: %s", strerror(error));
+    }
+
     /*
     * import_checkpointed_state makes the assumption that the
     * target pool that we pass it is already part of the spa

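A note on reading the dump_brt() output above: brt_get_ratio(spa) is printed in hundredths, so a value of 325 appears as "3.25x". A single -T prints only that pool-wide summary; with dump_opt['T'] >= 2 (-TT) the per-vdev refcnt/used/saved lines are added, and with >= 3 (-TTT) each vdev's bv_mos_entries ZAP is walked and every DVA is printed with its reference count.
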
@@ -47,7 +47,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_TIMES], [
     #include <linux/fs.h>
 ],[
     struct inode ip;
-    struct timespec64 ts;
+    struct timespec64 ts = {0};
 
     memset(&ip, 0, sizeof(ip));
     inode_set_ctime_to_ts(&ip, ts);

@@ -33,6 +33,7 @@ COMMON_H = \
     sys/bqueue.h \
     sys/btree.h \
     sys/brt.h \
+    sys/brt_impl.h \
     sys/dataset_kstats.h \
     sys/dbuf.h \
     sys/ddt.h \

@@ -101,7 +101,7 @@ void vfs_setmntopt(vfs_t *vfsp, const char *name, const char *arg,
 void vfs_clearmntopt(vfs_t *vfsp, const char *name);
 int vfs_optionisset(const vfs_t *vfsp, const char *opt, char **argp);
 int mount_snapshot(kthread_t *td, vnode_t **vpp, const char *fstype,
-    char *fspath, char *fspec, int fsflags);
+    char *fspath, char *fspec, int fsflags, vfs_t *parent_vfsp);
 
 typedef uint64_t vfs_feature_t;
 

@@ -56,6 +56,7 @@ enum symfollow { NO_FOLLOW = NOFOLLOW };
 #ifndef IN_BASE
 #include_next <sys/vnode.h>
 #endif
+#include <sys/ccompat.h>
 #include <sys/mount.h>
 #include <sys/cred.h>
 #include <sys/fcntl.h>

@@ -104,7 +105,7 @@ vn_flush_cached_data(vnode_t *vp, boolean_t sync)
         zfs_vmobject_wlock(vp->v_object);
         vm_object_page_clean(vp->v_object, 0, 0, flags);
         zfs_vmobject_wunlock(vp->v_object);
-        VOP_UNLOCK(vp);
+        VOP_UNLOCK1(vp);
     }
 }
 #endif

@@ -0,0 +1,199 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or https://opensource.org/licenses/CDDL-1.0.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2020, 2021, 2022 by Pawel Jakub Dawidek
+ */
+
+#ifndef _SYS_BRT_IMPL_H
+#define _SYS_BRT_IMPL_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * BRT - Block Reference Table.
+ */
+#define BRT_OBJECT_VDEV_PREFIX "com.fudosecurity:brt:vdev:"
+
+/*
+ * We divide each VDEV into 16MB chunks. Each chunk is represented in memory
+ * by a 16bit counter, thus 1TB VDEV requires 128kB of memory: (1TB / 16MB) * 2B
+ * Each element in this array represents how many BRT entries do we have in this
+ * chunk of storage. We always load this entire array into memory and update as
+ * needed. By having it in memory we can quickly tell (during zio_free()) if
+ * there are any BRT entries that we might need to update.
+ *
+ * This value cannot be larger than 16MB, at least as long as we support
+ * 512 byte block sizes. With 512 byte block size we can have exactly
+ * 32768 blocks in 16MB. In 32MB we could have 65536 blocks, which is one too
+ * many for a 16bit counter.
+ */
+#define BRT_RANGESIZE (16 * 1024 * 1024)
+_Static_assert(BRT_RANGESIZE / SPA_MINBLOCKSIZE <= UINT16_MAX,
+    "BRT_RANGESIZE is too large.");
+/*
+ * We don't want to update the whole structure every time. Maintain bitmap
+ * of dirty blocks within the regions, so that a single bit represents a
+ * block size of entcounts. For example if we have a 1PB vdev then all
+ * entcounts take 128MB of memory ((64TB / 16MB) * 2B). We can divide this
+ * 128MB array of entcounts into 32kB disk blocks, as we don't want to update
+ * the whole 128MB on disk when we have updated only a single entcount.
+ * We maintain a bitmap where each 32kB disk block within 128MB entcounts array
+ * is represented by a single bit. This gives us 4096 bits. A set bit in the
+ * bitmap means that we had a change in at least one of the 16384 entcounts
+ * that reside on a 32kB disk block (32kB / sizeof (uint16_t)).
+ */
+#define BRT_BLOCKSIZE (32 * 1024)
+#define BRT_RANGESIZE_TO_NBLOCKS(size) \
+    (((size) - 1) / BRT_BLOCKSIZE / sizeof (uint16_t) + 1)
+
+#define BRT_LITTLE_ENDIAN 0
+#define BRT_BIG_ENDIAN 1
+#ifdef _ZFS_LITTLE_ENDIAN
+#define BRT_NATIVE_BYTEORDER BRT_LITTLE_ENDIAN
+#define BRT_NON_NATIVE_BYTEORDER BRT_BIG_ENDIAN
+#else
+#define BRT_NATIVE_BYTEORDER BRT_BIG_ENDIAN
+#define BRT_NON_NATIVE_BYTEORDER BRT_LITTLE_ENDIAN
+#endif
+
+typedef struct brt_vdev_phys {
+    uint64_t bvp_mos_entries;
+    uint64_t bvp_size;
+    uint64_t bvp_byteorder;
+    uint64_t bvp_totalcount;
+    uint64_t bvp_rangesize;
+    uint64_t bvp_usedspace;
+    uint64_t bvp_savedspace;
+} brt_vdev_phys_t;
+
+typedef struct brt_vdev {
+    /*
+     * VDEV id.
+     */
+    uint64_t bv_vdevid;
+    /*
+     * Is the structure initiated?
+     * (bv_entcount and bv_bitmap are allocated?)
+     */
+    boolean_t bv_initiated;
+    /*
+     * Object number in the MOS for the entcount array and brt_vdev_phys.
+     */
+    uint64_t bv_mos_brtvdev;
+    /*
+     * Object number in the MOS for the entries table.
+     */
+    uint64_t bv_mos_entries;
+    /*
+     * Entries to sync.
+     */
+    avl_tree_t bv_tree;
+    /*
+     * Does the bv_entcount[] array needs byte swapping?
+     */
+    boolean_t bv_need_byteswap;
+    /*
+     * Number of entries in the bv_entcount[] array.
+     */
+    uint64_t bv_size;
+    /*
+     * This is the array with BRT entry count per BRT_RANGESIZE.
+     */
+    uint16_t *bv_entcount;
+    /*
+     * Sum of all bv_entcount[]s.
+     */
+    uint64_t bv_totalcount;
+    /*
+     * Space on disk occupied by cloned blocks (without compression).
+     */
+    uint64_t bv_usedspace;
+    /*
+     * How much additional space would be occupied without block cloning.
+     */
+    uint64_t bv_savedspace;
+    /*
+     * brt_vdev_phys needs updating on disk.
+     */
+    boolean_t bv_meta_dirty;
+    /*
+     * bv_entcount[] needs updating on disk.
+     */
+    boolean_t bv_entcount_dirty;
+    /*
+     * bv_entcount[] potentially can be a bit too big to sychronize it all
+     * when we just changed few entcounts. The fields below allow us to
+     * track updates to bv_entcount[] array since the last sync.
+     * A single bit in the bv_bitmap represents as many entcounts as can
+     * fit into a single BRT_BLOCKSIZE.
+     * For example we have 65536 entcounts in the bv_entcount array
+     * (so the whole array is 128kB). We updated bv_entcount[2] and
+     * bv_entcount[5]. In that case only first bit in the bv_bitmap will
+     * be set and we will write only first BRT_BLOCKSIZE out of 128kB.
+     */
+    ulong_t *bv_bitmap;
+    uint64_t bv_nblocks;
+} brt_vdev_t;
+
+/*
+ * In-core brt
+ */
+typedef struct brt {
+    krwlock_t brt_lock;
+    spa_t *brt_spa;
+#define brt_mos brt_spa->spa_meta_objset
+    uint64_t brt_rangesize;
+    uint64_t brt_usedspace;
+    uint64_t brt_savedspace;
+    avl_tree_t brt_pending_tree[TXG_SIZE];
+    kmutex_t brt_pending_lock[TXG_SIZE];
+    /* Sum of all entries across all bv_trees. */
+    uint64_t brt_nentries;
+    brt_vdev_t *brt_vdevs;
+    uint64_t brt_nvdevs;
+} brt_t;
+
+/* Size of bre_offset / sizeof (uint64_t). */
+#define BRT_KEY_WORDS (1)
+
+/*
+ * In-core brt entry.
+ * On-disk we use bre_offset as the key and bre_refcount as the value.
+ */
+typedef struct brt_entry {
+    uint64_t bre_offset;
+    uint64_t bre_refcount;
+    avl_node_t bre_node;
+} brt_entry_t;
+
+typedef struct brt_pending_entry {
+    blkptr_t bpe_bp;
+    int bpe_count;
+    avl_node_t bpe_node;
+} brt_pending_entry_t;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_BRT_IMPL_H */

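The memory and bitmap figures quoted in the comments of this new header follow directly from BRT_RANGESIZE and BRT_BLOCKSIZE. The short program below is an illustrative sketch only, not part of the change; the EX_* constants are hypothetical stand-ins for those macros.

/* Sketch: reproduce the BRT sizing arithmetic described in the comments above. */
#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>

#define EX_RANGESIZE  (16ULL * 1024 * 1024)  /* one 16-bit entcount covers 16MB */
#define EX_BLOCKSIZE  (32ULL * 1024)         /* dirty-bitmap granularity on disk */

static void
brt_sizing(const char *label, uint64_t vdev_bytes)
{
    uint64_t entcounts = vdev_bytes / EX_RANGESIZE;
    uint64_t entcount_bytes = entcounts * sizeof (uint16_t);
    uint64_t bitmap_bits = (entcount_bytes + EX_BLOCKSIZE - 1) / EX_BLOCKSIZE;

    printf("%s: %" PRIu64 " entcounts, %" PRIu64 " KiB in memory, %" PRIu64
        " dirty bits\n", label, entcounts, entcount_bytes / 1024, bitmap_bits);
}

int
main(void)
{
    brt_sizing("1TB vdev", 1ULL << 40);  /* 65536 entcounts, 128 KiB */
    brt_sizing("1PB vdev", 1ULL << 50);  /* 64M entcounts, 128 MiB, 4096 bits */
    return (0);
}

For a 1TB vdev this prints 65536 entcounts in 128 KiB of memory; for a 1PB vdev it prints 128 MiB of entcounts tracked by the 4096 dirty bits that the comment mentions.
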
@@ -1072,8 +1072,7 @@ int dmu_offset_next(objset_t *os, uint64_t object, boolean_t hole,
 int dmu_read_l0_bps(objset_t *os, uint64_t object, uint64_t offset,
     uint64_t length, struct blkptr *bps, size_t *nbpsp);
 int dmu_brt_clone(objset_t *os, uint64_t object, uint64_t offset,
-    uint64_t length, dmu_tx_t *tx, const struct blkptr *bps, size_t nbps,
-    boolean_t replay);
+    uint64_t length, dmu_tx_t *tx, const struct blkptr *bps, size_t nbps);
 
 /*
  * Initial setup and final teardown.

|
|||
.\" Copyright (c) 2017 Lawrence Livermore National Security, LLC.
|
||||
.\" Copyright (c) 2017 Intel Corporation.
|
||||
.\"
|
||||
.Dd June 27, 2023
|
||||
.Dd November 18, 2023
|
||||
.Dt ZDB 8
|
||||
.Os
|
||||
.
|
||||
|
@ -23,7 +23,7 @@
|
|||
.Nd display ZFS storage pool debugging and consistency information
|
||||
.Sh SYNOPSIS
|
||||
.Nm
|
||||
.Op Fl AbcdDFGhikLMNPsvXYy
|
||||
.Op Fl AbcdDFGhikLMNPsTvXYy
|
||||
.Op Fl e Oo Fl V Oc Oo Fl p Ar path Oc Ns …
|
||||
.Op Fl I Ar inflight-I/O-ops
|
||||
.Oo Fl o Ar var Ns = Ns Ar value Oc Ns …
|
||||
|
@ -403,6 +403,13 @@ Display operation counts, bandwidth, and error counts of I/O to the pool from
|
|||
Simulate the effects of deduplication, constructing a DDT and then display
|
||||
that DDT as with
|
||||
.Fl DD .
|
||||
.It Fl T , -brt-stats
|
||||
Display block reference table (BRT) statistics, including the size of uniques
|
||||
blocks cloned, the space saving as a result of cloning, and the saving ratio.
|
||||
.It Fl TT
|
||||
Display the per-vdev BRT statistics, including total references.
|
||||
.It Fl TTT
|
||||
Dump the contents of the block reference tables.
|
||||
.It Fl u , -uberblock
|
||||
Display the current uberblock.
|
||||
.El
|
||||
|
|
|
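As a usage example of the new options documented above: "zdb -T pool" prints only the pool-wide BRT summary, "zdb -TT pool" adds one statistics line per vdev, and "zdb -TTT pool" additionally dumps every cloned DVA with its reference count.
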
@@ -120,7 +120,7 @@ vfs_optionisset(const vfs_t *vfsp, const char *opt, char **argp)
 
 int
 mount_snapshot(kthread_t *td, vnode_t **vpp, const char *fstype, char *fspath,
-    char *fspec, int fsflags)
+    char *fspec, int fsflags, vfs_t *parent_vfsp)
 {
     struct vfsconf *vfsp;
     struct mount *mp;

@@ -220,6 +220,13 @@ mount_snapshot(kthread_t *td, vnode_t **vpp, const char *fstype, char *fspath,
     mp->mnt_opt = mp->mnt_optnew;
     (void) VFS_STATFS(mp, &mp->mnt_stat);
 
+#ifdef VFS_SUPPORTS_EXJAIL_CLONE
+    /*
+     * Clone the mnt_exjail credentials of the parent, as required.
+     */
+    vfs_exjail_clone(parent_vfsp, mp);
+#endif
+
     /*
      * Prevent external consumers of mount options from reading
      * mnt_optnew.

@@ -32,11 +32,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/kmem.h>
 #include <sys/kmem_cache.h>
 #include <sys/zmod.h>
-#if __FreeBSD_version >= 1300041
 #include <contrib/zlib/zlib.h>
-#else
-#include <sys/zlib.h>
-#endif
 #include <sys/kobj.h>
 
 

@@ -90,11 +86,7 @@ zlib_inflateInit(z_stream *stream)
 static int
 zlib_inflate(z_stream *stream, int finish)
 {
-#if __FreeBSD_version >= 1300024
     return (inflate(stream, finish));
-#else
-    return (_zlib104_inflate(stream, finish));
-#endif
 }
 
 

@@ -46,6 +46,7 @@ knlist_sx_xunlock(void *arg)
     sx_xunlock((struct sx *)arg);
 }
 
+#if __FreeBSD_version >= 1300128
 static void
 knlist_sx_assert_lock(void *arg, int what)
 {

@@ -55,11 +56,28 @@ knlist_sx_assert_lock(void *arg, int what)
     else
         sx_assert((struct sx *)arg, SX_UNLOCKED);
 }
+#else
+static void
+knlist_sx_assert_locked(void *arg)
+{
+    sx_assert((struct sx *)arg, SX_LOCKED);
+}
+static void
+knlist_sx_assert_unlocked(void *arg)
+{
+    sx_assert((struct sx *)arg, SX_UNLOCKED);
+}
+#endif
 
 void
 knlist_init_sx(struct knlist *knl, struct sx *lock)
 {
 
+#if __FreeBSD_version >= 1300128
     knlist_init(knl, lock, knlist_sx_xlock, knlist_sx_xunlock,
         knlist_sx_assert_lock);
+#else
+    knlist_init(knl, lock, knlist_sx_xlock, knlist_sx_xunlock,
+        knlist_sx_assert_locked, knlist_sx_assert_unlocked);
+#endif
 }
 

@@ -1026,7 +1026,8 @@ zfsctl_snapdir_lookup(struct vop_lookup_args *ap)
         "%s/" ZFS_CTLDIR_NAME "/snapshot/%s",
         dvp->v_vfsp->mnt_stat.f_mntonname, name);
 
-    err = mount_snapshot(curthread, vpp, "zfs", mountpoint, fullname, 0);
+    err = mount_snapshot(curthread, vpp, "zfs", mountpoint, fullname, 0,
+        dvp->v_vfsp);
     kmem_free(mountpoint, mountpoint_len);
     if (err == 0) {
         /*

@@ -6213,6 +6213,7 @@ zfs_deallocate(struct vop_deallocate_args *ap)
 }
 #endif
 
+#if __FreeBSD_version >= 1300039
 #ifndef _SYS_SYSPROTO_H_
 struct vop_copy_file_range_args {
     struct vnode *a_invp;

@@ -6319,6 +6320,7 @@ bad_write_fallback:
         ap->a_incred, ap->a_outcred, ap->a_fsizetd);
     return (error);
 }
+#endif
 
 struct vop_vector zfs_vnodeops;
 struct vop_vector zfs_fifoops;

@@ -6383,7 +6385,9 @@ struct vop_vector zfs_vnodeops = {
 #if __FreeBSD_version >= 1400043
     .vop_add_writecount = vop_stdadd_writecount_nomsync,
 #endif
+#if __FreeBSD_version >= 1300039
     .vop_copy_file_range = zfs_freebsd_copy_file_range,
+#endif
 };
 VFS_VOP_VECTOR_REGISTER(zfs_vnodeops);
 

module/zfs/brt.c (178 changes)
@@ -28,6 +28,7 @@
 #include <sys/spa_impl.h>
 #include <sys/zio.h>
 #include <sys/brt.h>
+#include <sys/brt_impl.h>
 #include <sys/ddt.h>
 #include <sys/bitmap.h>
 #include <sys/zap.h>

@@ -234,178 +235,15 @@
  * destination dataset is mounted and its ZIL replayed.
  * To address this situation we leverage zil_claim() mechanism where ZFS will
  * parse all the ZILs on pool import. When we come across TX_CLONE_RANGE
- * entries, we will bump reference counters for their BPs in the BRT and then
- * on mount and ZIL replay we will just attach BPs to the file without
- * bumping reference counters.
- * Note it is still possible that after zil_claim() we never mount the
- * destination, so we never replay its ZIL and we destroy it. This way we would
- * end up with leaked references in BRT. We address that too as ZFS gives us
- * a chance to clean this up on dataset destroy (see zil_free_clone_range()).
+ * entries, we will bump reference counters for their BPs in the BRT. Then
+ * on mount and ZIL replay we bump the reference counters once more, while the
+ * first references are dropped during ZIL destroy by zil_free_clone_range().
+ * It is possible that after zil_claim() we never mount the destination, so
+ * we never replay its ZIL and just destroy it. In this case the only taken
+ * references will be dropped by zil_free_clone_range(), since the cloning is
+ * not going to ever take place.
  */
 
-/*
- * BRT - Block Reference Table.
- */
-#define BRT_OBJECT_VDEV_PREFIX "com.fudosecurity:brt:vdev:"
-
-/*
- * We divide each VDEV into 16MB chunks. Each chunk is represented in memory
- * by a 16bit counter, thus 1TB VDEV requires 128kB of memory: (1TB / 16MB) * 2B
- * Each element in this array represents how many BRT entries do we have in this
- * chunk of storage. We always load this entire array into memory and update as
- * needed. By having it in memory we can quickly tell (during zio_free()) if
- * there are any BRT entries that we might need to update.
- *
- * This value cannot be larger than 16MB, at least as long as we support
- * 512 byte block sizes. With 512 byte block size we can have exactly
- * 32768 blocks in 16MB. In 32MB we could have 65536 blocks, which is one too
- * many for a 16bit counter.
- */
-#define BRT_RANGESIZE (16 * 1024 * 1024)
-_Static_assert(BRT_RANGESIZE / SPA_MINBLOCKSIZE <= UINT16_MAX,
-    "BRT_RANGESIZE is too large.");
-/*
- * We don't want to update the whole structure every time. Maintain bitmap
- * of dirty blocks within the regions, so that a single bit represents a
- * block size of entcounts. For example if we have a 1PB vdev then all
- * entcounts take 128MB of memory ((64TB / 16MB) * 2B). We can divide this
- * 128MB array of entcounts into 32kB disk blocks, as we don't want to update
- * the whole 128MB on disk when we have updated only a single entcount.
- * We maintain a bitmap where each 32kB disk block within 128MB entcounts array
- * is represented by a single bit. This gives us 4096 bits. A set bit in the
- * bitmap means that we had a change in at least one of the 16384 entcounts
- * that reside on a 32kB disk block (32kB / sizeof (uint16_t)).
- */
-#define BRT_BLOCKSIZE (32 * 1024)
-#define BRT_RANGESIZE_TO_NBLOCKS(size) \
-    (((size) - 1) / BRT_BLOCKSIZE / sizeof (uint16_t) + 1)
-
-#define BRT_LITTLE_ENDIAN 0
-#define BRT_BIG_ENDIAN 1
-#ifdef _ZFS_LITTLE_ENDIAN
-#define BRT_NATIVE_BYTEORDER BRT_LITTLE_ENDIAN
-#define BRT_NON_NATIVE_BYTEORDER BRT_BIG_ENDIAN
-#else
-#define BRT_NATIVE_BYTEORDER BRT_BIG_ENDIAN
-#define BRT_NON_NATIVE_BYTEORDER BRT_LITTLE_ENDIAN
-#endif
-
-typedef struct brt_vdev_phys {
-    uint64_t bvp_mos_entries;
-    uint64_t bvp_size;
-    uint64_t bvp_byteorder;
-    uint64_t bvp_totalcount;
-    uint64_t bvp_rangesize;
-    uint64_t bvp_usedspace;
-    uint64_t bvp_savedspace;
-} brt_vdev_phys_t;
-
-typedef struct brt_vdev {
-    /*
-     * VDEV id.
-     */
-    uint64_t bv_vdevid;
-    /*
-     * Is the structure initiated?
-     * (bv_entcount and bv_bitmap are allocated?)
-     */
-    boolean_t bv_initiated;
-    /*
-     * Object number in the MOS for the entcount array and brt_vdev_phys.
-     */
-    uint64_t bv_mos_brtvdev;
-    /*
-     * Object number in the MOS for the entries table.
-     */
-    uint64_t bv_mos_entries;
-    /*
-     * Entries to sync.
-     */
-    avl_tree_t bv_tree;
-    /*
-     * Does the bv_entcount[] array needs byte swapping?
-     */
-    boolean_t bv_need_byteswap;
-    /*
-     * Number of entries in the bv_entcount[] array.
-     */
-    uint64_t bv_size;
-    /*
-     * This is the array with BRT entry count per BRT_RANGESIZE.
-     */
-    uint16_t *bv_entcount;
-    /*
-     * Sum of all bv_entcount[]s.
-     */
-    uint64_t bv_totalcount;
-    /*
-     * Space on disk occupied by cloned blocks (without compression).
-     */
-    uint64_t bv_usedspace;
-    /*
-     * How much additional space would be occupied without block cloning.
-     */
-    uint64_t bv_savedspace;
-    /*
-     * brt_vdev_phys needs updating on disk.
-     */
-    boolean_t bv_meta_dirty;
-    /*
-     * bv_entcount[] needs updating on disk.
-     */
-    boolean_t bv_entcount_dirty;
-    /*
-     * bv_entcount[] potentially can be a bit too big to sychronize it all
-     * when we just changed few entcounts. The fields below allow us to
-     * track updates to bv_entcount[] array since the last sync.
-     * A single bit in the bv_bitmap represents as many entcounts as can
-     * fit into a single BRT_BLOCKSIZE.
-     * For example we have 65536 entcounts in the bv_entcount array
-     * (so the whole array is 128kB). We updated bv_entcount[2] and
-     * bv_entcount[5]. In that case only first bit in the bv_bitmap will
-     * be set and we will write only first BRT_BLOCKSIZE out of 128kB.
-     */
-    ulong_t *bv_bitmap;
-    uint64_t bv_nblocks;
-} brt_vdev_t;
-
-/*
- * In-core brt
- */
-typedef struct brt {
-    krwlock_t brt_lock;
-    spa_t *brt_spa;
-#define brt_mos brt_spa->spa_meta_objset
-    uint64_t brt_rangesize;
-    uint64_t brt_usedspace;
-    uint64_t brt_savedspace;
-    avl_tree_t brt_pending_tree[TXG_SIZE];
-    kmutex_t brt_pending_lock[TXG_SIZE];
-    /* Sum of all entries across all bv_trees. */
-    uint64_t brt_nentries;
-    brt_vdev_t *brt_vdevs;
-    uint64_t brt_nvdevs;
-} brt_t;
-
-/* Size of bre_offset / sizeof (uint64_t). */
-#define BRT_KEY_WORDS (1)
-
-/*
- * In-core brt entry.
- * On-disk we use bre_offset as the key and bre_refcount as the value.
- */
-typedef struct brt_entry {
-    uint64_t bre_offset;
-    uint64_t bre_refcount;
-    avl_node_t bre_node;
-} brt_entry_t;
-
-typedef struct brt_pending_entry {
-    blkptr_t bpe_bp;
-    int bpe_count;
-    avl_node_t bpe_node;
-} brt_pending_entry_t;
-
 static kmem_cache_t *brt_entry_cache;
 static kmem_cache_t *brt_pending_entry_cache;
 

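Concretely, for a single cloned BP on this path: zil_claim() takes the first BRT reference; if the dataset is later mounted, ZIL replay takes a second reference and zil_free_clone_range() drops the first when the ZIL is destroyed, leaving exactly the one reference the replayed clone holds; if the dataset is never mounted and is destroyed instead, zil_free_clone_range() drops the only reference that was ever taken, so nothing is leaked.
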
@@ -2700,15 +2700,23 @@ dmu_buf_will_clone(dmu_buf_t *db_fake, dmu_tx_t *tx)
      * writes and clones into this block.
      */
     mutex_enter(&db->db_mtx);
+    DBUF_VERIFY(db);
     VERIFY(!dbuf_undirty(db, tx));
     ASSERT3P(dbuf_find_dirty_eq(db, tx->tx_txg), ==, NULL);
     if (db->db_buf != NULL) {
         arc_buf_destroy(db->db_buf, db);
         db->db_buf = NULL;
+        dbuf_clear_data(db);
     }
+
+    db->db_state = DB_NOFILL;
+    DTRACE_SET_STATE(db, "allocating NOFILL buffer for clone");
+
+    DBUF_VERIFY(db);
     mutex_exit(&db->db_mtx);
 
-    dmu_buf_will_not_fill(db_fake, tx);
+    dbuf_noread(db);
+    (void) dbuf_dirty(db, tx);
 }
 
 void

@@ -2267,7 +2267,7 @@ out:
 
 int
 dmu_brt_clone(objset_t *os, uint64_t object, uint64_t offset, uint64_t length,
-    dmu_tx_t *tx, const blkptr_t *bps, size_t nbps, boolean_t replay)
+    dmu_tx_t *tx, const blkptr_t *bps, size_t nbps)
 {
     spa_t *spa;
     dmu_buf_t **dbp, *dbuf;

@@ -2341,10 +2341,8 @@ dmu_brt_clone(objset_t *os, uint64_t object, uint64_t offset, uint64_t length,
          * When data in embedded into BP there is no need to create
          * BRT entry as there is no data block. Just copy the BP as
          * it contains the data.
-         * Also, when replaying ZIL we don't want to bump references
-         * in the BRT as it was already done during ZIL claim.
          */
-        if (!replay && !BP_IS_HOLE(bp) && !BP_IS_EMBEDDED(bp)) {
+        if (!BP_IS_HOLE(bp) && !BP_IS_EMBEDDED(bp)) {
             brt_pending_add(spa, bp, tx);
         }
     }

@@ -1764,7 +1764,14 @@ dnode_try_claim(objset_t *os, uint64_t object, int slots)
 }
 
 /*
- * Checks if the dnode contains any uncommitted dirty records.
+ * Checks if the dnode itself is dirty, or is carrying any uncommitted records.
+ * It is important to check both conditions, as some operations (eg appending
+ * to a file) can dirty both as a single logical unit, but they are not synced
+ * out atomically, so checking one and not the other can result in an object
+ * appearing to be clean mid-way through a commit.
+ *
+ * Do not change this lightly! If you get it wrong, dmu_offset_next() can
+ * detect a hole where there is really data, leading to silent corruption.
  */
 boolean_t
 dnode_is_dirty(dnode_t *dn)

@@ -1772,7 +1779,8 @@ dnode_is_dirty(dnode_t *dn)
     mutex_enter(&dn->dn_mtx);
 
     for (int i = 0; i < TXG_SIZE; i++) {
-        if (multilist_link_active(&dn->dn_dirty_link[i])) {
+        if (multilist_link_active(&dn->dn_dirty_link[i]) ||
+            !list_is_empty(&dn->dn_dirty_records[i])) {
             mutex_exit(&dn->dn_mtx);
             return (B_TRUE);
         }

@@ -1333,7 +1333,7 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp,
         }
 
         error = dmu_brt_clone(outos, outzp->z_id, outoff, size, tx,
-            bps, nbps, B_FALSE);
+            bps, nbps);
         if (error != 0) {
             dmu_tx_commit(tx);
             break;

@@ -1467,7 +1467,7 @@ zfs_clone_range_replay(znode_t *zp, uint64_t off, uint64_t len, uint64_t blksz,
     if (zp->z_blksz < blksz)
         zfs_grow_blocksize(zp, blksz, tx);
 
-    dmu_brt_clone(zfsvfs->z_os, zp->z_id, off, len, tx, bps, nbps, B_TRUE);
+    dmu_brt_clone(zfsvfs->z_os, zp->z_id, off, len, tx, bps, nbps);
 
     zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime);
 

@@ -158,22 +158,23 @@ zio_init(void)
     zio_link_cache = kmem_cache_create("zio_link_cache",
         sizeof (zio_link_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
 
+    /*
+     * For small buffers, we want a cache for each multiple of
+     * SPA_MINBLOCKSIZE. For larger buffers, we want a cache
+     * for each quarter-power of 2.
+     */
     for (c = 0; c < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT; c++) {
         size_t size = (c + 1) << SPA_MINBLOCKSHIFT;
-        size_t align, cflags, data_cflags;
-        char name[32];
-
-        /*
-         * Create cache for each half-power of 2 size, starting from
-         * SPA_MINBLOCKSIZE. It should give us memory space efficiency
-         * of ~7/8, sufficient for transient allocations mostly using
-         * these caches.
-         */
         size_t p2 = size;
+        size_t align = 0;
+        size_t data_cflags, cflags;
+
+        data_cflags = KMC_NODEBUG;
+        cflags = (zio_exclude_metadata || size > zio_buf_debug_limit) ?
+            KMC_NODEBUG : 0;
+
         while (!ISP2(p2))
             p2 &= p2 - 1;
-        if (!IS_P2ALIGNED(size, p2 / 2))
-            continue;
 
 #ifndef _KERNEL
         /*

@@ -184,37 +185,47 @@ zio_init(void)
          */
         if (arc_watch && !IS_P2ALIGNED(size, PAGESIZE))
             continue;
+        /*
+         * Here's the problem - on 4K native devices in userland on
+         * Linux using O_DIRECT, buffers must be 4K aligned or I/O
+         * will fail with EINVAL, causing zdb (and others) to coredump.
+         * Since userland probably doesn't need optimized buffer caches,
+         * we just force 4K alignment on everything.
+         */
+        align = 8 * SPA_MINBLOCKSIZE;
+#else
+        if (size < PAGESIZE) {
+            align = SPA_MINBLOCKSIZE;
+        } else if (IS_P2ALIGNED(size, p2 >> 2)) {
+            align = PAGESIZE;
+        }
 #endif
 
-        if (IS_P2ALIGNED(size, PAGESIZE))
-            align = PAGESIZE;
-        else
-            align = 1 << (highbit64(size ^ (size - 1)) - 1);
-
-        cflags = (zio_exclude_metadata || size > zio_buf_debug_limit) ?
-            KMC_NODEBUG : 0;
-        data_cflags = KMC_NODEBUG;
-        if (cflags == data_cflags) {
-            /*
-             * Resulting kmem caches would be identical.
-             * Save memory by creating only one.
-             */
-            (void) snprintf(name, sizeof (name),
-                "zio_buf_comb_%lu", (ulong_t)size);
-            zio_buf_cache[c] = kmem_cache_create(name, size, align,
-                NULL, NULL, NULL, NULL, NULL, cflags);
-            zio_data_buf_cache[c] = zio_buf_cache[c];
-            continue;
-        }
-        (void) snprintf(name, sizeof (name), "zio_buf_%lu",
-            (ulong_t)size);
-        zio_buf_cache[c] = kmem_cache_create(name, size, align,
-            NULL, NULL, NULL, NULL, NULL, cflags);
-
-        (void) snprintf(name, sizeof (name), "zio_data_buf_%lu",
-            (ulong_t)size);
-        zio_data_buf_cache[c] = kmem_cache_create(name, size, align,
-            NULL, NULL, NULL, NULL, NULL, data_cflags);
+        if (align != 0) {
+            char name[36];
+            if (cflags == data_cflags) {
+                /*
+                 * Resulting kmem caches would be identical.
+                 * Save memory by creating only one.
+                 */
+                (void) snprintf(name, sizeof (name),
+                    "zio_buf_comb_%lu", (ulong_t)size);
+                zio_buf_cache[c] = kmem_cache_create(name,
+                    size, align, NULL, NULL, NULL, NULL, NULL,
+                    cflags);
+                zio_data_buf_cache[c] = zio_buf_cache[c];
+                continue;
+            }
+            (void) snprintf(name, sizeof (name), "zio_buf_%lu",
+                (ulong_t)size);
+            zio_buf_cache[c] = kmem_cache_create(name, size,
+                align, NULL, NULL, NULL, NULL, NULL, cflags);
+
+            (void) snprintf(name, sizeof (name), "zio_data_buf_%lu",
+                (ulong_t)size);
+            zio_data_buf_cache[c] = kmem_cache_create(name, size,
+                align, NULL, NULL, NULL, NULL, NULL, data_cflags);
+        }
     }
 
     while (--c != 0) {

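The restored selection rule above (a cache for every SPA_MINBLOCKSIZE multiple below the page size, and one per quarter-power-of-2 multiple above it) can be previewed in user space. The sketch below is illustrative only, not ZFS code; the EX_* constants stand in for SPA_MINBLOCKSIZE, SPA_MAXBLOCKSIZE and PAGESIZE, and only the in-kernel branch is modeled.

/* Sketch: which buffer sizes get a kmem cache under the quarter-power scheme. */
#include <stdio.h>
#include <stddef.h>

#define EX_MINBLOCKSIZE 512               /* stand-in for SPA_MINBLOCKSIZE */
#define EX_MAXBLOCKSIZE (16 * 1024 * 1024) /* stand-in for SPA_MAXBLOCKSIZE */
#define EX_PAGESIZE     4096              /* assumed page size */

static int
ispow2(size_t x)
{
    return (x != 0 && (x & (x - 1)) == 0);
}

int
main(void)
{
    for (size_t size = EX_MINBLOCKSIZE; size <= EX_MAXBLOCKSIZE;
        size += EX_MINBLOCKSIZE) {
        size_t p2 = size, align = 0;

        while (!ispow2(p2))               /* largest power of two <= size */
            p2 &= p2 - 1;

        if (size < EX_PAGESIZE)
            align = EX_MINBLOCKSIZE;
        else if ((size % (p2 >> 2)) == 0) /* quarter-power multiple */
            align = EX_PAGESIZE;

        if (align != 0)                    /* only these sizes get a cache */
            printf("zio_buf_%zu (align %zu)\n", size, align);
    }
    return (0);
}

Running it lists every 512-byte multiple up to 3.5 KiB with 512-byte alignment, then 4, 5, 6, 7, 8, 10, 12, 14, 16 KiB and so on, each page-aligned.
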
@@ -103,6 +103,7 @@ if [ -d ${dkms_root}/%{module} ]; then
             fi
         fi
     done
+    cd ${dkms_root}
 fi
 
 # Uninstall this version of zfs dkms modules before installation of the package.

@@ -58,7 +58,7 @@ set -A args "create" "add" "destroy" "import fakepool" \
     "setvprop" "blah blah" "-%" "--?" "-*" "-=" \
     "-a" "-f" "-g" "-j" "-n" "-o" "-p" "-p /tmp" \
     "-t" "-w" "-z" "-E" "-H" "-I" "-J" \
-    "-Q" "-R" "-T" "-W"
+    "-Q" "-R" "-W"
 
 log_assert "Execute zdb using invalid parameters."
 

@@ -123,7 +123,10 @@ if not httpd:
 with open('$HTTPS_PORT_FILE', 'w') as portf:
     print(port, file=portf)
 
-httpd.socket = ssl.wrap_socket(httpd.socket, server_side=True, keyfile='/$TESTPOOL/snakeoil.key', certfile='$SSL_CA_CERT_FILE', ssl_version=ssl.PROTOCOL_TLS)
+sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
+sslctx.check_hostname = False
+sslctx.load_cert_chain(certfile='$SSL_CA_CERT_FILE', keyfile='/$TESTPOOL/snakeoil.key')
+httpd.socket = httpd.socket = sslctx.wrap_socket(httpd.socket, server_side=True)
 
 os.chdir('$STF_SUITE/tests/functional/cli_root/zfs_load-key')
 