Merge 3070faa798
into 1713aa7b4d
This commit is contained in:
commit
39a4002f3d
|
@ -57,6 +57,8 @@ dist_noinst_DATA += module/os/linux/spl/THIRDPARTYLICENSE.gplv2
|
|||
dist_noinst_DATA += module/os/linux/spl/THIRDPARTYLICENSE.gplv2.descrip
|
||||
dist_noinst_DATA += module/zfs/THIRDPARTYLICENSE.cityhash
|
||||
dist_noinst_DATA += module/zfs/THIRDPARTYLICENSE.cityhash.descrip
|
||||
dist_noinst_DATA += module/zfs/THIRDPARTYLICENSE.zia
|
||||
dist_noinst_DATA += module/zfs/THIRDPARTYLICENSE.zia.descrip
|
||||
|
||||
@CODE_COVERAGE_RULES@
|
||||
|
||||
|
|
|
@ -44,6 +44,7 @@ AM_CPPFLAGS += -DPKGDATADIR=\"$(pkgdatadir)\"
|
|||
AM_CPPFLAGS += $(DEBUG_CPPFLAGS)
|
||||
AM_CPPFLAGS += $(CODE_COVERAGE_CPPFLAGS)
|
||||
AM_CPPFLAGS += -DTEXT_DOMAIN=\"zfs-@ac_system_l@-user\"
|
||||
AM_CPPFLAGS += $(ZIA_CPPFLAGS)
|
||||
|
||||
if ASAN_ENABLED
|
||||
AM_CPPFLAGS += -DZFS_ASAN_ENABLED
|
||||
|
|
|
@ -263,6 +263,8 @@ AC_DEFUN([ZFS_AC_CONFIG], [
|
|||
AC_SUBST(TEST_JOBS)
|
||||
])
|
||||
|
||||
ZFS_AC_ZIA
|
||||
|
||||
ZFS_INIT_SYSV=
|
||||
ZFS_INIT_SYSTEMD=
|
||||
ZFS_WANT_MODULES_LOAD_D=
|
||||
|
@ -294,7 +296,8 @@ AC_DEFUN([ZFS_AC_CONFIG], [
|
|||
[test "x$qatsrc" != x ])
|
||||
AM_CONDITIONAL([WANT_DEVNAME2DEVID], [test "x$user_libudev" = xyes ])
|
||||
AM_CONDITIONAL([WANT_MMAP_LIBAIO], [test "x$user_libaio" = xyes ])
|
||||
AM_CONDITIONAL([PAM_ZFS_ENABLED], [test "x$enable_pam" = xyes])
|
||||
AM_CONDITIONAL([PAM_ZFS_ENABLED], [test "x$enable_pam" = xyes ])
|
||||
AM_CONDITIONAL([ZIA_ENABLED], [test "x$enable_zia" = xyes ])
|
||||
])
|
||||
|
||||
dnl #
|
||||
|
@ -342,6 +345,10 @@ AC_DEFUN([ZFS_AC_RPM], [
|
|||
RPM_DEFINE_COMMON=${RPM_DEFINE_COMMON}' --define "__strip /bin/true"'
|
||||
])
|
||||
|
||||
AS_IF([test "x$enable_zia" = xyes], [
|
||||
RPM_DEFINE_COMMON=${RPM_DEFINE_COMMON}' --define "$(WITH_ZIA) 1" --define "DPUSM_ROOT $(DPUSM_ROOT)"'
|
||||
])
|
||||
|
||||
RPM_DEFINE_UTIL=' --define "_initconfdir $(initconfdir)"'
|
||||
|
||||
dnl # Make the next three RPM_DEFINE_UTIL additions conditional, since
|
||||
|
|
|
@ -0,0 +1,45 @@
|
|||
dnl # Adds --with-zia=PATH to configuration options
|
||||
dnl # The path provided should point to the DPUSM
|
||||
dnl # root and contain Module.symvers.
|
||||
AC_DEFUN([ZFS_AC_ZIA], [
	dnl # --with-zia=PATH enables Z.I.A.; --without-zia or omitting the
	dnl # option leaves enable_zia=no and skips every check below.
	AC_ARG_WITH([zia],
		[AS_HELP_STRING([--with-zia=PATH],
		[Path to Data Processing Unit Services Module])],
		[
			DPUSM_ROOT="$withval"
			AS_IF([test "x$DPUSM_ROOT" != "xno"],
				[enable_zia=yes],
				[enable_zia=no])
		],
		[enable_zia=no]
	)

	dnl # Use the portable = operator and test ! form here; the == operator
	dnl # and the ! command prefix are not understood by every shell that
	dnl # configure may run under.
	AS_IF([test "x$enable_zia" = "xyes"], [
		AS_IF([test ! -d "$DPUSM_ROOT"],
			[AC_MSG_ERROR([--with-zia=PATH requires the DPUSM root directory])]
		)

		DPUSM_SYMBOLS="$DPUSM_ROOT/Module.symvers"

		dnl # Quote the path so a DPUSM root containing whitespace
		dnl # is still tested as a single word.
		AC_MSG_CHECKING([for DPUSM Module.symvers])
		AS_IF([test -r "$DPUSM_SYMBOLS"],
			[
				AC_MSG_RESULT([$DPUSM_SYMBOLS])
				ZIA_CPPFLAGS="-DZIA=1 -I$DPUSM_ROOT/include"
				KERNEL_ZIA_CPPFLAGS="-DZIA=1 -I$DPUSM_ROOT/include"
				WITH_ZIA="_with_zia"

				AC_SUBST(WITH_ZIA)
				AC_SUBST(KERNEL_ZIA_CPPFLAGS)
				AC_SUBST(ZIA_CPPFLAGS)
				AC_SUBST(DPUSM_SYMBOLS)
				AC_SUBST(DPUSM_ROOT)
			],
			[
				AC_MSG_RESULT([not found])
				AC_MSG_ERROR([
	*** Failed to find Module.symvers in:
	$DPUSM_SYMBOLS
				])
			]
		)
	])
])
|
|
@ -143,6 +143,9 @@ COMMON_H = \
|
|||
sys/zfs_vfsops.h \
|
||||
sys/zfs_vnops.h \
|
||||
sys/zfs_znode.h \
|
||||
sys/zia.h \
|
||||
sys/zia_cddl.h \
|
||||
sys/zia_private.h \
|
||||
sys/zil.h \
|
||||
sys/zil_impl.h \
|
||||
sys/zio.h \
|
||||
|
|
|
@ -64,6 +64,7 @@ typedef struct abd {
|
|||
list_t abd_gang_chain;
|
||||
} abd_gang;
|
||||
} abd_u;
|
||||
void *abd_zia_handle;
|
||||
} abd_t;
|
||||
|
||||
typedef int abd_iter_func_t(void *buf, size_t len, void *priv);
|
||||
|
|
|
@ -261,6 +261,19 @@ typedef enum {
|
|||
ZPOOL_PROP_DEDUP_TABLE_SIZE,
|
||||
ZPOOL_PROP_DEDUP_TABLE_QUOTA,
|
||||
ZPOOL_PROP_DEDUPCACHED,
|
||||
ZPOOL_PROP_ZIA_AVAILABLE,
|
||||
ZPOOL_PROP_ZIA_PROVIDER,
|
||||
ZPOOL_PROP_ZIA_COMPRESS,
|
||||
ZPOOL_PROP_ZIA_DECOMPRESS,
|
||||
ZPOOL_PROP_ZIA_CHECKSUM,
|
||||
ZPOOL_PROP_ZIA_RAIDZ1_GEN,
|
||||
ZPOOL_PROP_ZIA_RAIDZ2_GEN,
|
||||
ZPOOL_PROP_ZIA_RAIDZ3_GEN,
|
||||
ZPOOL_PROP_ZIA_RAIDZ1_REC,
|
||||
ZPOOL_PROP_ZIA_RAIDZ2_REC,
|
||||
ZPOOL_PROP_ZIA_RAIDZ3_REC,
|
||||
ZPOOL_PROP_ZIA_FILE_WRITE,
|
||||
ZPOOL_PROP_ZIA_DISK_WRITE,
|
||||
ZPOOL_NUM_PROPS
|
||||
} zpool_prop_t;
|
||||
|
||||
|
|
|
@ -52,6 +52,7 @@
|
|||
#include <sys/zfeature.h>
|
||||
#include <sys/zthr.h>
|
||||
#include <sys/dsl_deadlist.h>
|
||||
#include <sys/zia.h>
|
||||
#include <zfeature_common.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
@ -479,6 +480,8 @@ struct spa {
|
|||
*/
|
||||
spa_config_lock_t spa_config_lock[SCL_LOCKS]; /* config changes */
|
||||
zfs_refcount_t spa_refcount; /* number of opens */
|
||||
|
||||
zia_props_t spa_zia_props;
|
||||
};
|
||||
|
||||
extern char *spa_config_path;
|
||||
|
|
|
@ -42,5 +42,13 @@
|
|||
|
||||
#ifdef _KERNEL
|
||||
#include <sys/vdev.h>
|
||||
|
||||
#ifdef __linux__
|
||||
int __vdev_classic_physio(struct block_device *bdev, zio_t *zio,
|
||||
size_t io_size, uint64_t io_offset, int rw, int flags);
|
||||
int vdev_disk_io_flush(struct block_device *bdev, zio_t *zio);
|
||||
void vdev_disk_error(zio_t *zio);
|
||||
#endif /* __linux__ */
|
||||
|
||||
#endif /* _KERNEL */
|
||||
#endif /* _SYS_VDEV_DISK_H */
|
||||
|
|
|
@ -40,6 +40,10 @@ typedef struct vdev_file {
|
|||
extern void vdev_file_init(void);
|
||||
extern void vdev_file_fini(void);
|
||||
|
||||
#ifdef __linux__
|
||||
extern mode_t vdev_file_open_mode(spa_mode_t spa_mode);
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -462,6 +462,8 @@ struct vdev {
|
|||
uint64_t vdev_io_t;
|
||||
uint64_t vdev_slow_io_n;
|
||||
uint64_t vdev_slow_io_t;
|
||||
|
||||
void *vdev_zia_handle;
|
||||
};
|
||||
|
||||
#define VDEV_PAD_SIZE (8 << 10)
|
||||
|
|
|
@ -169,6 +169,11 @@ extern int vdev_raidz_load(vdev_t *);
|
|||
#define RAIDZ_EXPAND_PAUSE_SCRATCH_POST_REFLOW_1 6
|
||||
#define RAIDZ_EXPAND_PAUSE_SCRATCH_POST_REFLOW_2 7
|
||||
|
||||
void vdev_raidz_generate_parity_p(struct raidz_row *);
|
||||
void vdev_raidz_generate_parity_pq(struct raidz_row *);
|
||||
void vdev_raidz_generate_parity_pqr(struct raidz_row *);
|
||||
void vdev_raidz_reconstruct_general(struct raidz_row *, int *, int);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -136,6 +136,7 @@ typedef struct raidz_row {
|
|||
uint64_t rr_offset; /* Logical offset for *_io_verify() */
|
||||
uint64_t rr_size; /* Physical size for *_io_verify() */
|
||||
#endif
|
||||
void *rr_zia_handle;
|
||||
raidz_col_t rr_col[]; /* Flexible array of I/O columns */
|
||||
} raidz_row_t;
|
||||
|
||||
|
|
|
@ -61,7 +61,7 @@ typedef struct mzap_phys {
|
|||
uint64_t mz_salt;
|
||||
uint64_t mz_normflags;
|
||||
uint64_t mz_pad[5];
|
||||
mzap_ent_phys_t mz_chunk[1];
|
||||
mzap_ent_phys_t mz_chunk[];
|
||||
/* actually variable size depending on block size */
|
||||
} mzap_phys_t;
|
||||
|
||||
|
|
|
@ -0,0 +1,225 @@
|
|||
/*
|
||||
* © 2021. Triad National Security, LLC. All rights reserved.
|
||||
*
|
||||
* This program was produced under U.S. Government contract
|
||||
* 89233218CNA000001 for Los Alamos National Laboratory (LANL), which
|
||||
* is operated by Triad National Security, LLC for the U.S.
|
||||
* Department of Energy/National Nuclear Security Administration. All
|
||||
* rights in the program are reserved by Triad National Security, LLC,
|
||||
* and the U.S. Department of Energy/National Nuclear Security
|
||||
* Administration. The Government is granted for itself and others
|
||||
* acting on its behalf a nonexclusive, paid-up, irrevocable worldwide
|
||||
* license in this material to reproduce, prepare derivative works,
|
||||
* distribute copies to the public, perform publicly and display
|
||||
* publicly, and to permit others to do so.
|
||||
*
|
||||
* ----
|
||||
*
|
||||
* This program is open source under the BSD-3 License.
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from this
|
||||
* software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _ZIA_H
|
||||
#define _ZIA_H
|
||||
|
||||
#include <sys/abd.h>
|
||||
#include <sys/fs/zfs.h> /* VDEV_RAIDZ_MAXPARITY */
|
||||
#include <sys/vdev.h>
|
||||
#include <sys/vdev_raidz_impl.h>
|
||||
#include <sys/zio.h>
|
||||
#include <sys/zio_checksum.h>
|
||||
#include <sys/zio_compress.h>
|
||||
|
||||
/* ******************************************************** */
|
||||
/* return values */
|
||||
#define ZIA_OK 1000
|
||||
|
||||
/* something bad happened not related to missing functionality */
|
||||
#define ZIA_ERROR 1001
|
||||
|
||||
/* error, fallback to zfs implementation */
|
||||
#define ZIA_FALLBACK 1002
|
||||
|
||||
/* ran, but result is bad */
|
||||
#define ZIA_BAD_RESULT 1003
|
||||
|
||||
/* expected provider and actual provider do not match */
|
||||
#define ZIA_PROVIDER_MISMATCH 1004
|
||||
|
||||
/*
|
||||
* error, returned when the provider can no longer
|
||||
* communicate with the accelerator (providers are
|
||||
* software, and are not expected to randomly go
|
||||
* down)
|
||||
*/
|
||||
#define ZIA_ACCELERATOR_DOWN 1005
|
||||
/* ******************************************************** */
|
||||
|
||||
/* DPUSM was not found by configure */
|
||||
#define ZIA_DISABLED 1006
|
||||
|
||||
/*
|
||||
* This struct is normally set with
|
||||
* zpool set zia_<property>=on/off/<value>
|
||||
* and passed around in spa_t.
|
||||
*/
|
||||
typedef struct zia_props {
|
||||
/* global state */
|
||||
/* NOTE(review): presumably the pool-wide "offloading allowed" switch - confirm */
boolean_t can_offload;
|
||||
/* opaque provider pointer; same type zia_get_provider() returns */
void *provider;
|
||||

||||
/* minimum size allowed to offload - set by ashift */
|
||||
size_t min_offload_size;
|
||||

||||
/* NOTE(review): the ints below look like per-operation on/off settings - confirm */
int compress;
|
||||
int decompress;
|
||||

||||
int checksum;
|
||||

||||
/*
 * RAIDZ parity generation (gen) and reconstruction (rec) settings.
 * Arrays hold VDEV_RAIDZ_MAXPARITY + 1 entries; presumably indexed by
 * parity level with slot 0 unused - TODO confirm against zia.c.
 */
struct {
|
||||
int gen[VDEV_RAIDZ_MAXPARITY + 1];
|
||||
int rec[VDEV_RAIDZ_MAXPARITY + 1];
|
||||
} raidz;
|
||||

||||
/* write offload settings for file-backed and disk-backed vdevs (assumed from names) */
int file_write;
|
||||
int disk_write;
|
||||
} zia_props_t;
|
||||
|
||||
zia_props_t *zia_get_props(spa_t *spa);
|
||||
void zia_prop_warn(boolean_t val, const char *name);
|
||||
|
||||
int zia_init(void);
|
||||
int zia_fini(void);
|
||||
|
||||
void *zia_get_provider(const char *name, vdev_t *vdev);
|
||||
const char *zia_get_provider_name(void *provider);
|
||||
int zia_put_provider(void **provider, vdev_t *vdev);
|
||||
|
||||
/*
|
||||
* turn off offloading for this zio as well as
|
||||
* all new zios created with the same spa
|
||||
*/
|
||||
int zia_disable_offloading(zio_t *zio, boolean_t reexecute);
|
||||
|
||||
/* check if offloading can occur */
|
||||
boolean_t zia_is_used(zio_t *zio);
|
||||
|
||||
/*
|
||||
* check if a handle is associated with this pointer
|
||||
*
|
||||
* not exposing functions for different handles because
|
||||
* only abd handles are checked outside of zia.c
|
||||
*/
|
||||
boolean_t zia_is_offloaded(abd_t *abd);
|
||||
|
||||
int zia_worst_error(const int lhs, const int rhs);
|
||||
|
||||
/* create a new offloader handle without copying data */
|
||||
void *zia_alloc(void *provider, size_t size, size_t min_offload_size);
|
||||
|
||||
/* deallocate handle without onloading */
|
||||
int zia_free(void **handle);
|
||||
|
||||
/* move linear data from the offloader to memory */
|
||||
int zia_onload(void **handle, void *buf, size_t size);
|
||||
|
||||
/* calls abd_iterate_func on the abd to copy abd data back and forth */
|
||||
int zia_offload_abd(void *provider, abd_t *abd,
|
||||
size_t size, size_t min_offload_size,
|
||||
boolean_t *local_offload, boolean_t lock);
|
||||
int zia_onload_abd(abd_t *abd, size_t size,
|
||||
boolean_t keep_handle);
|
||||
int zia_free_abd(abd_t *abd, boolean_t lock);
|
||||
|
||||
/*
|
||||
* if offloaded locally, just free the handle
|
||||
* if not, onload the data and free the handle
|
||||
*/
|
||||
int zia_cleanup_abd(abd_t *abd, size_t size,
|
||||
boolean_t local_offload, boolean_t lock);
|
||||
|
||||
/* if the accelerator failed, restart the zio */
|
||||
void zia_restart_before_vdev(zio_t *zio);
|
||||
|
||||
/* fill a buffer with zeros */
|
||||
int zia_zero_fill(abd_t *abd, size_t offset, size_t size);
|
||||
|
||||
int
|
||||
zia_compress(zia_props_t *props, enum zio_compress c,
|
||||
abd_t *src, size_t s_len,
|
||||
abd_t **dst, uint64_t *d_len,
|
||||
uint8_t level, boolean_t *local_offload);
|
||||
|
||||
int
|
||||
zia_decompress(zia_props_t *props, enum zio_compress c,
|
||||
abd_t *src, size_t s_len, abd_t *dst, size_t d_len,
|
||||
uint8_t *level);
|
||||
|
||||
int zia_checksum_compute(void *provider, zio_cksum_t *dst,
|
||||
enum zio_checksum alg, zio_t *zio, uint64_t size,
|
||||
boolean_t *local_offload);
|
||||
int zia_checksum_error(enum zio_checksum alg, abd_t *abd,
|
||||
uint64_t size, int byteswap, zio_cksum_t *actual_cksum);
|
||||
|
||||
/* raidz */
|
||||
int zia_raidz_alloc(zio_t *zio, raidz_row_t *rr, boolean_t rec,
|
||||
uint_t cksum, boolean_t *local_offload);
|
||||
int zia_raidz_free(raidz_row_t *rr, boolean_t onload_parity);
|
||||
int zia_raidz_gen(raidz_row_t *rr);
|
||||
int zia_raidz_gen_cleanup(zio_t *zio, raidz_row_t *rr,
|
||||
boolean_t local_offload);
|
||||
int zia_raidz_new_parity(zio_t *zio, raidz_row_t *rr, uint64_t c);
|
||||
/* compare the contents of offloaded abds (only used in resilver) */
|
||||
int zia_raidz_cmp(abd_t *lhs, abd_t *rhs, int *diff);
|
||||
int zia_raidz_rec(raidz_row_t *rr, int *t, int nt);
|
||||
int zia_raidz_rec_cleanup(zio_t *zio, raidz_row_t *rr,
|
||||
boolean_t local_offload, boolean_t onload_parity);
|
||||
|
||||
/* file I/O */
|
||||
int zia_file_open(vdev_t *vdev, const char *path,
|
||||
int flags, int mode);
|
||||
int zia_file_write(vdev_t *vdev, abd_t *abd, ssize_t size,
|
||||
loff_t offset, ssize_t *resid, int *err);
|
||||
int zia_file_close(vdev_t *vdev);
|
||||
|
||||
#ifdef __linux__
|
||||
#ifdef _KERNEL
|
||||
#include <linux/blkdev.h>
|
||||
|
||||
/* disk I/O */
|
||||
int zia_disk_open(vdev_t *vdev, const char *path,
|
||||
struct block_device *bdev);
|
||||
int zia_disk_invalidate(vdev_t *vdev);
|
||||
int zia_disk_write(vdev_t *vdev, zio_t *zio,
|
||||
size_t io_size, uint64_t io_offset, int flags);
|
||||
int zia_disk_flush(vdev_t *vdev, zio_t *zio);
|
||||
int zia_disk_close(vdev_t *vdev);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#endif
|
|
@ -0,0 +1,51 @@
|
|||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or https://opensource.org/licenses/CDDL-1.0.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
|
||||
#ifndef _ZIA_CDDL_H
|
||||
#define _ZIA_CDDL_H
|
||||
|
||||
#include <sys/abd.h>
|
||||
#include <sys/zio.h>
|
||||
#include <sys/zio_compress.h>
|
||||
|
||||
#ifdef ZIA
|
||||
#include <dpusm/user_api.h>
|
||||
int
|
||||
zia_compress_impl(const dpusm_uf_t *dpusm, zia_props_t *props,
|
||||
enum zio_compress c, abd_t *src, size_t s_len,
|
||||
void **cbuf_handle, uint64_t *c_len,
|
||||
uint8_t level, boolean_t *local_offload);
|
||||
|
||||
int
|
||||
zia_raidz_rec_impl(const dpusm_uf_t *dpusm,
|
||||
raidz_row_t *rr, int *t, int nt);
|
||||
|
||||
#ifdef _KERNEL
|
||||
void
|
||||
zia_disk_write_completion(void *zio_ptr, int error);
|
||||
|
||||
void
|
||||
zia_disk_flush_completion(void *zio_ptr, int error);
|
||||
#endif /* _KERNEL */
|
||||
|
||||
#endif /* ZIA */
|
||||
|
||||
#endif /* _ZIA_CDDL_H */
|
|
@ -0,0 +1,75 @@
|
|||
/*
|
||||
* © 2021. Triad National Security, LLC. All rights reserved.
|
||||
*
|
||||
* This program was produced under U.S. Government contract
|
||||
* 89233218CNA000001 for Los Alamos National Laboratory (LANL), which
|
||||
* is operated by Triad National Security, LLC for the U.S.
|
||||
* Department of Energy/National Nuclear Security Administration. All
|
||||
* rights in the program are reserved by Triad National Security, LLC,
|
||||
* and the U.S. Department of Energy/National Nuclear Security
|
||||
* Administration. The Government is granted for itself and others
|
||||
* acting on its behalf a nonexclusive, paid-up, irrevocable worldwide
|
||||
* license in this material to reproduce, prepare derivative works,
|
||||
* distribute copies to the public, perform publicly and display
|
||||
* publicly, and to permit others to do so.
|
||||
*
|
||||
* ----
|
||||
*
|
||||
* This program is open source under the BSD-3 License.
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from this
|
||||
* software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _ZIA_PRIVATE_H
|
||||
#define _ZIA_PRIVATE_H
|
||||
|
||||
/*
|
||||
* needed by both zia.h and zia_cddl.h
|
||||
*/
|
||||
|
||||
#include <sys/zio.h>
|
||||
#include <sys/zio_compress.h>
|
||||
#include <sys/zio_checksum.h>
|
||||
|
||||
/*
 * Accessors for the Z.I.A. handle members (abd_zia_handle and
 * vdev_zia_handle) of the structure the argument points to.
 *
 * Each expansion is an lvalue and evaluates its argument exactly once.
 * The outer parentheses keep the expansion a single primary expression
 * no matter what operators surround the macro at the call site.
 */
#define	ABD_HANDLE(abd)		((abd)->abd_zia_handle)

#define	VDEV_HANDLE(vdev)	((vdev)->vdev_zia_handle)
|
||||
|
||||
int
|
||||
dpusm_to_ret(const int dpusm_ret);
|
||||
|
||||
#ifdef ZIA
|
||||
#include <dpusm/user_api.h>
|
||||
|
||||
dpusm_compress_t
|
||||
compress_to_dpusm(enum zio_compress c);
|
||||
|
||||
int zia_get_capabilities(void *provider, dpusm_pc_t **caps);
|
||||
|
||||
#endif /* ZIA */
|
||||
|
||||
#endif /* _ZIA_PRIVATE_H */
|
|
@ -226,6 +226,8 @@ typedef uint64_t zio_flag_t;
|
|||
#define ZIO_FLAG_REEXECUTED (1ULL << 29)
|
||||
#define ZIO_FLAG_DELEGATED (1ULL << 30)
|
||||
|
||||
#define ZIO_FLAG_ZIA_REEXECUTE (1ULL << 32)
|
||||
|
||||
#define ZIO_ALLOCATOR_NONE (-1)
|
||||
#define ZIO_HAS_ALLOCATOR(zio) ((zio)->io_allocator != ZIO_ALLOCATOR_NONE)
|
||||
|
||||
|
@ -532,6 +534,8 @@ struct zio {
|
|||
|
||||
/* Taskq dispatching state */
|
||||
taskq_ent_t io_tqent;
|
||||
|
||||
boolean_t io_can_offload;
|
||||
};
|
||||
|
||||
enum blk_verify_flag {
|
||||
|
@ -622,6 +626,7 @@ extern void zio_data_buf_free(void *buf, size_t size);
|
|||
|
||||
extern void zio_push_transform(zio_t *zio, struct abd *abd, uint64_t size,
|
||||
uint64_t bufsize, zio_transform_func_t *transform);
|
||||
extern zio_transform_t *zio_pop_transform(zio_t *zio);
|
||||
extern void zio_pop_transforms(zio_t *zio);
|
||||
|
||||
extern void zio_resubmit_stage_async(void *);
|
||||
|
|
|
@ -144,10 +144,18 @@ typedef const struct zio_compress_info {
|
|||
|
||||
extern zio_compress_info_t zio_compress_table[ZIO_COMPRESS_FUNCTIONS];
|
||||
|
||||
extern int zio_compress_zeroed_cb(void *data, size_t len, void *private);
|
||||
|
||||
/*
|
||||
* lz4 compression init & free
|
||||
*/
|
||||
extern void lz4_init(void);
|
||||
extern size_t
|
||||
zfs_lz4_compress_buf(void *s_start, void *d_start, size_t s_len,
|
||||
size_t d_len, int n);
|
||||
extern int
|
||||
zfs_lz4_decompress_buf(void *s_start, void *d_start, size_t s_len,
|
||||
size_t d_len, int n);
|
||||
extern void lz4_fini(void);
|
||||
|
||||
/*
|
||||
|
|
|
@ -2963,7 +2963,20 @@
|
|||
<enumerator name='ZPOOL_PROP_DEDUP_TABLE_SIZE' value='36'/>
|
||||
<enumerator name='ZPOOL_PROP_DEDUP_TABLE_QUOTA' value='37'/>
|
||||
<enumerator name='ZPOOL_PROP_DEDUPCACHED' value='38'/>
|
||||
<enumerator name='ZPOOL_NUM_PROPS' value='39'/>
|
||||
<enumerator name='ZPOOL_PROP_ZIA_AVAILABLE' value='39'/>
|
||||
<enumerator name='ZPOOL_PROP_ZIA_PROVIDER' value='40'/>
|
||||
<enumerator name='ZPOOL_PROP_ZIA_COMPRESS' value='41'/>
|
||||
<enumerator name='ZPOOL_PROP_ZIA_DECOMPRESS' value='42'/>
|
||||
<enumerator name='ZPOOL_PROP_ZIA_CHECKSUM' value='43'/>
|
||||
<enumerator name='ZPOOL_PROP_ZIA_RAIDZ1_GEN' value='44'/>
|
||||
<enumerator name='ZPOOL_PROP_ZIA_RAIDZ2_GEN' value='45'/>
|
||||
<enumerator name='ZPOOL_PROP_ZIA_RAIDZ3_GEN' value='46'/>
|
||||
<enumerator name='ZPOOL_PROP_ZIA_RAIDZ1_REC' value='47'/>
|
||||
<enumerator name='ZPOOL_PROP_ZIA_RAIDZ2_REC' value='48'/>
|
||||
<enumerator name='ZPOOL_PROP_ZIA_RAIDZ3_REC' value='49'/>
|
||||
<enumerator name='ZPOOL_PROP_ZIA_FILE_WRITE' value='50'/>
|
||||
<enumerator name='ZPOOL_PROP_ZIA_DISK_WRITE' value='51'/>
|
||||
<enumerator name='ZPOOL_NUM_PROPS' value='52'/>
|
||||
</enum-decl>
|
||||
<typedef-decl name='zpool_prop_t' type-id='af1ba157' id='5d0c23fb'/>
|
||||
<typedef-decl name='regoff_t' type-id='95e97e5e' id='54a2a2a8'/>
|
||||
|
|
|
@ -184,6 +184,8 @@ nodist_libzpool_la_SOURCES = \
|
|||
module/zfs/zfs_rlock.c \
|
||||
module/zfs/zfs_sa.c \
|
||||
module/zfs/zil.c \
|
||||
module/zfs/zia.c \
|
||||
module/zfs/zia_cddl.c \
|
||||
module/zfs/zio.c \
|
||||
module/zfs/zio_checksum.c \
|
||||
module/zfs/zio_compress.c \
|
||||
|
|
|
@ -464,6 +464,42 @@ command, though this property can be used when a specific version is needed for
|
|||
backwards compatibility.
|
||||
Once feature flags are enabled on a pool this property will no longer have a
|
||||
value.
|
||||
.It Sy zia_checksum Ns = Ns Sy on Ns | Ns Sy off
|
||||
Controls whether the pool should offload checksum computations.
|
||||
Does not have any effect if the checksum stage is disabled.
|
||||
Embedded checksums are onloaded, and will suffer a data movement penalty.
|
||||
.It Sy zia_compress Ns = Ns Sy on Ns | Ns Sy off
|
||||
Controls whether the pool should offload compression.
|
||||
Does not have any effect if the compression stage is disabled.
|
||||
Embedded data is onloaded, and will suffer a data movement penalty.
|
||||
.It Sy zia_decompress Ns = Ns Sy on Ns | Ns Sy off
|
||||
Controls whether the pool should offload decompression.
|
||||
.It Sy zia_disk_write Ns = Ns Sy on Ns | Ns Sy off
|
||||
Controls whether a pool should offload write I/Os to disks.
|
||||
.It Sy zia_file_write Ns = Ns Sy on Ns | Ns Sy off
|
||||
Controls whether a pool should offload write I/Os to files.
|
||||
.It Sy zia_provider Ns = Ns Sy (unset) Ns | Ns Sy Z.I.A. Provider Name
|
||||
Selects an accelerator registered in the Data Processing Unit Services
|
||||
Module to offload data to.
|
||||
Only one accelerator can be used by a pool at a time.
|
||||
.It Sy zia_raidz1_gen Ns = Ns Sy on Ns | Ns Sy off
|
||||
Controls whether the pool should offload RAIDZ1 parity generation.
|
||||
Does not have any effect if RAIDZ1 is disabled.
|
||||
.It Sy zia_raidz1_rec Ns = Ns Sy on Ns | Ns Sy off
|
||||
Controls whether the pool should offload RAIDZ1 reconstruction.
|
||||
Does not have any effect if RAIDZ1 is disabled.
|
||||
.It Sy zia_raidz2_gen Ns = Ns Sy on Ns | Ns Sy off
|
||||
Controls whether the pool should offload RAIDZ2 parity generation.
|
||||
Does not have any effect if RAIDZ2 is disabled.
|
||||
.It Sy zia_raidz2_rec Ns = Ns Sy on Ns | Ns Sy off
|
||||
Controls whether the pool should offload RAIDZ2 reconstruction.
|
||||
Does not have any effect if RAIDZ2 is disabled.
|
||||
.It Sy zia_raidz3_gen Ns = Ns Sy on Ns | Ns Sy off
|
||||
Controls whether the pool should offload RAIDZ3 parity generation.
|
||||
Does not have any effect if RAIDZ3 is disabled.
|
||||
.It Sy zia_raidz3_rec Ns = Ns Sy on Ns | Ns Sy off
|
||||
Controls whether the pool should offload RAIDZ3 reconstruction.
|
||||
Does not have any effect if RAIDZ3 is disabled.
|
||||
.El
|
||||
.
|
||||
.Ss User Properties
|
||||
|
|
|
@ -27,6 +27,7 @@ ZFS_MODULE_CFLAGS += -I$(zfs_include)/os/linux/zfs
|
|||
ZFS_MODULE_CFLAGS += -I$(zfs_include)
|
||||
ZFS_MODULE_CPPFLAGS += -D_KERNEL
|
||||
ZFS_MODULE_CPPFLAGS += @KERNEL_DEBUG_CPPFLAGS@
|
||||
ZFS_MODULE_CPPFLAGS += @KERNEL_ZIA_CPPFLAGS@
|
||||
|
||||
# KASAN enables -Werror=frame-larger-than=1024, which
|
||||
# breaks oh so many parts of our build.
|
||||
|
@ -424,6 +425,8 @@ ZFS_OBJS := \
|
|||
zfs_sa.o \
|
||||
zfs_vnops.o \
|
||||
zil.o \
|
||||
zia.o \
|
||||
zia_cddl.o \
|
||||
zio.o \
|
||||
zio_checksum.o \
|
||||
zio_compress.o \
|
||||
|
@ -503,3 +506,19 @@ OBJECT_FILES_NON_STANDARD_vdev_raidz_math_avx512f.o := y
|
|||
ifeq ($(CONFIG_ALTIVEC),y)
|
||||
$(obj)/zfs/vdev_raidz_math_powerpc_altivec.o : c_flags += -maltivec
|
||||
endif
|
||||
|
||||
ifneq ("@DPUSM_SYMBOLS@","")
|
||||
obj-$(CONFIG_ZFS) += zia-software-provider.o
|
||||
|
||||
ZIA_SOFTWARE_PROVIDER_OBJS := \
|
||||
provider.o \
|
||||
kernel_offloader.o
|
||||
|
||||
zia-software-provider-objs += $(addprefix zia-software-provider/,$(ZIA_SOFTWARE_PROVIDER_OBJS))
|
||||
# zfs_file_os does not have any dependencies, so just link to it directly
|
||||
zia-software-provider-objs += os/linux/zfs/zfs_file_os.o
|
||||
|
||||
$(addprefix $(obj)/zia-software-provider/,$(ZIA_SOFTWARE_PROVIDER_OBJS)) : ccflags-y += -I@abs_top_builddir@ $(ZFS_MODULE_CFLAGS) -I@abs_srcdir@/zia-software-provider/ -I@DPUSM_ROOT@/include
|
||||
|
||||
@ZIA_ENABLED_TRUE@KBUILD_EXTRA_SYMBOLS += @DPUSM_SYMBOLS@
|
||||
endif
|
|
@ -80,7 +80,7 @@ clean: clean-@ac_system@
|
|||
|
||||
.PHONY: modules_uninstall-Linux-legacy
|
||||
modules_uninstall-Linux-legacy:
|
||||
$(RM) -r $(addprefix $(KMODDIR)/$(INSTALL_MOD_DIR)/,spl/ avl/ icp/ lua/ nvpair/ unicode/ zcommon/ zfs/ zstd/)
|
||||
$(RM) -r $(addprefix $(KMODDIR)/$(INSTALL_MOD_DIR)/,spl/ avl/ icp/ lua/ nvpair/ unicode/ zcommon/ zfs/ zstd/ zia-software-provider)
|
||||
|
||||
KMODDIR := $(INSTALL_MOD_PATH)/lib/modules/@LINUX_VERSION@
|
||||
modules_install-Linux: modules_uninstall-Linux-legacy
|
||||
|
@ -123,7 +123,7 @@ data_install: data_install-@ac_system@
|
|||
|
||||
modules_uninstall-Linux: modules_uninstall-Linux-legacy
|
||||
@# Uninstall the kernel modules
|
||||
$(RM) $(addprefix $(KMODDIR)/$(INSTALL_MOD_DIR)/,zfs.ko spl.ko)
|
||||
$(RM) $(addprefix $(KMODDIR)/$(INSTALL_MOD_DIR)/,zfs.ko spl.ko zia-software-provider)
|
||||
|
||||
modules_uninstall-FreeBSD:
|
||||
@false
|
||||
|
@ -153,7 +153,7 @@ cppcheck-Linux:
|
|||
-I @top_srcdir@/include/os/linux/spl \
|
||||
-I @top_srcdir@/include/os/linux/zfs \
|
||||
-I @top_srcdir@/include \
|
||||
avl icp lua nvpair unicode zcommon zfs zstd os/linux
|
||||
avl icp lua nvpair unicode zcommon zfs zstd os/linux zia-software-provider
|
||||
|
||||
cppcheck-FreeBSD:
|
||||
@true
|
||||
|
|
|
@ -34,6 +34,7 @@
|
|||
#include <sys/vdev_trim.h>
|
||||
#include <sys/abd.h>
|
||||
#include <sys/fs/zfs.h>
|
||||
#include <sys/zia.h>
|
||||
#include <sys/zio.h>
|
||||
#include <linux/blkpg.h>
|
||||
#include <linux/msdos_fs.h>
|
||||
|
@ -224,7 +225,7 @@ bdev_max_capacity(struct block_device *bdev, uint64_t wholedisk)
|
|||
return (psize);
|
||||
}
|
||||
|
||||
static void
|
||||
void
|
||||
vdev_disk_error(zio_t *zio)
|
||||
{
|
||||
/*
|
||||
|
@ -337,6 +338,7 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize,
|
|||
reread_part = B_TRUE;
|
||||
}
|
||||
|
||||
zia_disk_close(v);
|
||||
vdev_blkdev_put(bdh, smode, zfs_vdev_holder);
|
||||
}
|
||||
|
||||
|
@ -461,6 +463,11 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize,
|
|||
*logical_ashift = highbit64(MAX(logical_block_size,
|
||||
SPA_MINBLOCKSIZE)) - 1;
|
||||
|
||||
zia_get_props(v->vdev_spa)->min_offload_size = 2 << *physical_ashift;
|
||||
|
||||
/* open disk; ignore errors - will fall back to ZFS */
|
||||
zia_disk_open(v, v->vdev_path, BDH_BDEV(vd->vd_bdh));
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
|
@ -472,9 +479,11 @@ vdev_disk_close(vdev_t *v)
|
|||
if (v->vdev_reopening || vd == NULL)
|
||||
return;
|
||||
|
||||
if (vd->vd_bdh != NULL)
|
||||
if (vd->vd_bdh != NULL) {
|
||||
zia_disk_close(v);
|
||||
vdev_blkdev_put(vd->vd_bdh, spa_mode(v->vdev_spa),
|
||||
zfs_vdev_holder);
|
||||
}
|
||||
|
||||
rw_destroy(&vd->vd_lock);
|
||||
kmem_free(vd, sizeof (vdev_disk_t));
|
||||
|
@ -1104,17 +1113,10 @@ vdev_classic_bio_max_segs(zio_t *zio, int bio_size, uint64_t abd_offset)
|
|||
#endif
|
||||
}
|
||||
|
||||
static int
|
||||
vdev_classic_physio(zio_t *zio)
|
||||
int
|
||||
__vdev_classic_physio(struct block_device *bdev, zio_t *zio,
|
||||
size_t io_size, uint64_t io_offset, int rw, int flags)
|
||||
{
|
||||
vdev_t *v = zio->io_vd;
|
||||
vdev_disk_t *vd = v->vdev_tsd;
|
||||
struct block_device *bdev = BDH_BDEV(vd->vd_bdh);
|
||||
size_t io_size = zio->io_size;
|
||||
uint64_t io_offset = zio->io_offset;
|
||||
int rw = zio->io_type == ZIO_TYPE_READ ? READ : WRITE;
|
||||
int flags = 0;
|
||||
|
||||
dio_request_t *dr;
|
||||
uint64_t abd_offset;
|
||||
uint64_t bio_offset;
|
||||
|
@ -1221,6 +1223,23 @@ retry:
|
|||
return (error);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(__vdev_classic_physio);
|
||||
|
||||
static int
|
||||
vdev_classic_physio(zio_t *zio)
|
||||
{
|
||||
vdev_t *v = zio->io_vd;
|
||||
vdev_disk_t *vd = v->vdev_tsd;
|
||||
struct block_device *bdev = BDH_BDEV(vd->vd_bdh);
|
||||
size_t io_size = zio->io_size;
|
||||
uint64_t io_offset = zio->io_offset;
|
||||
int rw = zio->io_type == ZIO_TYPE_READ ? READ : WRITE;
|
||||
int flags = 0;
|
||||
|
||||
return __vdev_classic_physio(bdev, zio,
|
||||
io_size, io_offset, rw, flags);
|
||||
}
|
||||
|
||||
/* ========== */
|
||||
|
||||
BIO_END_IO_PROTO(vdev_disk_io_flush_completion, bio, error)
|
||||
|
@ -1242,7 +1261,7 @@ BIO_END_IO_PROTO(vdev_disk_io_flush_completion, bio, error)
|
|||
zio_interrupt(zio);
|
||||
}
|
||||
|
||||
static int
|
||||
int
|
||||
vdev_disk_io_flush(struct block_device *bdev, zio_t *zio)
|
||||
{
|
||||
struct request_queue *q;
|
||||
|
@ -1265,6 +1284,8 @@ vdev_disk_io_flush(struct block_device *bdev, zio_t *zio)
|
|||
return (0);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(vdev_disk_io_flush);
|
||||
|
||||
BIO_END_IO_PROTO(vdev_disk_discard_end_io, bio, error)
|
||||
{
|
||||
zio_t *zio = bio->bi_private;
|
||||
|
@ -1423,6 +1444,17 @@ vdev_disk_io_start(zio_t *zio)
|
|||
* Issue the flush. If successful, the response will
|
||||
* be handled in the completion callback, so we're done.
|
||||
*/
|
||||
error = zia_disk_flush(v, zio);
|
||||
|
||||
/*
|
||||
* have to return here in order to not dispatch
|
||||
* this zio to multiple task queues
|
||||
*/
|
||||
if (error == 0) {
|
||||
rw_exit(&vd->vd_lock);
|
||||
return;
|
||||
}
|
||||
|
||||
error = vdev_disk_io_flush(BDH_BDEV(vd->vd_bdh), zio);
|
||||
if (error == 0) {
|
||||
rw_exit(&vd->vd_lock);
|
||||
|
@ -1446,8 +1478,46 @@ vdev_disk_io_start(zio_t *zio)
|
|||
return;
|
||||
|
||||
case ZIO_TYPE_READ:
|
||||
zio->io_target_timestamp = zio_handle_io_delay(zio);
|
||||
error = vdev_disk_io_rw_fn(zio);
|
||||
rw_exit(&vd->vd_lock);
|
||||
if (error) {
|
||||
zio->io_error = error;
|
||||
zio_interrupt(zio);
|
||||
}
|
||||
return;
|
||||
|
||||
case ZIO_TYPE_WRITE:
|
||||
zio->io_target_timestamp = zio_handle_io_delay(zio);
|
||||
error = EIO;
|
||||
|
||||
boolean_t local_offload = B_FALSE;
|
||||
zia_props_t *zia_props = zia_get_props(zio->io_spa);
|
||||
if ((zia_props->disk_write == 1) &&
|
||||
(zio->io_can_offload == B_TRUE)) {
|
||||
if (zia_offload_abd(zia_props->provider, zio->io_abd,
|
||||
zio->io_size, zia_props->min_offload_size,
|
||||
&local_offload, B_TRUE) == ZIA_OK) {
|
||||
error = zia_disk_write(v, zio, zio->io_size,
|
||||
zio->io_offset, 0);
|
||||
}
|
||||
}
|
||||
|
||||
if (error == 0) {
|
||||
rw_exit(&vd->vd_lock);
|
||||
return;
|
||||
}
|
||||
|
||||
error = zia_cleanup_abd(zio->io_abd, zio->io_size,
|
||||
local_offload, B_TRUE);
|
||||
|
||||
if (error == ZIA_ACCELERATOR_DOWN) {
|
||||
zia_disable_offloading(zio, B_TRUE);
|
||||
rw_exit(&vd->vd_lock);
|
||||
zio_interrupt(zio);
|
||||
return;
|
||||
}
|
||||
|
||||
error = vdev_disk_io_rw_fn(zio);
|
||||
rw_exit(&vd->vd_lock);
|
||||
if (error) {
|
||||
|
@ -1488,6 +1558,7 @@ vdev_disk_io_done(zio_t *zio)
|
|||
vdev_disk_t *vd = v->vdev_tsd;
|
||||
|
||||
if (!zfs_check_disk_status(BDH_BDEV(vd->vd_bdh))) {
|
||||
zia_disk_invalidate(v);
|
||||
invalidate_bdev(BDH_BDEV(vd->vd_bdh));
|
||||
v->vdev_remove_wanted = B_TRUE;
|
||||
spa_async_request(zio->io_spa, SPA_ASYNC_REMOVE);
|
||||
|
|
|
@ -36,6 +36,7 @@
|
|||
#include <sys/fcntl.h>
|
||||
#include <sys/vnode.h>
|
||||
#include <sys/zfs_file.h>
|
||||
#include <sys/zia.h>
|
||||
#ifdef _KERNEL
|
||||
#include <linux/falloc.h>
|
||||
#endif
|
||||
|
@ -68,7 +69,11 @@ vdev_file_rele(vdev_t *vd)
|
|||
ASSERT(vd->vdev_path != NULL);
|
||||
}
|
||||
|
||||
#ifdef __linux__
|
||||
mode_t
|
||||
#else
|
||||
static mode_t
|
||||
#endif
|
||||
vdev_file_open_mode(spa_mode_t spa_mode)
|
||||
{
|
||||
mode_t mode = 0;
|
||||
|
@ -161,6 +166,12 @@ vdev_file_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
|
|||
}
|
||||
#endif
|
||||
|
||||
zia_get_props(vd->vdev_spa)->min_offload_size = 2 << *physical_ashift;
|
||||
|
||||
/* try to open the file; ignore errors - will fall back to ZFS */
|
||||
zia_file_open(vd, vd->vdev_path,
|
||||
vdev_file_open_mode(spa_mode(vd->vdev_spa)), 0);
|
||||
|
||||
skip_open:
|
||||
|
||||
error = zfs_file_getattr(vf->vf_file, &zfa);
|
||||
|
@ -184,6 +195,8 @@ vdev_file_close(vdev_t *vd)
|
|||
if (vd->vdev_reopening || vf == NULL)
|
||||
return;
|
||||
|
||||
zia_file_close(vd);
|
||||
|
||||
if (vf->vf_file != NULL) {
|
||||
(void) zfs_file_close(vf->vf_file);
|
||||
}
|
||||
|
@ -203,18 +216,53 @@ vdev_file_io_strategy(void *arg)
|
|||
void *buf;
|
||||
loff_t off;
|
||||
ssize_t size;
|
||||
int err;
|
||||
int err = 0;
|
||||
|
||||
off = zio->io_offset;
|
||||
size = zio->io_size;
|
||||
resid = 0;
|
||||
|
||||
if (zio->io_type == ZIO_TYPE_READ) {
|
||||
buf = abd_borrow_buf(zio->io_abd, zio->io_size);
|
||||
buf = abd_borrow_buf(zio->io_abd, size);
|
||||
err = zfs_file_pread(vf->vf_file, buf, size, off, &resid);
|
||||
abd_return_buf_copy(zio->io_abd, buf, size);
|
||||
} else {
|
||||
buf = abd_borrow_buf_copy(zio->io_abd, zio->io_size);
|
||||
err = EIO;
|
||||
|
||||
boolean_t local_offload = B_FALSE;
|
||||
zia_props_t *zia_props = zia_get_props(zio->io_spa);
|
||||
|
||||
if ((zia_props->file_write == 1) &&
|
||||
(zio->io_can_offload == B_TRUE)) {
|
||||
if (zia_offload_abd(zia_props->provider, zio->io_abd,
|
||||
size, zia_props->min_offload_size,
|
||||
&local_offload, B_TRUE) == ZIA_OK) {
|
||||
err = zia_file_write(vd, zio->io_abd, size, off,
|
||||
&resid, &err);
|
||||
}
|
||||
}
|
||||
|
||||
/* if offload and write succeeded, return here */
|
||||
if (err == 0) {
|
||||
zio->io_error = err;
|
||||
if (resid != 0 && zio->io_error == 0)
|
||||
zio->io_error = SET_ERROR(ENOSPC);
|
||||
|
||||
zio_delay_interrupt(zio);
|
||||
return;
|
||||
}
|
||||
|
||||
/* if offload or write failed, bring data back into memory */
|
||||
err = zia_cleanup_abd(zio->io_abd, size, local_offload, B_TRUE);
|
||||
|
||||
/* if onload failed, restart the zio with offloading disabled */
|
||||
if (err == ZIA_ACCELERATOR_DOWN) {
|
||||
zia_disable_offloading(zio, B_TRUE);
|
||||
zio_delay_interrupt(zio);
|
||||
return;
|
||||
}
|
||||
|
||||
buf = abd_borrow_buf_copy(zio->io_abd, size);
|
||||
err = zfs_file_pwrite(vf->vf_file, buf, size, off, &resid);
|
||||
abd_return_buf(zio->io_abd, buf, size);
|
||||
}
|
||||
|
|
|
@ -187,6 +187,51 @@ zpool_prop_init(void)
|
|||
ZPOOL_DEDUPCACHED_PROP_NAME, PROP_TYPE_NUMBER, PROP_READONLY,
|
||||
ZFS_TYPE_POOL, "DEDUPCACHED", B_FALSE, sfeatures);
|
||||
|
||||
zprop_register_string(ZPOOL_PROP_ZIA_AVAILABLE, "zia_available",
|
||||
#ifdef ZIA
|
||||
"yes",
|
||||
#else
|
||||
"no",
|
||||
#endif
|
||||
PROP_READONLY, ZFS_TYPE_POOL, "yes | no", "zia_available",
|
||||
sfeatures);
|
||||
zprop_register_string(ZPOOL_PROP_ZIA_PROVIDER, "zia_provider", NULL,
|
||||
PROP_DEFAULT, ZFS_TYPE_POOL, "<Z.I.A. Provider Name>", "PROVIDER",
|
||||
sfeatures);
|
||||
zprop_register_index(ZPOOL_PROP_ZIA_COMPRESS, "zia_compress",
|
||||
1, PROP_DEFAULT, ZFS_TYPE_POOL, "on | off",
|
||||
"zia_compress", boolean_table, sfeatures);
|
||||
zprop_register_index(ZPOOL_PROP_ZIA_DECOMPRESS, "zia_decompress",
|
||||
1, PROP_DEFAULT, ZFS_TYPE_POOL, "on | off",
|
||||
"zia_decompress", boolean_table, sfeatures);
|
||||
zprop_register_index(ZPOOL_PROP_ZIA_CHECKSUM,
|
||||
"zia_checksum", 1, PROP_DEFAULT, ZFS_TYPE_POOL,
|
||||
"on | off", "zia_checksum", boolean_table, sfeatures);
|
||||
zprop_register_index(ZPOOL_PROP_ZIA_RAIDZ1_GEN, "zia_raidz1_gen",
|
||||
1, PROP_DEFAULT, ZFS_TYPE_POOL, "on | off",
|
||||
"zia_raidz1_gen", boolean_table, sfeatures);
|
||||
zprop_register_index(ZPOOL_PROP_ZIA_RAIDZ2_GEN, "zia_raidz2_gen",
|
||||
1, PROP_DEFAULT, ZFS_TYPE_POOL, "on | off",
|
||||
"zia_raidz2_gen", boolean_table, sfeatures);
|
||||
zprop_register_index(ZPOOL_PROP_ZIA_RAIDZ3_GEN, "zia_raidz3_gen",
|
||||
1, PROP_DEFAULT, ZFS_TYPE_POOL, "on | off",
|
||||
"zia_raidz3_gen", boolean_table, sfeatures);
|
||||
zprop_register_index(ZPOOL_PROP_ZIA_RAIDZ1_REC, "zia_raidz1_rec",
|
||||
1, PROP_DEFAULT, ZFS_TYPE_POOL, "on | off",
|
||||
"zia_raidz1_rec", boolean_table, sfeatures);
|
||||
zprop_register_index(ZPOOL_PROP_ZIA_RAIDZ2_REC, "zia_raidz2_rec",
|
||||
1, PROP_DEFAULT, ZFS_TYPE_POOL, "on | off",
|
||||
"zia_raidz2_rec", boolean_table, sfeatures);
|
||||
zprop_register_index(ZPOOL_PROP_ZIA_RAIDZ3_REC, "zia_raidz3_rec",
|
||||
1, PROP_DEFAULT, ZFS_TYPE_POOL, "on | off",
|
||||
"zia_raidz3_rec", boolean_table, sfeatures);
|
||||
zprop_register_index(ZPOOL_PROP_ZIA_FILE_WRITE, "zia_file_write",
|
||||
1, PROP_DEFAULT, ZFS_TYPE_POOL, "on | off",
|
||||
"zia_file_write", boolean_table, sfeatures);
|
||||
zprop_register_index(ZPOOL_PROP_ZIA_DISK_WRITE, "zia_disk_write",
|
||||
1, PROP_DEFAULT, ZFS_TYPE_POOL, "on | off",
|
||||
"zia_disk_write", boolean_table, sfeatures);
|
||||
|
||||
zfs_mod_list_supported_free(sfeatures);
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,42 @@
|
|||
© 2021. Triad National Security, LLC. All rights reserved.
|
||||
|
||||
This program was produced under U.S. Government contract
|
||||
89233218CNA000001 for Los Alamos National Laboratory (LANL), which
|
||||
is operated by Triad National Security, LLC for the U.S.
|
||||
Department of Energy/National Nuclear Security Administration. All
|
||||
rights in the program are reserved by Triad National Security, LLC,
|
||||
and the U.S. Department of Energy/National Nuclear Security
|
||||
Administration. The Government is granted for itself and others
|
||||
acting on its behalf a nonexclusive, paid-up, irrevocable worldwide
|
||||
license in this material to reproduce, prepare derivative works,
|
||||
distribute copies to the public, perform publicly and display
|
||||
publicly, and to permit others to do so.
|
||||
|
||||
----
|
||||
|
||||
This program is open source under the BSD-3 License.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
3. Neither the name of the copyright holder nor the names of its
|
||||
contributors may be used to endorse or promote products derived from this
|
||||
software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
|
@ -0,0 +1 @@
|
|||
Z.I.A. FUNCTIONALITY IN ZFS
|
|
@ -101,6 +101,7 @@
|
|||
#include <sys/zio.h>
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/zfs_znode.h>
|
||||
#include <sys/zia.h>
|
||||
|
||||
/* see block comment above for description */
|
||||
int zfs_abd_scatter_enabled = B_TRUE;
|
||||
|
@ -147,11 +148,15 @@ abd_init_struct(abd_t *abd)
|
|||
abd->abd_parent = NULL;
|
||||
#endif
|
||||
abd->abd_size = 0;
|
||||
|
||||
abd->abd_zia_handle = NULL;
|
||||
}
|
||||
|
||||
static void
|
||||
abd_fini_struct(abd_t *abd)
|
||||
{
|
||||
zia_free_abd(abd, B_TRUE);
|
||||
|
||||
mutex_destroy(&abd->abd_mtx);
|
||||
ASSERT(!list_link_active(&abd->abd_gang_link));
|
||||
#ifdef ZFS_DEBUG
|
||||
|
@ -321,6 +326,8 @@ abd_free(abd_t *abd)
|
|||
abd_free_struct_impl(abd);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(abd_free);
|
||||
|
||||
/*
|
||||
* Allocate an ABD of the same format (same metadata flag, same scatterize
|
||||
* setting) as another ABD.
|
||||
|
@ -630,6 +637,8 @@ abd_get_from_buf(void *buf, size_t size)
|
|||
return (abd_get_from_buf_impl(abd, buf, size));
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(abd_get_from_buf);
|
||||
|
||||
abd_t *
|
||||
abd_get_from_buf_struct(abd_t *abd, void *buf, size_t size)
|
||||
{
|
||||
|
@ -736,7 +745,6 @@ abd_release_ownership_of_buf(abd_t *abd)
|
|||
abd_update_linear_stats(abd, ABDSTAT_DECR);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Give this ABD ownership of the buffer that it's storing. Can only be used on
|
||||
* linear ABDs which were allocated via abd_get_from_buf(), or ones allocated
|
||||
|
|
|
@ -57,6 +57,7 @@
|
|||
#include <sys/trace_zfs.h>
|
||||
#include <sys/zfs_racct.h>
|
||||
#include <sys/zfs_rlock.h>
|
||||
#include <sys/zia.h>
|
||||
#ifdef _KERNEL
|
||||
#include <sys/vmsystm.h>
|
||||
#include <sys/zfs_znode.h>
|
||||
|
@ -2778,6 +2779,7 @@ byteswap_uint8_array(void *vbuf, size_t size)
|
|||
void
|
||||
dmu_init(void)
|
||||
{
|
||||
zia_init();
|
||||
abd_init();
|
||||
zfs_dbgmsg_init();
|
||||
sa_cache_init();
|
||||
|
@ -2793,6 +2795,7 @@ dmu_init(void)
|
|||
void
|
||||
dmu_fini(void)
|
||||
{
|
||||
zia_fini();
|
||||
arc_fini(); /* arc depends on l2arc, so arc must go first */
|
||||
l2arc_fini();
|
||||
dmu_tx_fini();
|
||||
|
|
|
@ -52,7 +52,7 @@ int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
|
|||
|
||||
static kmem_cache_t *lz4_cache;
|
||||
|
||||
static size_t
|
||||
size_t
|
||||
zfs_lz4_compress_buf(void *s_start, void *d_start, size_t s_len,
|
||||
size_t d_len, int n)
|
||||
{
|
||||
|
@ -80,7 +80,7 @@ zfs_lz4_compress_buf(void *s_start, void *d_start, size_t s_len,
|
|||
return (bufsiz + sizeof (bufsiz));
|
||||
}
|
||||
|
||||
static int
|
||||
int
|
||||
zfs_lz4_decompress_buf(void *s_start, void *d_start, size_t s_len,
|
||||
size_t d_len, int n)
|
||||
{
|
||||
|
@ -103,6 +103,9 @@ zfs_lz4_decompress_buf(void *s_start, void *d_start, size_t s_len,
|
|||
ZFS_COMPRESS_WRAP_DECL(zfs_lz4_compress)
|
||||
ZFS_DECOMPRESS_WRAP_DECL(zfs_lz4_decompress)
|
||||
|
||||
EXPORT_SYMBOL(zfs_lz4_compress_buf);
|
||||
EXPORT_SYMBOL(zfs_lz4_decompress_buf);
|
||||
|
||||
/*
|
||||
* LZ4 API Description:
|
||||
*
|
||||
|
|
198
module/zfs/spa.c
198
module/zfs/spa.c
|
@ -89,6 +89,7 @@
|
|||
#include <sys/dsl_scan.h>
|
||||
#include <sys/zfeature.h>
|
||||
#include <sys/dsl_destroy.h>
|
||||
#include <sys/zia.h>
|
||||
#include <sys/zvol.h>
|
||||
|
||||
#ifdef _KERNEL
|
||||
|
@ -532,6 +533,46 @@ spa_prop_get_config(spa_t *spa, nvlist_t *nv)
|
|||
dp->scd_path, 0, ZPROP_SRC_LOCAL);
|
||||
}
|
||||
}
|
||||
|
||||
zia_props_t *zia_props = zia_get_props(spa);
|
||||
if (zia_props->provider != NULL) {
|
||||
spa_prop_add_list(nv, ZPOOL_PROP_ZIA_PROVIDER,
|
||||
(char *)zia_get_provider_name(zia_props->provider),
|
||||
0, ZPROP_SRC_LOCAL);
|
||||
}
|
||||
|
||||
spa_prop_add_list(nv, ZPOOL_PROP_ZIA_COMPRESS,
|
||||
NULL, zia_props->compress, ZPROP_SRC_LOCAL);
|
||||
|
||||
spa_prop_add_list(nv, ZPOOL_PROP_ZIA_DECOMPRESS,
|
||||
NULL, zia_props->decompress, ZPROP_SRC_LOCAL);
|
||||
|
||||
spa_prop_add_list(nv, ZPOOL_PROP_ZIA_CHECKSUM,
|
||||
NULL, zia_props->checksum, ZPROP_SRC_LOCAL);
|
||||
|
||||
spa_prop_add_list(nv, ZPOOL_PROP_ZIA_RAIDZ1_GEN,
|
||||
NULL, zia_props->raidz.gen[1], ZPROP_SRC_LOCAL);
|
||||
|
||||
spa_prop_add_list(nv, ZPOOL_PROP_ZIA_RAIDZ2_GEN,
|
||||
NULL, zia_props->raidz.gen[2], ZPROP_SRC_LOCAL);
|
||||
|
||||
spa_prop_add_list(nv, ZPOOL_PROP_ZIA_RAIDZ3_GEN,
|
||||
NULL, zia_props->raidz.gen[3], ZPROP_SRC_LOCAL);
|
||||
|
||||
spa_prop_add_list(nv, ZPOOL_PROP_ZIA_RAIDZ1_REC,
|
||||
NULL, zia_props->raidz.rec[1], ZPROP_SRC_LOCAL);
|
||||
|
||||
spa_prop_add_list(nv, ZPOOL_PROP_ZIA_RAIDZ2_REC,
|
||||
NULL, zia_props->raidz.rec[2], ZPROP_SRC_LOCAL);
|
||||
|
||||
spa_prop_add_list(nv, ZPOOL_PROP_ZIA_RAIDZ3_REC,
|
||||
NULL, zia_props->raidz.rec[3], ZPROP_SRC_LOCAL);
|
||||
|
||||
spa_prop_add_list(nv, ZPOOL_PROP_ZIA_FILE_WRITE,
|
||||
NULL, zia_props->file_write, ZPROP_SRC_LOCAL);
|
||||
|
||||
spa_prop_add_list(nv, ZPOOL_PROP_ZIA_DISK_WRITE,
|
||||
NULL, zia_props->disk_write, ZPROP_SRC_LOCAL);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -844,6 +885,20 @@ spa_prop_validate(spa_t *spa, nvlist_t *props)
|
|||
error = SET_ERROR(E2BIG);
|
||||
break;
|
||||
|
||||
case ZPOOL_PROP_ZIA_PROVIDER:
|
||||
case ZPOOL_PROP_ZIA_COMPRESS:
|
||||
case ZPOOL_PROP_ZIA_DECOMPRESS:
|
||||
case ZPOOL_PROP_ZIA_CHECKSUM:
|
||||
case ZPOOL_PROP_ZIA_RAIDZ1_GEN:
|
||||
case ZPOOL_PROP_ZIA_RAIDZ2_GEN:
|
||||
case ZPOOL_PROP_ZIA_RAIDZ3_GEN:
|
||||
case ZPOOL_PROP_ZIA_RAIDZ1_REC:
|
||||
case ZPOOL_PROP_ZIA_RAIDZ2_REC:
|
||||
case ZPOOL_PROP_ZIA_RAIDZ3_REC:
|
||||
case ZPOOL_PROP_ZIA_FILE_WRITE:
|
||||
case ZPOOL_PROP_ZIA_DISK_WRITE:
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -2190,6 +2245,11 @@ spa_unload(spa_t *spa)
|
|||
|
||||
spa->spa_raidz_expand = NULL;
|
||||
|
||||
if (zia_get_props(spa)->provider != NULL) {
|
||||
zia_put_provider(&zia_get_props(spa)->provider,
|
||||
spa->spa_root_vdev);
|
||||
}
|
||||
|
||||
spa_config_exit(spa, SCL_ALL, spa);
|
||||
}
|
||||
|
||||
|
@ -6685,6 +6745,8 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
|
|||
|
||||
spa_import_os(spa);
|
||||
|
||||
zia_get_props(spa)->can_offload = B_FALSE;
|
||||
|
||||
mutex_exit(&spa_namespace_lock);
|
||||
|
||||
return (0);
|
||||
|
@ -9547,6 +9609,7 @@ spa_sync_props(void *arg, dmu_tx_t *tx)
|
|||
spa_t *spa = dmu_tx_pool(tx)->dp_spa;
|
||||
objset_t *mos = spa->spa_meta_objset;
|
||||
nvpair_t *elem = NULL;
|
||||
zia_props_t *zia_props = zia_get_props(spa);
|
||||
|
||||
mutex_enter(&spa->spa_props_lock);
|
||||
|
||||
|
@ -9620,7 +9683,142 @@ spa_sync_props(void *arg, dmu_tx_t *tx)
|
|||
spa_history_log_internal(spa, "set", tx,
|
||||
"%s=%s", nvpair_name(elem), strval);
|
||||
break;
|
||||
case ZPOOL_PROP_ZIA_PROVIDER:
|
||||
strval = fnvpair_value_string(elem);
|
||||
if (zia_props->provider != NULL)
|
||||
zia_put_provider(&zia_props->provider,
|
||||
spa->spa_root_vdev);
|
||||
zia_props->provider = zia_get_provider(strval,
|
||||
spa->spa_root_vdev);
|
||||
zia_props->can_offload = !!zia_props->provider;
|
||||
|
||||
/*
|
||||
* Dirty the configuration on vdevs as above.
|
||||
*/
|
||||
if (tx->tx_txg != TXG_INITIAL) {
|
||||
vdev_config_dirty(spa->spa_root_vdev);
|
||||
spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE);
|
||||
}
|
||||
|
||||
/*
|
||||
* reopen devices so that provider is used
|
||||
* copied from zfs_ioc_pool_reopen
|
||||
*/
|
||||
spa_vdev_state_enter(spa, SCL_NONE);
|
||||
vdev_close(spa->spa_root_vdev);
|
||||
(void) vdev_open(spa->spa_root_vdev);
|
||||
(void) spa_vdev_state_exit(spa, NULL, 0);
|
||||
|
||||
spa_history_log_internal(spa, "set", tx,
|
||||
"%s=%s", nvpair_name(elem), strval);
|
||||
break;
|
||||
case ZPOOL_PROP_ZIA_COMPRESS:
|
||||
zia_props->compress =
|
||||
fnvpair_value_uint64(elem);
|
||||
zia_prop_warn(zia_props->compress,
|
||||
"Compression");
|
||||
break;
|
||||
case ZPOOL_PROP_ZIA_DECOMPRESS:
|
||||
zia_props->decompress =
|
||||
fnvpair_value_uint64(elem);
|
||||
zia_prop_warn(zia_props->decompress,
|
||||
"Decompression");
|
||||
break;
|
||||
case ZPOOL_PROP_ZIA_CHECKSUM:
|
||||
zia_props->checksum =
|
||||
fnvpair_value_uint64(elem);
|
||||
zia_prop_warn(zia_props->checksum,
|
||||
"Checksum");
|
||||
break;
|
||||
case ZPOOL_PROP_ZIA_RAIDZ1_GEN:
|
||||
zia_props->raidz.gen[1] =
|
||||
fnvpair_value_uint64(elem);
|
||||
zia_prop_warn(zia_props->raidz.gen[1],
|
||||
"RAIDZ 1 Generation");
|
||||
break;
|
||||
case ZPOOL_PROP_ZIA_RAIDZ2_GEN:
|
||||
zia_props->raidz.gen[2] =
|
||||
fnvpair_value_uint64(elem);
|
||||
zia_prop_warn(zia_props->raidz.gen[2],
|
||||
"RAIDZ 2 Generation");
|
||||
break;
|
||||
case ZPOOL_PROP_ZIA_RAIDZ3_GEN:
|
||||
zia_props->raidz.gen[3] =
|
||||
fnvpair_value_uint64(elem);
|
||||
zia_prop_warn(zia_props->raidz.gen[3],
|
||||
"RAIDZ 3 Generation");
|
||||
break;
|
||||
case ZPOOL_PROP_ZIA_RAIDZ1_REC:
|
||||
zia_props->raidz.rec[1] =
|
||||
fnvpair_value_uint64(elem);
|
||||
/* need checksum */
|
||||
if (zia_props->raidz.rec[1]) {
|
||||
if (!zia_props->checksum) {
|
||||
zia_props->checksum = 1;
|
||||
zia_prop_warn(
|
||||
zia_props->checksum,
|
||||
"Checksum");
|
||||
}
|
||||
}
|
||||
zia_prop_warn(zia_props->raidz.rec[1],
|
||||
"RAIDZ 1 Reconstruction");
|
||||
break;
|
||||
case ZPOOL_PROP_ZIA_RAIDZ2_REC:
|
||||
zia_props->raidz.rec[2] =
|
||||
fnvpair_value_uint64(elem);
|
||||
/* need checksum */
|
||||
if (zia_props->raidz.rec[2]) {
|
||||
if (!zia_props->checksum) {
|
||||
zia_props->checksum = 1;
|
||||
zia_prop_warn(
|
||||
zia_props->checksum,
|
||||
"Checksum");
|
||||
}
|
||||
}
|
||||
zia_prop_warn(zia_props->raidz.rec[2],
|
||||
"RAIDZ 2 Reconstruction");
|
||||
break;
|
||||
case ZPOOL_PROP_ZIA_RAIDZ3_REC:
|
||||
zia_props->raidz.rec[3] =
|
||||
fnvpair_value_uint64(elem);
|
||||
/* need checksum */
|
||||
if (zia_props->raidz.rec[3]) {
|
||||
if (!zia_props->checksum) {
|
||||
zia_props->checksum = 1;
|
||||
zia_prop_warn(
|
||||
zia_props->checksum,
|
||||
"Checksum");
|
||||
}
|
||||
}
|
||||
zia_prop_warn(zia_props->raidz.rec[3],
|
||||
"RAIDZ 3 Reconstruction");
|
||||
break;
|
||||
case ZPOOL_PROP_ZIA_FILE_WRITE:
|
||||
zia_props->file_write =
|
||||
fnvpair_value_uint64(elem);
|
||||
|
||||
/* reopen devices so that provider is used */
|
||||
spa_vdev_state_enter(spa, SCL_NONE);
|
||||
vdev_close(spa->spa_root_vdev);
|
||||
(void) vdev_open(spa->spa_root_vdev);
|
||||
(void) spa_vdev_state_exit(spa, NULL, 0);
|
||||
|
||||
zia_prop_warn(zia_props->file_write,
|
||||
"File Write");
|
||||
break;
|
||||
case ZPOOL_PROP_ZIA_DISK_WRITE:
|
||||
zia_props->disk_write =
|
||||
fnvpair_value_uint64(elem);
|
||||
|
||||
/* reopen devices so that provider is used */
|
||||
spa_vdev_state_enter(spa, SCL_NONE);
|
||||
vdev_close(spa->spa_root_vdev);
|
||||
(void) vdev_open(spa->spa_root_vdev);
|
||||
(void) spa_vdev_state_exit(spa, NULL, 0);
|
||||
|
||||
zia_prop_warn(zia_props->disk_write,
|
||||
"Disk Write");
|
||||
break;
|
||||
case ZPOOL_PROP_INVAL:
|
||||
if (zpool_prop_feature(elemname)) {
|
||||
fname = strchr(elemname, '@') + 1;
|
||||
|
|
|
@ -725,6 +725,8 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops)
|
|||
vd->vdev_stat.vs_timestamp = gethrtime();
|
||||
vdev_queue_init(vd);
|
||||
|
||||
vd->vdev_zia_handle = NULL;
|
||||
|
||||
return (vd);
|
||||
}
|
||||
|
||||
|
@ -1067,6 +1069,8 @@ vdev_free(vdev_t *vd)
|
|||
*/
|
||||
vdev_close(vd);
|
||||
|
||||
ASSERT3P(vd->vdev_zia_handle, ==, NULL);
|
||||
|
||||
ASSERT(!list_link_active(&vd->vdev_config_dirty_node));
|
||||
ASSERT(!list_link_active(&vd->vdev_state_dirty_node));
|
||||
|
||||
|
|
|
@ -1033,6 +1033,7 @@ vdev_draid_map_alloc_row(zio_t *zio, raidz_row_t **rrp, uint64_t io_offset,
|
|||
rr->rr_offset = io_offset;
|
||||
rr->rr_size = io_size;
|
||||
#endif
|
||||
rr->rr_zia_handle = NULL;
|
||||
*rrp = rr;
|
||||
|
||||
uint8_t *base;
|
||||
|
|
|
@ -43,6 +43,7 @@
|
|||
#include <sys/vdev_draid.h>
|
||||
#include <sys/uberblock_impl.h>
|
||||
#include <sys/dsl_scan.h>
|
||||
#include <sys/zia.h>
|
||||
|
||||
#ifdef ZFS_DEBUG
|
||||
#include <sys/vdev.h> /* For vdev_xlate() in vdev_raidz_io_verify() */
|
||||
|
@ -376,6 +377,8 @@ static int zfs_scrub_after_expand = 1;
|
|||
static void
|
||||
vdev_raidz_row_free(raidz_row_t *rr)
|
||||
{
|
||||
zia_raidz_free(rr, B_FALSE);
|
||||
|
||||
for (int c = 0; c < rr->rr_cols; c++) {
|
||||
raidz_col_t *rc = &rr->rr_col[c];
|
||||
|
||||
|
@ -628,6 +631,7 @@ vdev_raidz_map_alloc(zio_t *zio, uint64_t ashift, uint64_t dcols,
|
|||
rr->rr_offset = zio->io_offset;
|
||||
rr->rr_size = zio->io_size;
|
||||
#endif
|
||||
rr->rr_zia_handle = NULL;
|
||||
|
||||
uint64_t asize = 0;
|
||||
|
||||
|
@ -1094,7 +1098,7 @@ vdev_raidz_pqr_func(void *buf, size_t size, void *private)
|
|||
return (0);
|
||||
}
|
||||
|
||||
static void
|
||||
void
|
||||
vdev_raidz_generate_parity_p(raidz_row_t *rr)
|
||||
{
|
||||
uint64_t *p = abd_to_buf(rr->rr_col[VDEV_RAIDZ_P].rc_abd);
|
||||
|
@ -1112,7 +1116,9 @@ vdev_raidz_generate_parity_p(raidz_row_t *rr)
|
|||
}
|
||||
}
|
||||
|
||||
static void
|
||||
EXPORT_SYMBOL(vdev_raidz_generate_parity_p);
|
||||
|
||||
void
|
||||
vdev_raidz_generate_parity_pq(raidz_row_t *rr)
|
||||
{
|
||||
uint64_t *p = abd_to_buf(rr->rr_col[VDEV_RAIDZ_P].rc_abd);
|
||||
|
@ -1154,7 +1160,9 @@ vdev_raidz_generate_parity_pq(raidz_row_t *rr)
|
|||
}
|
||||
}
|
||||
|
||||
static void
|
||||
EXPORT_SYMBOL(vdev_raidz_generate_parity_pq);
|
||||
|
||||
void
|
||||
vdev_raidz_generate_parity_pqr(raidz_row_t *rr)
|
||||
{
|
||||
uint64_t *p = abd_to_buf(rr->rr_col[VDEV_RAIDZ_P].rc_abd);
|
||||
|
@ -1202,6 +1210,8 @@ vdev_raidz_generate_parity_pqr(raidz_row_t *rr)
|
|||
}
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(vdev_raidz_generate_parity_pqr);
|
||||
|
||||
/*
|
||||
* Generate RAID parity in the first virtual columns according to the number of
|
||||
* parity columns available.
|
||||
|
@ -1888,7 +1898,7 @@ vdev_raidz_matrix_reconstruct(raidz_row_t *rr, int n, int nmissing,
|
|||
kmem_free(p, psize);
|
||||
}
|
||||
|
||||
static void
|
||||
void
|
||||
vdev_raidz_reconstruct_general(raidz_row_t *rr, int *tgts, int ntgts)
|
||||
{
|
||||
int i, c, t, tt;
|
||||
|
@ -2029,6 +2039,8 @@ vdev_raidz_reconstruct_general(raidz_row_t *rr, int *tgts, int ntgts)
|
|||
}
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(vdev_raidz_reconstruct_general);
|
||||
|
||||
static void
|
||||
vdev_raidz_reconstruct_row(raidz_map_t *rm, raidz_row_t *rr,
|
||||
const int *t, int nt)
|
||||
|
@ -2333,7 +2345,24 @@ vdev_raidz_io_start_write(zio_t *zio, raidz_row_t *rr)
|
|||
vdev_t *vd = zio->io_vd;
|
||||
raidz_map_t *rm = zio->io_vsd;
|
||||
|
||||
vdev_raidz_generate_parity_row(rm, rr);
|
||||
/*
|
||||
* here instead of vdev_raidz_map_alloc or
|
||||
* vdev_raidz_generate_parity_row to not have to
|
||||
* store local_offload and be able to use zio
|
||||
*/
|
||||
boolean_t local_offload = B_FALSE;
|
||||
if ((zia_raidz_alloc(zio, rr, B_FALSE, 0, &local_offload) != ZIA_OK) ||
|
||||
(zia_raidz_gen(rr) != ZIA_OK)) {
|
||||
if (zia_raidz_gen_cleanup(zio, rr,
|
||||
local_offload) == ZIA_ACCELERATOR_DOWN) {
|
||||
zia_disable_offloading(zio, B_TRUE);
|
||||
zio->io_stage = ZIO_STAGE_VDEV_IO_ASSESS >> 1;
|
||||
return;
|
||||
}
|
||||
vdev_raidz_generate_parity_row(rm, rr);
|
||||
} else {
|
||||
zio->io_flags |= ZIO_FLAG_DONT_AGGREGATE;
|
||||
}
|
||||
|
||||
for (int c = 0; c < rr->rr_scols; c++) {
|
||||
raidz_col_t *rc = &rr->rr_col[c];
|
||||
|
@ -2631,14 +2660,69 @@ raidz_checksum_verify(zio_t *zio)
|
|||
{
|
||||
zio_bad_cksum_t zbc = {0};
|
||||
raidz_map_t *rm = zio->io_vsd;
|
||||
/*
|
||||
* if the zio entered this function offloaded,
|
||||
* need to onload the parity columns on error
|
||||
*/
|
||||
const boolean_t entered_offloaded = zia_is_offloaded(zio->io_abd);
|
||||
|
||||
int ret = zio_checksum_error(zio, &zbc);
|
||||
if (ret != 0 && zbc.zbc_injected != 0)
|
||||
rm->rm_ecksuminjected = 1;
|
||||
|
||||
/*
|
||||
* zio_checksum_error does not get access to
|
||||
* rm, so only the abd is freed on error -
|
||||
* clean up rm here
|
||||
*/
|
||||
if (zia_is_offloaded(zio->io_abd) != B_TRUE) {
|
||||
for (int i = 0; i < rm->rm_nrows; i++) {
|
||||
raidz_row_t *rr = rm->rm_row[i];
|
||||
|
||||
/*
|
||||
* force onload, since data was modified
|
||||
*
|
||||
* ignore return value - will always return ZIA_ERROR
|
||||
*/
|
||||
zia_raidz_rec_cleanup(zio, rr, B_TRUE,
|
||||
entered_offloaded);
|
||||
}
|
||||
}
|
||||
|
||||
return (ret);
|
||||
}
|
||||
|
||||
static void
|
||||
raidz_move_orig_parity(zio_t *zio, raidz_row_t *rr, abd_t **orig)
|
||||
{
|
||||
(void) zio;
|
||||
|
||||
for (uint64_t c = 0; c < rr->rr_firstdatacol; c++) {
|
||||
raidz_col_t *rc = &rr->rr_col[c];
|
||||
if (!rc->rc_tried || rc->rc_error != 0)
|
||||
continue;
|
||||
|
||||
orig[c] = rc->rc_abd;
|
||||
ASSERT3U(abd_get_size(rc->rc_abd), ==, rc->rc_size);
|
||||
rc->rc_abd = abd_alloc_linear(rc->rc_size, B_FALSE);
|
||||
zia_raidz_new_parity(zio, rr, c);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
raidz_restore_orig_parity(raidz_row_t *rr, abd_t **orig)
|
||||
{
|
||||
for (uint64_t c = 0; c < rr->rr_firstdatacol; c++) {
|
||||
raidz_col_t *rc = &rr->rr_col[c];
|
||||
if (!rc->rc_tried || rc->rc_error != 0)
|
||||
continue;
|
||||
|
||||
abd_free(rc->rc_abd);
|
||||
rc->rc_abd = orig[c];
|
||||
orig[c] = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Generate the parity from the data columns. If we tried and were able to
|
||||
* read the parity without error, verify that the generated parity matches the
|
||||
|
@ -2648,7 +2732,7 @@ raidz_checksum_verify(zio_t *zio)
|
|||
static int
|
||||
raidz_parity_verify(zio_t *zio, raidz_row_t *rr)
|
||||
{
|
||||
abd_t *orig[VDEV_RAIDZ_MAXPARITY];
|
||||
abd_t *orig[VDEV_RAIDZ_MAXPARITY] = { NULL };
|
||||
int c, ret = 0;
|
||||
raidz_map_t *rm = zio->io_vsd;
|
||||
raidz_col_t *rc;
|
||||
|
@ -2660,15 +2744,7 @@ raidz_parity_verify(zio_t *zio, raidz_row_t *rr)
|
|||
if (checksum == ZIO_CHECKSUM_NOPARITY)
|
||||
return (ret);
|
||||
|
||||
for (c = 0; c < rr->rr_firstdatacol; c++) {
|
||||
rc = &rr->rr_col[c];
|
||||
if (!rc->rc_tried || rc->rc_error != 0)
|
||||
continue;
|
||||
|
||||
orig[c] = rc->rc_abd;
|
||||
ASSERT3U(abd_get_size(rc->rc_abd), ==, rc->rc_size);
|
||||
rc->rc_abd = abd_alloc_linear(rc->rc_size, B_FALSE);
|
||||
}
|
||||
raidz_move_orig_parity(zio, rr, orig);
|
||||
|
||||
/*
|
||||
* Verify any empty sectors are zero filled to ensure the parity
|
||||
|
@ -2682,7 +2758,29 @@ raidz_parity_verify(zio_t *zio, raidz_row_t *rr)
|
|||
* isn't harmful but it does have the side effect of fixing stuff
|
||||
* we didn't realize was necessary (i.e. even if we return 0).
|
||||
*/
|
||||
vdev_raidz_generate_parity_row(rm, rr);
|
||||
if (zia_raidz_gen(rr) != ZIA_OK) {
|
||||
/*
|
||||
* restore original parity columns so
|
||||
* that the reconstructed parity can
|
||||
* be brought back with the data columns
|
||||
*/
|
||||
raidz_restore_orig_parity(rr, orig);
|
||||
|
||||
/* return reconstructed columns to memory */
|
||||
const int zia_rc = zia_raidz_rec_cleanup(zio, rr,
|
||||
B_FALSE, B_TRUE);
|
||||
|
||||
if (zia_rc == ZIA_ACCELERATOR_DOWN) {
|
||||
return (VDEV_RAIDZ_MAXPARITY + 1);
|
||||
}
|
||||
|
||||
/*
|
||||
* continue to software, so redo the
|
||||
* original moving of parity columns
|
||||
*/
|
||||
raidz_move_orig_parity(zio, rr, orig);
|
||||
vdev_raidz_generate_parity_row(rm, rr);
|
||||
}
|
||||
|
||||
for (c = 0; c < rr->rr_firstdatacol; c++) {
|
||||
rc = &rr->rr_col[c];
|
||||
|
@ -2690,7 +2788,70 @@ raidz_parity_verify(zio_t *zio, raidz_row_t *rr)
|
|||
if (!rc->rc_tried || rc->rc_error != 0)
|
||||
continue;
|
||||
|
||||
if (abd_cmp(orig[c], rc->rc_abd) != 0) {
|
||||
int cmp = 0;
|
||||
if (zia_raidz_cmp(orig[c], rc->rc_abd, &cmp) != ZIA_OK) {
|
||||
if (zia_is_offloaded(zio->io_abd) ||
|
||||
rr->rr_zia_handle) {
|
||||
/*
|
||||
* should only need to onload orig[c] and
|
||||
* rc but onloading everything to not create
|
||||
* inconsistent rr state
|
||||
*/
|
||||
int zia_rc = zia_raidz_rec_cleanup(zio, rr,
|
||||
B_FALSE, B_TRUE);
|
||||
|
||||
for (uint64_t i = 0; i < rr->rr_firstdatacol;
|
||||
i++) {
|
||||
if (orig[i]) {
|
||||
zia_rc = zia_worst_error(zia_rc,
|
||||
zia_onload_abd(orig[i],
|
||||
orig[i]->abd_size,
|
||||
B_FALSE));
|
||||
}
|
||||
}
|
||||
|
||||
if (zia_rc == ZIA_ACCELERATOR_DOWN) {
|
||||
/*
|
||||
* get original parity columns back to
|
||||
* get the original in-memory data
|
||||
*/
|
||||
raidz_restore_orig_parity(rr, orig);
|
||||
return (VDEV_RAIDZ_MAXPARITY + 1);
|
||||
}
|
||||
}
|
||||
cmp = abd_cmp(orig[c], rc->rc_abd);
|
||||
}
|
||||
if (cmp != 0) {
|
||||
if (zia_is_offloaded(zio->io_abd) ||
|
||||
rr->rr_zia_handle) {
|
||||
/*
|
||||
* should only need to onload orig[c] and
|
||||
* rc but onloading everything to not create
|
||||
* inconsistent rr state
|
||||
*/
|
||||
int zia_rc = zia_raidz_rec_cleanup(zio, rr,
|
||||
B_FALSE, B_TRUE);
|
||||
|
||||
for (uint64_t i = 0; i < rr->rr_firstdatacol;
|
||||
i++) {
|
||||
if (orig[i]) {
|
||||
zia_rc = zia_worst_error(zia_rc,
|
||||
zia_onload_abd(orig[i],
|
||||
orig[i]->abd_size,
|
||||
B_FALSE));
|
||||
}
|
||||
}
|
||||
|
||||
if (zia_rc == ZIA_ACCELERATOR_DOWN) {
|
||||
/*
|
||||
* get original parity columns back to
|
||||
* get the original in-memory data
|
||||
*/
|
||||
raidz_restore_orig_parity(rr, orig);
|
||||
return (VDEV_RAIDZ_MAXPARITY + 1);
|
||||
}
|
||||
}
|
||||
|
||||
zfs_dbgmsg("found error on col=%u devidx=%u off %llx",
|
||||
c, (int)rc->rc_devidx, (u_longlong_t)rc->rc_offset);
|
||||
vdev_raidz_checksum_error(zio, rc, orig[c]);
|
||||
|
@ -2716,7 +2877,7 @@ vdev_raidz_worst_error(raidz_row_t *rr)
|
|||
return (error);
|
||||
}
|
||||
|
||||
static void
|
||||
static int
|
||||
vdev_raidz_io_done_verified(zio_t *zio, raidz_row_t *rr)
|
||||
{
|
||||
int unexpected_errors = 0;
|
||||
|
@ -2758,6 +2919,10 @@ vdev_raidz_io_done_verified(zio_t *zio, raidz_row_t *rr)
|
|||
(zio->io_flags & ZIO_FLAG_RESILVER)) {
|
||||
int n = raidz_parity_verify(zio, rr);
|
||||
unexpected_errors += n;
|
||||
|
||||
if (n != 0) {
|
||||
return (n);
|
||||
}
|
||||
}
|
||||
|
||||
if (zio->io_error == 0 && spa_writeable(zio->io_spa) &&
|
||||
|
@ -2826,6 +2991,7 @@ vdev_raidz_io_done_verified(zio_t *zio, raidz_row_t *rr)
|
|||
zio_nowait(cio);
|
||||
}
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -2970,15 +3136,43 @@ raidz_reconstruct(zio_t *zio, int *ltgts, int ntgts, int nparity)
|
|||
zfs_dbgmsg("reconstruction not possible; "
|
||||
"too many failures");
|
||||
}
|
||||
|
||||
/* drop offloaded data */
|
||||
for (int i = 0; i < rm->rm_nrows; i++) {
|
||||
raidz_row_t *rr = rm->rm_row[i];
|
||||
zia_raidz_rec_cleanup(zio, rr, B_TRUE, B_FALSE);
|
||||
/* no data movement, so errors don't matter */
|
||||
}
|
||||
raidz_restore_orig_data(rm);
|
||||
return (EINVAL);
|
||||
}
|
||||
if (dead_data > 0)
|
||||
vdev_raidz_reconstruct_row(rm, rr, my_tgts, t);
|
||||
|
||||
if (dead_data > 0) {
|
||||
/*
|
||||
* here instead of vdev_raidz_reconstruct_row
|
||||
* to be able to use zio
|
||||
*/
|
||||
if (zia_raidz_rec(rr, my_tgts, t) != ZIA_OK) {
|
||||
int ret = ZIA_OK;
|
||||
for (int i = 0; i < rm->rm_nrows; i++) {
|
||||
raidz_row_t *rr = rm->rm_row[i];
|
||||
ret = zia_worst_error(ret,
|
||||
zia_raidz_rec_cleanup(zio, rr,
|
||||
B_FALSE, B_TRUE));
|
||||
}
|
||||
|
||||
if ((ret != ZIA_OK) &&
|
||||
(ret != ZIA_ACCELERATOR_DOWN)) {
|
||||
vdev_raidz_reconstruct_row(rm, rr,
|
||||
my_tgts, t);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Check for success */
|
||||
if (raidz_checksum_verify(zio) == 0) {
|
||||
int ret = 0;
|
||||
|
||||
/* Reconstruction succeeded - report errors */
|
||||
for (int i = 0; i < rm->rm_nrows; i++) {
|
||||
|
@ -3008,19 +3202,32 @@ raidz_reconstruct(zio_t *zio, int *ltgts, int ntgts, int nparity)
|
|||
}
|
||||
}
|
||||
|
||||
vdev_raidz_io_done_verified(zio, rr);
|
||||
const int rc =
|
||||
vdev_raidz_io_done_verified(zio, rr);
|
||||
ret = zia_worst_error(ret, rc);
|
||||
}
|
||||
|
||||
zio_checksum_verified(zio);
|
||||
|
||||
if (ret != ZIA_ACCELERATOR_DOWN) {
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
if (dbgmsg) {
|
||||
zfs_dbgmsg("reconstruction successful "
|
||||
"(checksum verified)");
|
||||
}
|
||||
return (0);
|
||||
|
||||
return (ret);
|
||||
}
|
||||
|
||||
/* Reconstruction failed - restore original data */
|
||||
/* drop offloaded data */
|
||||
for (int i = 0; i < rm->rm_nrows; i++) {
|
||||
raidz_row_t *rr = rm->rm_row[i];
|
||||
zia_raidz_rec_cleanup(zio, rr, B_TRUE, B_FALSE);
|
||||
/* no data movement, so errors don't matter */
|
||||
}
|
||||
raidz_restore_orig_data(rm);
|
||||
if (dbgmsg) {
|
||||
zfs_dbgmsg("raidz_reconstruct_expanded(zio=%px) checksum "
|
||||
|
@ -3128,6 +3335,9 @@ vdev_raidz_combrec(zio_t *zio)
|
|||
for (;;) {
|
||||
int err = raidz_reconstruct(zio, ltgts, num_failures,
|
||||
nparity);
|
||||
if (err == ZIA_ACCELERATOR_DOWN) {
|
||||
return (err);
|
||||
}
|
||||
if (err == EINVAL) {
|
||||
/*
|
||||
* Reconstruction not possible with this #
|
||||
|
@ -3316,6 +3526,18 @@ vdev_raidz_io_done_reconstruct_known_missing(zio_t *zio, raidz_map_t *rm,
|
|||
|
||||
ASSERT(rr->rr_firstdatacol >= n);
|
||||
|
||||
if (zia_raidz_rec(rr, tgts, n) == ZIA_OK) {
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* drop handles instead of onloading
|
||||
*
|
||||
* return value doesn't matter because
|
||||
* the data hasn't changed yet
|
||||
*/
|
||||
zia_raidz_rec_cleanup(zio, rr,
|
||||
B_TRUE, B_FALSE);
|
||||
vdev_raidz_reconstruct_row(rm, rr, tgts, n);
|
||||
}
|
||||
}
|
||||
|
@ -3437,16 +3659,88 @@ vdev_raidz_io_done(zio_t *zio)
|
|||
}
|
||||
}
|
||||
|
||||
/* the raidz rows should never enter here already offloaded */
|
||||
for (int i = 0; i < rm->rm_nrows; i++) {
|
||||
raidz_row_t *rr = rm->rm_row[i];
|
||||
ASSERT(rr->rr_zia_handle == NULL);
|
||||
}
|
||||
|
||||
/* offload once at beginning */
|
||||
blkptr_t *bp = zio->io_bp;
|
||||
if (bp && !BP_IS_METADATA(bp)) {
|
||||
uint_t checksum = (BP_IS_GANG(bp) ?
|
||||
ZIO_CHECKSUM_GANG_HEADER : BP_GET_CHECKSUM(bp));
|
||||
zio_checksum_info_t *ci = &zio_checksum_table[checksum];
|
||||
if (!(ci->ci_flags & ZCHECKSUM_FLAG_EMBEDDED)) {
|
||||
for (int i = 0; i < rm->rm_nrows; i++) {
|
||||
raidz_row_t *rr = rm->rm_row[i];
|
||||
/*
|
||||
* Allow unchecked failure since failure
|
||||
* to offload means the software path
|
||||
* will be taken. Whether or not the
|
||||
* provider/offloader is valid
|
||||
* becomes irrelevant.
|
||||
*/
|
||||
zia_raidz_alloc(zio, rr,
|
||||
B_TRUE, checksum, NULL);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < rm->rm_nrows; i++) {
|
||||
raidz_row_t *rr = rm->rm_row[i];
|
||||
vdev_raidz_io_done_reconstruct_known_missing(zio,
|
||||
rm, rr);
|
||||
/*
|
||||
* Restarting here is unnecessary. If the offloader
|
||||
* failed, the offloaded data is still in sync with
|
||||
* the in-memory data, and falling back reconstructed
|
||||
* using the correct data.
|
||||
*/
|
||||
}
|
||||
|
||||
if (raidz_checksum_verify(zio) == 0) {
|
||||
int ret = raidz_checksum_verify(zio);
|
||||
|
||||
/* ZIA_ACCELERATOR_DOWN is a completely orthogonal error */
|
||||
if (ret == ZIA_ACCELERATOR_DOWN) {
|
||||
for (int i = 0; i < rm->rm_nrows; i++) {
|
||||
raidz_row_t *rr = rm->rm_row[i];
|
||||
vdev_raidz_io_done_verified(zio, rr);
|
||||
zia_raidz_rec_cleanup(zio, rr, B_TRUE, B_FALSE);
|
||||
}
|
||||
|
||||
zio->io_can_offload = B_FALSE;
|
||||
zio_vdev_io_redone(zio);
|
||||
return;
|
||||
}
|
||||
|
||||
if (ret == 0) {
|
||||
for (int i = 0; i < rm->rm_nrows; i++) {
|
||||
raidz_row_t *rr = rm->rm_row[i];
|
||||
ret =
|
||||
vdev_raidz_io_done_verified(zio, rr);
|
||||
if (ret == ZIA_ACCELERATOR_DOWN) {
|
||||
for (int j = 0; j < rm->rm_nrows; j++) {
|
||||
rr = rm->rm_row[j];
|
||||
|
||||
/*
|
||||
* vdev_raidz_io_done_verified
|
||||
* will have already attempted
|
||||
* to load reconstructed data
|
||||
* back into memory, so this
|
||||
* line should just drop any
|
||||
* remaining handles
|
||||
*
|
||||
* not sure why onload_parity
|
||||
* has to be set to B_TRUE
|
||||
*/
|
||||
zia_raidz_rec_cleanup(zio, rr,
|
||||
B_TRUE, B_TRUE);
|
||||
}
|
||||
|
||||
zio->io_can_offload = B_FALSE;
|
||||
zio_vdev_io_redone(zio);
|
||||
return;
|
||||
}
|
||||
}
|
||||
zio_checksum_verified(zio);
|
||||
} else {
|
||||
|
@ -3473,6 +3767,12 @@ vdev_raidz_io_done(zio_t *zio)
|
|||
rm->rm_row[i]);
|
||||
}
|
||||
if (nread != 0) {
|
||||
/* drop handles */
|
||||
for (int i = 0; i < rm->rm_nrows; i++) {
|
||||
raidz_row_t *rr = rm->rm_row[i];
|
||||
zia_raidz_rec_cleanup(zio, rr,
|
||||
B_TRUE, B_FALSE);
|
||||
}
|
||||
/*
|
||||
* Normally our stage is VDEV_IO_DONE, but if
|
||||
* we've already called redone(), it will have
|
||||
|
@ -3532,6 +3832,14 @@ vdev_raidz_io_done(zio_t *zio)
|
|||
* that is also a known failure, that's fine.
|
||||
*/
|
||||
zio->io_error = vdev_raidz_combrec(zio);
|
||||
|
||||
if (zio->io_error == ZIA_ACCELERATOR_DOWN) {
|
||||
zio->io_error = 0;
|
||||
zio->io_can_offload = B_FALSE;
|
||||
zio_vdev_io_redone(zio);
|
||||
return;
|
||||
}
|
||||
|
||||
if (zio->io_error == ECKSUM &&
|
||||
!(zio->io_flags & ZIO_FLAG_SPECULATIVE)) {
|
||||
vdev_raidz_io_done_unrecoverable(zio);
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,208 @@
|
|||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or https://opensource.org/licenses/CDDL-1.0.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
|
||||
#ifdef ZIA
|
||||
|
||||
#include <sys/vdev.h>
|
||||
#include <sys/vdev_disk.h>
|
||||
#include <sys/vdev_raidz_impl.h>
|
||||
#include <sys/zia.h>
|
||||
#include <sys/zia_cddl.h>
|
||||
#include <sys/zia_private.h>
|
||||
#include <sys/zio_compress.h>
|
||||
|
||||
/* basically a duplicate of zio_compress_data */
|
||||
int
|
||||
zia_compress_impl(const dpusm_uf_t *dpusm, zia_props_t *props,
|
||||
enum zio_compress c, abd_t *src, size_t s_len,
|
||||
void **cbuf_handle, uint64_t *c_len,
|
||||
uint8_t level, boolean_t *local_offload)
|
||||
{
|
||||
size_t d_len;
|
||||
uint8_t complevel;
|
||||
zio_compress_info_t *ci = &zio_compress_table[c];
|
||||
int ret = ZIA_OK;
|
||||
|
||||
ASSERT((uint_t)c < ZIO_COMPRESS_FUNCTIONS);
|
||||
ASSERT((uint_t)c == ZIO_COMPRESS_EMPTY || ci->ci_compress != NULL);
|
||||
|
||||
/*
|
||||
* If the data is all zeros, we don't even need to allocate
|
||||
* a block for it. We indicate this by returning zero size.
|
||||
*/
|
||||
if (!ABD_HANDLE(src)) {
|
||||
/* check in-memory buffer for zeros */
|
||||
if (abd_cmp_zero(src, s_len) == 0) {
|
||||
*c_len = 0;
|
||||
return (ZIA_OK);
|
||||
}
|
||||
|
||||
if (c == ZIO_COMPRESS_EMPTY) {
|
||||
*c_len = s_len;
|
||||
return (ZIA_OK);
|
||||
}
|
||||
|
||||
/* check that compression can be done before offloading */
|
||||
dpusm_pc_t *caps = NULL;
|
||||
if ((zia_get_capabilities(props->provider, &caps) != ZIA_OK) ||
|
||||
!(caps->compress & compress_to_dpusm(c))) {
|
||||
return (ZIA_FALLBACK);
|
||||
}
|
||||
|
||||
ret = zia_offload_abd(props->provider, src, s_len,
|
||||
props->min_offload_size, local_offload, B_FALSE);
|
||||
if (ret != ZIA_OK) {
|
||||
return (ret);
|
||||
}
|
||||
} else {
|
||||
/* came in offloaded */
|
||||
void *old_provider = dpusm->extract(ABD_HANDLE(src));
|
||||
if (old_provider != props->provider) {
|
||||
return (ZIA_PROVIDER_MISMATCH);
|
||||
}
|
||||
|
||||
/* use provider to check for zero buffer */
|
||||
ret = dpusm->all_zeros(ABD_HANDLE(src), 0, s_len);
|
||||
if (ret == DPUSM_OK) {
|
||||
*c_len = 0;
|
||||
return (ZIA_OK);
|
||||
} else if (ret != DPUSM_BAD_RESULT) {
|
||||
return (dpusm_to_ret(ret));
|
||||
}
|
||||
|
||||
if (c == ZIO_COMPRESS_EMPTY) {
|
||||
*c_len = s_len;
|
||||
return (ZIA_OK);
|
||||
}
|
||||
|
||||
dpusm_pc_t *caps = NULL;
|
||||
ret = zia_get_capabilities(props->provider, &caps);
|
||||
if (ret != ZIA_OK) {
|
||||
return (ret);
|
||||
}
|
||||
|
||||
if (!(caps->compress & compress_to_dpusm(c))) {
|
||||
return (ZIA_FALLBACK);
|
||||
}
|
||||
}
|
||||
|
||||
/* Compress at least 12.5% */
|
||||
d_len = s_len - (s_len >> 3);
|
||||
|
||||
complevel = ci->ci_level;
|
||||
|
||||
if (c == ZIO_COMPRESS_ZSTD) {
|
||||
/* If we don't know the level, we can't compress it */
|
||||
if (level == ZIO_COMPLEVEL_INHERIT) {
|
||||
*c_len = s_len;
|
||||
return (ZIA_OK);
|
||||
}
|
||||
|
||||
if (level == ZIO_COMPLEVEL_DEFAULT)
|
||||
complevel = ZIO_ZSTD_LEVEL_DEFAULT;
|
||||
else
|
||||
complevel = level;
|
||||
|
||||
ASSERT3U(complevel, !=, ZIO_COMPLEVEL_INHERIT);
|
||||
}
|
||||
|
||||
/* nothing to offload, so just allocate space */
|
||||
*cbuf_handle = zia_alloc(props->provider,
|
||||
s_len, props->min_offload_size);
|
||||
if (!*cbuf_handle) {
|
||||
return (ZIA_ERROR);
|
||||
}
|
||||
|
||||
/* DPUSM interface takes in a size_t, not a uint64_t */
|
||||
size_t zia_c_len = (size_t)s_len;
|
||||
ret = dpusm->compress(compress_to_dpusm(c), (int8_t)level,
|
||||
ABD_HANDLE(src), s_len, *cbuf_handle, &zia_c_len);
|
||||
if (ret != DPUSM_OK) {
|
||||
zia_free(cbuf_handle);
|
||||
return (dpusm_to_ret(ret));
|
||||
}
|
||||
|
||||
*c_len = zia_c_len;
|
||||
|
||||
/*
|
||||
* Return ZIA_OK because this is not an error - it just didn't
|
||||
* compress well. The data will be dropped later on (instead of
|
||||
* onloaded) because c_len is too big.
|
||||
*/
|
||||
if (*c_len > d_len) {
|
||||
*c_len = s_len;
|
||||
}
|
||||
|
||||
return (ZIA_OK);
|
||||
}
|
||||
|
||||
int
|
||||
zia_raidz_rec_impl(const dpusm_uf_t *dpusm,
|
||||
raidz_row_t *rr, int *t, int nt)
|
||||
{
|
||||
int tgts[VDEV_RAIDZ_MAXPARITY];
|
||||
int ntgts = 0;
|
||||
for (int i = 0, c = 0; c < rr->rr_cols; c++) {
|
||||
if (i < nt && c == t[i]) {
|
||||
tgts[ntgts++] = c;
|
||||
i++;
|
||||
} else if (rr->rr_col[c].rc_error != 0) {
|
||||
tgts[ntgts++] = c;
|
||||
}
|
||||
}
|
||||
|
||||
ASSERT(ntgts >= nt);
|
||||
|
||||
return (dpusm->raid.rec(rr->rr_zia_handle,
|
||||
tgts, ntgts));
|
||||
}
|
||||
|
||||
#ifdef _KERNEL
|
||||
/* called by provider */
|
||||
void
|
||||
zia_disk_write_completion(void *zio_ptr, int error)
|
||||
{
|
||||
zio_t *zio = (zio_t *)zio_ptr;
|
||||
zio->io_error = error;
|
||||
ASSERT3S(zio->io_error, >=, 0);
|
||||
if (zio->io_error)
|
||||
vdev_disk_error(zio);
|
||||
|
||||
zio_delay_interrupt(zio);
|
||||
}
|
||||
|
||||
/* called by provider */
|
||||
void
|
||||
zia_disk_flush_completion(void *zio_ptr, int error)
|
||||
{
|
||||
zio_t *zio = (zio_t *)zio_ptr;
|
||||
|
||||
if (zio->io_error && (zio->io_error == EOPNOTSUPP))
|
||||
zio->io_vd->vdev_nowritecache = B_TRUE;
|
||||
|
||||
ASSERT3S(zio->io_error, >=, 0);
|
||||
if (zio->io_error)
|
||||
vdev_disk_error(zio);
|
||||
zio_interrupt(zio);
|
||||
}
|
||||
#endif /* _KERNEL */
|
||||
|
||||
#endif /* ZIA */
|
206
module/zfs/zio.c
206
module/zfs/zio.c
|
@ -51,6 +51,7 @@
|
|||
#include <sys/trace_zfs.h>
|
||||
#include <sys/abd.h>
|
||||
#include <sys/dsl_crypt.h>
|
||||
#include <sys/zia.h>
|
||||
#include <cityhash.h>
|
||||
|
||||
/*
|
||||
|
@ -448,12 +449,12 @@ zio_push_transform(zio_t *zio, abd_t *data, uint64_t size, uint64_t bufsize,
|
|||
zio->io_size = size;
|
||||
}
|
||||
|
||||
void
|
||||
zio_pop_transforms(zio_t *zio)
|
||||
zio_transform_t *
|
||||
zio_pop_transform(zio_t *zio)
|
||||
{
|
||||
zio_transform_t *zt;
|
||||
zio_transform_t *zt = zio->io_transform_stack;
|
||||
|
||||
while ((zt = zio->io_transform_stack) != NULL) {
|
||||
if (zt != NULL) {
|
||||
if (zt->zt_transform != NULL)
|
||||
zt->zt_transform(zio,
|
||||
zt->zt_orig_abd, zt->zt_orig_size);
|
||||
|
@ -467,6 +468,15 @@ zio_pop_transforms(zio_t *zio)
|
|||
|
||||
kmem_free(zt, sizeof (zio_transform_t));
|
||||
}
|
||||
|
||||
return (zt);
|
||||
}
|
||||
|
||||
void
|
||||
zio_pop_transforms(zio_t *zio)
|
||||
{
|
||||
while (zio_pop_transform(zio)) {
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -487,9 +497,40 @@ static void
|
|||
zio_decompress(zio_t *zio, abd_t *data, uint64_t size)
|
||||
{
|
||||
if (zio->io_error == 0) {
|
||||
int ret = zio_decompress_data(BP_GET_COMPRESS(zio->io_bp),
|
||||
zio->io_abd, data, zio->io_size, size,
|
||||
&zio->io_prop.zp_complevel);
|
||||
int ret = ZIA_FALLBACK;
|
||||
zia_props_t *zia_props = zia_get_props(zio->io_spa);
|
||||
if ((zia_props->decompress == 1) &&
|
||||
(zio->io_can_offload == B_TRUE)) {
|
||||
ret = zia_decompress(zia_props,
|
||||
BP_GET_COMPRESS(zio->io_bp),
|
||||
zio->io_abd, zio->io_size,
|
||||
data, size,
|
||||
&zio->io_prop.zp_complevel);
|
||||
}
|
||||
|
||||
if (ret == ZIA_OK) {
|
||||
ASSERT(zia_is_offloaded(zio->io_abd) == B_TRUE);
|
||||
/*
|
||||
* bring data back into memory since there
|
||||
* are no subsequent offloaded stages
|
||||
*/
|
||||
ret = zia_onload_abd(data, size, B_FALSE);
|
||||
}
|
||||
|
||||
ASSERT(zia_is_offloaded(data) != B_TRUE);
|
||||
/* let abd_free clean up zio->io_abd */
|
||||
|
||||
if (ret == ZIA_OK) {
|
||||
ret = 0;
|
||||
} else {
|
||||
if (ret == ZIA_ACCELERATOR_DOWN) {
|
||||
zia_disable_offloading(zio, B_FALSE);
|
||||
}
|
||||
|
||||
ret = zio_decompress_data(BP_GET_COMPRESS(zio->io_bp),
|
||||
zio->io_abd, data, zio->io_size, size,
|
||||
&zio->io_prop.zp_complevel);
|
||||
}
|
||||
|
||||
if (zio_injection_enabled && ret == 0)
|
||||
ret = zio_handle_fault_injection(zio, EINVAL);
|
||||
|
@ -801,6 +842,11 @@ zio_notify_parent(zio_t *pio, zio_t *zio, enum zio_wait_type wait,
|
|||
if (zio->io_error && !(zio->io_flags & ZIO_FLAG_DONT_PROPAGATE))
|
||||
*errorp = zio_worst_error(*errorp, zio->io_error);
|
||||
pio->io_reexecute |= zio->io_reexecute;
|
||||
if ((zio->io_flags & ZIO_FLAG_ZIA_REEXECUTE) &&
|
||||
(zio->io_can_offload != B_TRUE)) {
|
||||
pio->io_flags |= ZIO_FLAG_ZIA_REEXECUTE;
|
||||
pio->io_can_offload = B_FALSE;
|
||||
}
|
||||
ASSERT3U(*countp, >, 0);
|
||||
|
||||
(*countp)--;
|
||||
|
@ -853,6 +899,10 @@ zio_inherit_child_errors(zio_t *zio, enum zio_child c)
|
|||
{
|
||||
if (zio->io_child_error[c] != 0 && zio->io_error == 0)
|
||||
zio->io_error = zio->io_child_error[c];
|
||||
|
||||
if (zio->io_flags & ZIO_FLAG_ZIA_REEXECUTE) {
|
||||
zio->io_can_offload = B_FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
|
@ -974,7 +1024,13 @@ zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
|
|||
if (zb != NULL)
|
||||
zio->io_bookmark = *zb;
|
||||
|
||||
zio->io_can_offload = zia_get_props(spa)->can_offload;
|
||||
|
||||
if (pio != NULL) {
|
||||
if ((pio->io_flags & ZIO_FLAG_ZIA_REEXECUTE) ||
|
||||
(pio->io_can_offload != B_TRUE)) {
|
||||
zio->io_can_offload = B_FALSE;
|
||||
}
|
||||
zio->io_metaslab_class = pio->io_metaslab_class;
|
||||
if (zio->io_logical == NULL)
|
||||
zio->io_logical = pio->io_logical;
|
||||
|
@ -983,6 +1039,13 @@ zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
|
|||
zio_add_child_first(pio, zio);
|
||||
}
|
||||
|
||||
/* turn off encryption and dedup if Z.I.A. is used */
|
||||
if (zia_is_used(zio) == B_TRUE) {
|
||||
zio->io_prop.zp_dedup = B_FALSE;
|
||||
zio->io_prop.zp_dedup_verify = B_FALSE;
|
||||
zio->io_prop.zp_encrypt = B_FALSE;
|
||||
}
|
||||
|
||||
taskq_init_ent(&zio->io_tqent);
|
||||
|
||||
return (zio);
|
||||
|
@ -1865,24 +1928,92 @@ zio_write_compress(zio_t *zio)
|
|||
/* If it's a compressed write that is not raw, compress the buffer. */
|
||||
if (compress != ZIO_COMPRESS_OFF &&
|
||||
!(zio->io_flags & ZIO_FLAG_RAW_COMPRESS)) {
|
||||
boolean_t ran_compress = B_FALSE;
|
||||
boolean_t local_offload = B_FALSE;
|
||||
|
||||
abd_t *cabd = NULL;
|
||||
if (abd_cmp_zero(zio->io_abd, lsize) == 0)
|
||||
if (abd_cmp_zero(zio->io_abd, lsize) == 0) {
|
||||
psize = 0;
|
||||
else if (compress == ZIO_COMPRESS_EMPTY)
|
||||
} else if (compress == ZIO_COMPRESS_EMPTY) {
|
||||
psize = lsize;
|
||||
else
|
||||
psize = zio_compress_data(compress, zio->io_abd, &cabd,
|
||||
lsize, zp->zp_complevel);
|
||||
} else {
|
||||
int zia_rc = ZIA_FALLBACK;
|
||||
zia_props_t *zia_props = zia_get_props(spa);
|
||||
if ((zia_props->compress == 1) &&
|
||||
(zio->io_can_offload == B_TRUE)) {
|
||||
zia_rc = zia_compress(zia_props, compress,
|
||||
zio->io_abd, lsize, &cabd, &psize,
|
||||
zp->zp_complevel, &local_offload);
|
||||
}
|
||||
|
||||
if (zia_rc != ZIA_OK) {
|
||||
ASSERT(zia_is_offloaded(cabd) == B_FALSE);
|
||||
|
||||
zia_rc = zia_cleanup_abd(zio->io_abd,
|
||||
lsize, local_offload, B_FALSE);
|
||||
|
||||
/*
|
||||
* if data has to be brought back for cpu
|
||||
* compression, but could not, restart the
|
||||
* pipeline for this zio (not necessary
|
||||
* in this case, but still doing it here
|
||||
* in case a previous stage is offloaded)
|
||||
*/
|
||||
if (zia_rc == ZIA_ACCELERATOR_DOWN) {
|
||||
zia_restart_before_vdev(zio);
|
||||
return (zio);
|
||||
}
|
||||
|
||||
psize = zio_compress_data(compress, zio->io_abd,
|
||||
&cabd, lsize, zp->zp_complevel);
|
||||
}
|
||||
ran_compress = B_TRUE;
|
||||
}
|
||||
|
||||
if (psize == 0) {
|
||||
ASSERT(ran_compress == B_FALSE);
|
||||
ASSERT(zia_is_offloaded(cabd) == B_FALSE);
|
||||
compress = ZIO_COMPRESS_OFF;
|
||||
} else if (psize >= lsize) {
|
||||
compress = ZIO_COMPRESS_OFF;
|
||||
if (cabd != NULL)
|
||||
if (cabd != NULL) {
|
||||
abd_free(cabd);
|
||||
}
|
||||
/* source abd is still offloaded */
|
||||
} else if (!zp->zp_dedup && !zp->zp_encrypt &&
|
||||
psize <= BPE_PAYLOAD_SIZE &&
|
||||
zp->zp_level == 0 && !DMU_OT_HAS_FILL(zp->zp_type) &&
|
||||
spa_feature_is_enabled(spa, SPA_FEATURE_EMBEDDED_DATA)) {
|
||||
ASSERT(cabd != NULL);
|
||||
|
||||
/*
|
||||
* Remove offloaded source abd. Return value does not
|
||||
* matter: if this fails, the data can't be brought back
|
||||
* anyways, but the in-memory version is still valid
|
||||
*
|
||||
* Should onload cabd first, but that might error,
|
||||
* leaving this zio offloaded unnecessarily (next
|
||||
* attempt will run on CPU). Adding zia_cleanup_abd
|
||||
* into the if block seems unnecessary.
|
||||
*/
|
||||
zia_cleanup_abd(zio->io_abd, lsize,
|
||||
local_offload, B_FALSE);
|
||||
|
||||
/*
|
||||
* compressed enough, but not handling embedded
|
||||
* data, so move compressed data back into memory
|
||||
*
|
||||
* if failed, recompress with cpu compression
|
||||
*/
|
||||
const int zia_rc = zia_onload_abd(cabd, psize, B_FALSE);
|
||||
if ((zia_rc != ZIA_OK) &&
|
||||
(zia_rc != ZIA_ERROR) &&
|
||||
(zia_rc != ZIA_DISABLED)) {
|
||||
abd_free(cabd);
|
||||
zia_restart_before_vdev(zio);
|
||||
return (zio);
|
||||
}
|
||||
|
||||
void *cbuf = abd_borrow_buf_copy(cabd, lsize);
|
||||
encode_embedded_bp_compressed(bp,
|
||||
cbuf, compress, lsize, psize);
|
||||
|
@ -1897,6 +2028,8 @@ zio_write_compress(zio_t *zio)
|
|||
SPA_FEATURE_EMBEDDED_DATA));
|
||||
return (zio);
|
||||
} else {
|
||||
ASSERT(cabd != NULL);
|
||||
|
||||
/*
|
||||
* Round compressed size up to the minimum allocation
|
||||
* size of the smallest-ashift device, and zero the
|
||||
|
@ -1910,9 +2043,33 @@ zio_write_compress(zio_t *zio)
|
|||
if (rounded >= lsize) {
|
||||
compress = ZIO_COMPRESS_OFF;
|
||||
abd_free(cabd);
|
||||
zia_cleanup_abd(zio->io_abd, lsize,
|
||||
local_offload, B_FALSE);
|
||||
psize = lsize;
|
||||
} else {
|
||||
abd_zero_off(cabd, psize, rounded - psize);
|
||||
if (zia_is_offloaded(cabd)) {
|
||||
if (zia_zero_fill(cabd, psize,
|
||||
rounded - psize) != ZIA_OK) {
|
||||
if (zia_onload_abd(cabd, psize,
|
||||
B_FALSE) != ZIA_OK) {
|
||||
abd_free(cabd);
|
||||
zia_cleanup_abd(
|
||||
zio->io_abd,
|
||||
lsize,
|
||||
local_offload,
|
||||
B_FALSE);
|
||||
zia_restart_before_vdev(
|
||||
zio);
|
||||
return (zio);
|
||||
}
|
||||
}
|
||||
|
||||
zio->io_flags |=
|
||||
ZIO_FLAG_DONT_AGGREGATE;
|
||||
} else {
|
||||
abd_zero_off(cabd, psize,
|
||||
rounded - psize);
|
||||
}
|
||||
psize = rounded;
|
||||
zio_push_transform(zio, cabd,
|
||||
psize, lsize, NULL);
|
||||
|
@ -4280,6 +4437,13 @@ zio_vdev_io_start(zio_t *zio)
|
|||
if (zio->io_type == ZIO_TYPE_WRITE) {
|
||||
abd_copy(abuf, zio->io_abd, zio->io_size);
|
||||
abd_zero_off(abuf, zio->io_size, asize - zio->io_size);
|
||||
/*
|
||||
* The Z.I.A. handles of the abds that come here
|
||||
* were not modified and do not get associated with
|
||||
* abuf during the transform. Instead of dropping
|
||||
* the handle and delaying here, let abd_free clean
|
||||
* it up later.
|
||||
*/
|
||||
}
|
||||
zio_push_transform(zio, abuf, asize, asize, zio_subblock);
|
||||
}
|
||||
|
@ -4484,6 +4648,8 @@ zio_vsd_default_cksum_report(zio_t *zio, zio_cksum_report_t *zcr)
|
|||
{
|
||||
void *abd = abd_alloc_sametype(zio->io_abd, zio->io_size);
|
||||
|
||||
zia_onload_abd(zio->io_abd, zio->io_size, B_FALSE);
|
||||
|
||||
abd_copy(abd, zio->io_abd, zio->io_size);
|
||||
|
||||
zcr->zcr_cbinfo = zio->io_size;
|
||||
|
@ -4518,7 +4684,9 @@ zio_vdev_io_assess(zio_t *zio)
|
|||
* On retry, we cut in line in the issue queue, since we don't want
|
||||
* compression/checksumming/etc. work to prevent our (cheap) IO reissue.
|
||||
*/
|
||||
if (zio->io_error && vd == NULL &&
|
||||
if (zio->io_error &&
|
||||
!(zio->io_flags & ZIO_FLAG_ZIA_REEXECUTE) &&
|
||||
vd == NULL &&
|
||||
!(zio->io_flags & (ZIO_FLAG_DONT_RETRY | ZIO_FLAG_IO_RETRY))) {
|
||||
ASSERT(!(zio->io_flags & ZIO_FLAG_DONT_QUEUE)); /* not a leaf */
|
||||
ASSERT(!(zio->io_flags & ZIO_FLAG_IO_BYPASS)); /* not a leaf */
|
||||
|
@ -5145,6 +5313,8 @@ zio_done(zio_t *zio)
|
|||
}
|
||||
|
||||
if (zio->io_error) {
|
||||
ASSERT(!(zio->io_flags & ZIO_FLAG_ZIA_REEXECUTE));
|
||||
|
||||
/*
|
||||
* If this I/O is attached to a particular vdev,
|
||||
* generate an error message describing the I/O failure
|
||||
|
@ -5179,7 +5349,10 @@ zio_done(zio_t *zio)
|
|||
}
|
||||
}
|
||||
|
||||
if (zio->io_error && zio == zio->io_logical) {
|
||||
if ((zio->io_error ||
|
||||
(zio->io_flags & ZIO_FLAG_ZIA_REEXECUTE) ||
|
||||
0) &&
|
||||
zio == zio->io_logical) {
|
||||
/*
|
||||
* Determine whether zio should be reexecuted. This will
|
||||
* propagate all the way to the root via zio_notify_parent().
|
||||
|
@ -5550,6 +5723,7 @@ EXPORT_SYMBOL(zio_buf_alloc);
|
|||
EXPORT_SYMBOL(zio_data_buf_alloc);
|
||||
EXPORT_SYMBOL(zio_buf_free);
|
||||
EXPORT_SYMBOL(zio_data_buf_free);
|
||||
EXPORT_SYMBOL(zio_push_transform);
|
||||
|
||||
ZFS_MODULE_PARAM(zfs_zio, zio_, slow_io_ms, INT, ZMOD_RW,
|
||||
"Max I/O completion time (milliseconds) before marking it as slow");
|
||||
|
|
|
@ -31,6 +31,7 @@
|
|||
#include <sys/zio_checksum.h>
|
||||
#include <sys/zil.h>
|
||||
#include <sys/abd.h>
|
||||
#include <sys/zia.h>
|
||||
#include <zfs_fletcher.h>
|
||||
|
||||
/*
|
||||
|
@ -357,6 +358,13 @@ zio_checksum_compute(zio_t *zio, enum zio_checksum checksum,
|
|||
zio_eck_t eck;
|
||||
size_t eck_offset;
|
||||
|
||||
/* not handling embedded checksums, so bring back data */
|
||||
const int zia_rc = zia_cleanup_abd(abd, size, B_FALSE, B_FALSE);
|
||||
if (zia_rc == ZIA_ACCELERATOR_DOWN) {
|
||||
zia_restart_before_vdev(zio);
|
||||
return;
|
||||
}
|
||||
|
||||
memset(&saved, 0, sizeof (zio_cksum_t));
|
||||
|
||||
if (checksum == ZIO_CHECKSUM_ZILOG2) {
|
||||
|
@ -403,8 +411,31 @@ zio_checksum_compute(zio_t *zio, enum zio_checksum checksum,
|
|||
sizeof (zio_cksum_t));
|
||||
} else {
|
||||
saved = bp->blk_cksum;
|
||||
|
||||
int zia_rc = ZIA_FALLBACK;
|
||||
|
||||
/* only offload non-embedded checksums */
|
||||
boolean_t local_offload = B_FALSE;
|
||||
zia_props_t *zia_props = zia_get_props(spa);
|
||||
if ((zia_props->checksum == 1) &&
|
||||
(zio->io_can_offload == B_TRUE)) {
|
||||
zia_rc = zia_checksum_compute(zia_props->provider,
|
||||
&cksum, checksum, zio, size, &local_offload);
|
||||
}
|
||||
|
||||
/* fall back to ZFS implementation */
|
||||
if (zia_rc != ZIA_OK) {
|
||||
zia_rc = zia_cleanup_abd(abd, size, local_offload,
|
||||
B_FALSE);
|
||||
if (zia_rc == ZIA_ACCELERATOR_DOWN) {
|
||||
zia_restart_before_vdev(zio);
|
||||
return;
|
||||
}
|
||||
ci->ci_func[0](abd, size, spa->spa_cksum_tmpls[checksum],
|
||||
&cksum);
|
||||
} else {
|
||||
zio->io_flags |= ZIO_FLAG_DONT_AGGREGATE;
|
||||
}
|
||||
if (BP_USES_CRYPT(bp) && BP_GET_TYPE(bp) != DMU_OT_OBJSET)
|
||||
zio_checksum_handle_crypt(&cksum, &saved, insecure);
|
||||
bp->blk_cksum = cksum;
|
||||
|
@ -433,6 +464,12 @@ zio_checksum_error_impl(spa_t *spa, const blkptr_t *bp,
|
|||
zio_cksum_t verifier;
|
||||
size_t eck_offset;
|
||||
|
||||
/* not handling embedded checksums, so bring back data */
|
||||
const int zia_rc = zia_cleanup_abd(abd, size, B_FALSE, B_FALSE);
|
||||
if (zia_rc == ZIA_ACCELERATOR_DOWN) {
|
||||
return (zia_rc);
|
||||
}
|
||||
|
||||
if (checksum == ZIO_CHECKSUM_ZILOG2) {
|
||||
zil_chain_t zilc;
|
||||
uint64_t nused;
|
||||
|
@ -494,8 +531,25 @@ zio_checksum_error_impl(spa_t *spa, const blkptr_t *bp,
|
|||
} else {
|
||||
byteswap = BP_SHOULD_BYTESWAP(bp);
|
||||
expected_cksum = bp->blk_cksum;
|
||||
ci->ci_func[byteswap](abd, size,
|
||||
spa->spa_cksum_tmpls[checksum], &actual_cksum);
|
||||
|
||||
zia_props_t *zia_props = zia_get_props(spa);
|
||||
int error = ZIA_FALLBACK;
|
||||
if ((zia_props->can_offload == B_TRUE) &&
|
||||
(zia_props->checksum == 1)) {
|
||||
error = zia_checksum_error(checksum, abd, size,
|
||||
byteswap, &actual_cksum);
|
||||
}
|
||||
|
||||
/* fall back to ZFS implementation */
|
||||
if ((error != ZIA_OK) && (error != ECKSUM)) {
|
||||
/* data was modified by reconstruction */
|
||||
error = zia_onload_abd(abd, size, B_FALSE);
|
||||
if (error == ZIA_ACCELERATOR_DOWN) {
|
||||
return (error);
|
||||
}
|
||||
ci->ci_func[byteswap](abd, size,
|
||||
spa->spa_cksum_tmpls[checksum], &actual_cksum);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
@ -0,0 +1,921 @@
|
|||
/*
|
||||
* © 2021. Triad National Security, LLC. All rights reserved.
|
||||
*
|
||||
* This program was produced under U.S. Government contract
|
||||
* 89233218CNA000001 for Los Alamos National Laboratory (LANL), which
|
||||
* is operated by Triad National Security, LLC for the U.S.
|
||||
* Department of Energy/National Nuclear Security Administration. All
|
||||
* rights in the program are reserved by Triad National Security, LLC,
|
||||
* and the U.S. Department of Energy/National Nuclear Security
|
||||
* Administration. The Government is granted for itself and others
|
||||
* acting on its behalf a nonexclusive, paid-up, irrevocable worldwide
|
||||
* license in this material to reproduce, prepare derivative works,
|
||||
* distribute copies to the public, perform publicly and display
|
||||
* publicly, and to permit others to do so.
|
||||
*
|
||||
* ----
|
||||
*
|
||||
* This program is open source under the BSD-3 License.
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from this
|
||||
* software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/random.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
#include <sys/abd.h>
|
||||
#include <sys/spa_checksum.h>
|
||||
#include <sys/vdev_disk.h>
|
||||
#include <sys/vdev_raidz.h>
|
||||
#include <sys/vdev_raidz_impl.h>
|
||||
#include <sys/zfs_file.h>
|
||||
#include <sys/zio.h>
|
||||
#include <sys/zmod.h>
|
||||
#include <zfs_fletcher.h>
|
||||
|
||||
#include "kernel_offloader.h"
|
||||
|
||||
/* display name of this offloader; NAME_LEN counts the terminating NUL */
static const char NAME[] = "Kernel Offloader";
static const size_t NAME_LEN = sizeof (NAME);
|
||||
|
||||
/* kind of memory a kernel offloader handle refers to */
typedef enum kernel_offloader_handle_type {
	/* default type - convert all data into a single blob */
	KOH_REAL,
	/* view into memory belonging to another handle */
	KOH_REFERENCE,

	KOH_INVALID,
} koht_t;
|
||||
|
||||
/* offloaded data (not defined outside of "hardware") */
|
||||
typedef struct kernel_offloader_handle {
|
||||
koht_t type;
|
||||
void *ptr;
|
||||
size_t size;
|
||||
} koh_t;
|
||||
|
||||
/* **************************************** */
|
||||
/* memory bookkeeping */
|
||||
/*
 * Protects every counter below (atomic ints are not big enough).
 * File-local: the lock is only used by this file and its generic name
 * should not be visible to the rest of the kernel.
 */
static rwlock_t rwlock;

/* never decreases */
static size_t total_count;	/* number of allocations ever made */
static size_t total_size;	/* buffer size */
static size_t total_actual;	/* buffer size + any extra memory */

/* currently active */
static size_t active_count;	/* allocations that have not been freed */
static size_t active_size;	/* buffer size */
static size_t active_actual;	/* buffer size + any extra memory */
|
||||
/* **************************************** */
|
||||
|
||||
/* **************************************** */
|
||||
/* set kernel offloader to DOWN state */
|
||||
typedef struct kernel_offloader_down {
|
||||
rwlock_t rwlock;
|
||||
int count;
|
||||
int max;
|
||||
int printed;
|
||||
} kod_t;
|
||||
|
||||
/* reset the kod_t called name and set its threshold to max_val */
#define	kod_init(name, max_val)						\
	do {								\
		rwlock_init(&(name).rwlock);				\
		(name).count = 0;					\
		(name).max = (max_val);					\
		(name).printed = 0;					\
	} while (0)
|
||||
|
||||
/* count one more invocation of the operation tracked by name */
#define	kod_inc(name)							\
	do {								\
		write_lock(&(name).rwlock);				\
		(name).count++;						\
		write_unlock(&(name).rwlock);				\
	} while (0)
|
||||
|
||||
/*
 * Return DPUSM_PROVIDER_INVALIDATED from the *calling function* once
 * the operation tracked by name has run more than max times. Nothing
 * happens while max is 0. The variable name is logged the first time
 * the threshold is crossed.
 */
#define	kod_ret(name)							\
	do {								\
		if ((name).max) {					\
			write_lock(&(name).rwlock);			\
			if ((name).count > (name).max) {		\
				if (!(name).printed) {			\
					printk("%s\n", #name);		\
					(name).printed = 1;		\
				}					\
				write_unlock(&(name).rwlock);		\
				return (DPUSM_PROVIDER_INVALIDATED);	\
			}						\
			write_unlock(&(name).rwlock);			\
		}							\
	} while (0)
|
||||
|
||||
/* count this invocation, then bail out of the caller if name is "down" */
#define	kod_run(name)							\
	do {								\
		kod_inc(name);						\
		kod_ret(name);						\
	} while (0)
|
||||
|
||||
/* can probably do with macros */
|
||||
static kod_t copy_from_generic_down; static int copy_from_generic_down_max = 0;
|
||||
module_param(copy_from_generic_down_max, int, 0660);
|
||||
|
||||
static kod_t copy_to_generic_down; static int copy_to_generic_down_max = 0;
|
||||
module_param(copy_to_generic_down_max, int, 0660);
|
||||
|
||||
static kod_t cmp_down; static int cmp_down_max = 0;
|
||||
module_param(cmp_down_max, int, 0660);
|
||||
|
||||
static kod_t compress_down; static int compress_down_max = 0;
|
||||
module_param(compress_down_max, int, 0660);
|
||||
|
||||
static kod_t checksum_down; static int checksum_down_max = 0;
|
||||
module_param(checksum_down_max, int, 0660);
|
||||
|
||||
static kod_t raidz_gen_down; static int raidz_gen_down_max = 0;
|
||||
module_param(raidz_gen_down_max, int, 0660);
|
||||
|
||||
static kod_t raidz_rec_down; static int raidz_rec_down_max = 0;
|
||||
module_param(raidz_rec_down_max, int, 0660);
|
||||
|
||||
static kod_t disk_write_down; static int disk_write_down_max = 0;
|
||||
module_param(disk_write_down_max, int, 0660);
|
||||
/* **************************************** */
|
||||
|
||||
/*
 * Random value XORed into every pointer handed out as a handle, so
 * that dereferencing a handle directly will fail. Set by
 * kernel_offloader_init() and cleared by kernel_offloader_fini().
 */
static void *mask;
|
||||
void
|
||||
kernel_offloader_init(void)
|
||||
{
|
||||
get_random_bytes(&mask, sizeof (mask));
|
||||
rwlock_init(&rwlock);
|
||||
total_count = 0;
|
||||
total_size = 0;
|
||||
total_actual = 0;
|
||||
active_count = 0;
|
||||
active_size = 0;
|
||||
active_actual = 0;
|
||||
|
||||
kod_init(copy_from_generic_down, copy_from_generic_down_max);
|
||||
kod_init(copy_to_generic_down, copy_to_generic_down_max);
|
||||
kod_init(cmp_down, cmp_down_max);
|
||||
kod_init(compress_down, compress_down_max);
|
||||
kod_init(checksum_down, checksum_down_max);
|
||||
kod_init(raidz_gen_down, raidz_gen_down_max);
|
||||
kod_init(raidz_rec_down, raidz_rec_down_max);
|
||||
kod_init(disk_write_down, disk_write_down_max);
|
||||
|
||||
printk("kernel offloader init: %p\n", mask);
|
||||
}
|
||||
|
||||
void
|
||||
kernel_offloader_fini(void)
|
||||
{
|
||||
mask = NULL;
|
||||
|
||||
printk("kernel offloader fini with "
|
||||
"%zu/%zu (actual %zu/%zu) bytes "
|
||||
"in %zu/%zu allocations remaining\n",
|
||||
active_size, total_size,
|
||||
active_actual, total_actual,
|
||||
active_count, total_count);
|
||||
}
|
||||
|
||||
/* get a starting address of a linear koh_t */
|
||||
static void *
|
||||
ptr_start(koh_t *koh, size_t offset)
|
||||
{
|
||||
return (void *)(((uintptr_t)koh->ptr) + offset);
|
||||
}
|
||||
|
||||
/*
|
||||
* convert the actual pointer to a handle (pretend
|
||||
* the data is not accessible from the Z.I.A. base)
|
||||
*/
|
||||
static void *
|
||||
swizzle(void *ptr)
|
||||
{
|
||||
return (ptr?((void *)(((uintptr_t)ptr) ^ ((uintptr_t)mask))):NULL);
|
||||
}
|
||||
|
||||
/*
 * convert the handle to a usable pointer
 *
 * XOR with the same mask undoes swizzle(); NULL stays NULL.
 */
static void *
unswizzle(void *handle)
{
	void *ptr = swizzle(handle);

	return (ptr);
}
|
||||
|
||||
static koh_t *
|
||||
koh_alloc(size_t size)
|
||||
{
|
||||
koh_t *koh = kmalloc(sizeof (koh_t), GFP_KERNEL);
|
||||
if (koh) {
|
||||
koh->type = KOH_REAL;
|
||||
koh->ptr = kmalloc(size, GFP_KERNEL);
|
||||
koh->size = size;
|
||||
|
||||
write_lock(&rwlock);
|
||||
total_count++;
|
||||
active_count++;
|
||||
|
||||
/* the allocation itself */
|
||||
total_size += size;
|
||||
active_size += size;
|
||||
total_actual += size;
|
||||
active_actual += size;
|
||||
|
||||
/* the wrapper struct */
|
||||
total_actual += sizeof (koh_t);
|
||||
active_actual += sizeof (koh_t);
|
||||
|
||||
write_unlock(&rwlock);
|
||||
}
|
||||
|
||||
return (koh);
|
||||
}
|
||||
|
||||
/*
 * Create a KOH_REFERENCE wrapper describing size bytes starting at
 * offset inside src. No data is copied and no buffer is allocated;
 * only the wrapper struct is added to the memory statistics.
 *
 * Returns NULL if src is NULL, if the requested range does not fit
 * inside src, or if the wrapper cannot be allocated.
 */
static koh_t *
koh_alloc_ref(koh_t *src, size_t offset, size_t size)
{
	koh_t *ref = NULL;

	if (src == NULL) {
		return (NULL);
	}

	/* written so that a huge offset or size cannot wrap around */
	if ((offset > src->size) || (size > (src->size - offset))) {
		printk("Error: Cannot reference handle of size %zu "
		    "starting at offset %zu with size %zu\n",
		    src->size, offset, size);
		return (NULL);
	}

	ref = kmalloc(sizeof (*ref), GFP_KERNEL);
	if (ref == NULL) {
		return (NULL);
	}

	ref->type = KOH_REFERENCE;
	ref->ptr = ptr_start(src, offset);
	ref->size = size;

	write_lock(&rwlock);
	total_count++;
	active_count++;

	/* no new requested space */

	/* the wrapper struct */
	total_actual += sizeof (koh_t);
	active_actual += sizeof (koh_t);
	write_unlock(&rwlock);

	return (ref);
}
|
||||
|
||||
int
|
||||
kernel_offloader_get_size(void *handle, size_t *size, size_t *actual)
|
||||
{
|
||||
koh_t *koh = (koh_t *)unswizzle(handle);
|
||||
|
||||
if (size) {
|
||||
*size = koh->size;
|
||||
}
|
||||
|
||||
if (actual) {
|
||||
*actual = koh->size;
|
||||
}
|
||||
|
||||
return (KERNEL_OFFLOADER_OK);
|
||||
}
|
||||
|
||||
static int
|
||||
koh_free(koh_t *koh)
|
||||
{
|
||||
if (koh) {
|
||||
write_lock(&rwlock);
|
||||
switch (koh->type) {
|
||||
case KOH_REAL:
|
||||
/* the allocation itself */
|
||||
active_size -= koh->size;
|
||||
active_actual -= koh->size;
|
||||
kfree(koh->ptr);
|
||||
break;
|
||||
case KOH_REFERENCE:
|
||||
case KOH_INVALID:
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
/* the wrapper struct */
|
||||
active_actual -= sizeof (koh_t);
|
||||
|
||||
active_count--;
|
||||
write_unlock(&rwlock);
|
||||
|
||||
kfree(koh);
|
||||
}
|
||||
|
||||
return (KERNEL_OFFLOADER_OK);
|
||||
}
|
||||
|
||||
void *
|
||||
kernel_offloader_alloc(size_t size)
|
||||
{
|
||||
return (swizzle(koh_alloc(size)));
|
||||
}
|
||||
|
||||
void *
|
||||
kernel_offloader_alloc_ref(void *src_handle, size_t offset, size_t size)
|
||||
{
|
||||
return swizzle(koh_alloc_ref(unswizzle(src_handle),
|
||||
offset, size));
|
||||
}
|
||||
|
||||
int
|
||||
kernel_offloader_free(void *handle)
|
||||
{
|
||||
koh_free(unswizzle(handle));
|
||||
return (DPUSM_OK);
|
||||
}
|
||||
|
||||
int
|
||||
kernel_offloader_copy_from_generic(void *handle, size_t offset,
|
||||
const void *src, size_t size)
|
||||
{
|
||||
koh_t *koh = (koh_t *)unswizzle(handle);
|
||||
if (!koh) {
|
||||
return (KERNEL_OFFLOADER_ERROR);
|
||||
}
|
||||
|
||||
if ((offset + size) > koh->size) {
|
||||
return (KERNEL_OFFLOADER_ERROR);
|
||||
}
|
||||
|
||||
kod_run(copy_from_generic_down);
|
||||
|
||||
void *dst = ptr_start(koh, offset);
|
||||
if (memcpy(dst, src, size) != dst) {
|
||||
return (KERNEL_OFFLOADER_ERROR);
|
||||
}
|
||||
return (KERNEL_OFFLOADER_OK);
|
||||
}
|
||||
|
||||
int
|
||||
kernel_offloader_copy_to_generic(void *handle, size_t offset,
|
||||
void *dst, size_t size)
|
||||
{
|
||||
koh_t *koh = (koh_t *)unswizzle(handle);
|
||||
if (!koh) {
|
||||
return (KERNEL_OFFLOADER_ERROR);
|
||||
}
|
||||
|
||||
if ((offset + size) > koh->size) {
|
||||
return (KERNEL_OFFLOADER_ERROR);
|
||||
}
|
||||
|
||||
kod_run(copy_to_generic_down);
|
||||
|
||||
if (memcpy(dst, ptr_start(koh, offset), size) != dst) {
|
||||
return (KERNEL_OFFLOADER_ERROR);
|
||||
}
|
||||
|
||||
return (KERNEL_OFFLOADER_OK);
|
||||
}
|
||||
|
||||
int
|
||||
kernel_offloader_cmp(void *lhs_handle, void *rhs_handle, int *diff)
|
||||
{
|
||||
koh_t *lhs = (koh_t *)unswizzle(lhs_handle);
|
||||
koh_t *rhs = (koh_t *)unswizzle(rhs_handle);
|
||||
|
||||
if (!lhs || !rhs || !diff) {
|
||||
return (KERNEL_OFFLOADER_ERROR);
|
||||
}
|
||||
|
||||
kod_run(cmp_down);
|
||||
|
||||
size_t len = rhs->size;
|
||||
if (lhs->size != rhs->size) {
|
||||
len =
|
||||
(lhs->size < rhs->size)?lhs->size:rhs->size;
|
||||
}
|
||||
|
||||
*diff = memcmp(ptr_start(lhs, 0),
|
||||
ptr_start(rhs, 0), len);
|
||||
|
||||
return (KERNEL_OFFLOADER_OK);
|
||||
}
|
||||
|
||||
int
|
||||
kernel_offloader_zero_fill(void *handle, size_t offset, size_t size)
|
||||
{
|
||||
koh_t *koh = (koh_t *)unswizzle(handle);
|
||||
memset(ptr_start(koh, offset), 0, size);
|
||||
return (KERNEL_OFFLOADER_OK);
|
||||
}
|
||||
|
||||
int
|
||||
kernel_offloader_all_zeros(void *handle, size_t offset, size_t size)
|
||||
{
|
||||
koh_t *koh = (koh_t *)unswizzle(handle);
|
||||
if (koh->size - offset < size) {
|
||||
return (KERNEL_OFFLOADER_ERROR);
|
||||
}
|
||||
|
||||
uint64_t *array = ptr_start(koh, offset);
|
||||
size_t i;
|
||||
for (i = 0; i < size / sizeof (uint64_t); i++) {
|
||||
if (array[i]) {
|
||||
return (KERNEL_OFFLOADER_BAD_RESULT);
|
||||
}
|
||||
}
|
||||
|
||||
char *remaining = ptr_start(koh, offset);
|
||||
for (i *= sizeof (uint64_t); i < size; i++) {
|
||||
if (remaining[i]) {
|
||||
return (KERNEL_OFFLOADER_BAD_RESULT);
|
||||
}
|
||||
}
|
||||
|
||||
return (KERNEL_OFFLOADER_OK);
|
||||
}
|
||||
|
||||
int
|
||||
kernel_offloader_mem_stats(
|
||||
void *t_count_handle, void *t_size_handle, void *t_actual_handle,
|
||||
void *a_count_handle, void *a_size_handle, void *a_actual_handle)
|
||||
{
|
||||
read_lock(&rwlock);
|
||||
|
||||
if (t_count_handle) {
|
||||
*(size_t *)ptr_start(t_count_handle, 0) =
|
||||
total_count;
|
||||
}
|
||||
|
||||
if (t_size_handle) {
|
||||
*(size_t *)ptr_start(t_size_handle, 0) =
|
||||
total_size;
|
||||
}
|
||||
|
||||
if (t_actual_handle) {
|
||||
*(size_t *)ptr_start(t_actual_handle, 0) =
|
||||
total_actual;
|
||||
}
|
||||
|
||||
if (a_count_handle) {
|
||||
*(size_t *)ptr_start(a_count_handle, 0) =
|
||||
active_count;
|
||||
}
|
||||
|
||||
if (a_size_handle) {
|
||||
*(size_t *)ptr_start(a_size_handle, 0) =
|
||||
active_size;
|
||||
}
|
||||
|
||||
if (a_actual_handle) {
|
||||
*(size_t *)ptr_start(a_actual_handle, 0) =
|
||||
active_actual;
|
||||
}
|
||||
|
||||
read_unlock(&rwlock);
|
||||
|
||||
return (KERNEL_OFFLOADER_OK);
|
||||
}
|
||||
|
||||
/* specific implementation */
|
||||
static int
|
||||
kernel_offloader_gzip_compress(koh_t *src, size_t s_len,
|
||||
koh_t *dst, size_t *d_len, int level)
|
||||
{
|
||||
if (z_compress_level(ptr_start(dst, 0), d_len,
|
||||
ptr_start(src, 0), s_len, level) != Z_OK) {
|
||||
if (*d_len != src->size) {
|
||||
return (KERNEL_OFFLOADER_ERROR);
|
||||
}
|
||||
return (KERNEL_OFFLOADER_OK);
|
||||
}
|
||||
|
||||
return (KERNEL_OFFLOADER_OK);
|
||||
}
|
||||
|
||||
static int
|
||||
kernel_offloader_lz4_compress(koh_t *src, koh_t *dst,
|
||||
size_t s_len, int level, size_t *c_len)
|
||||
{
|
||||
*c_len = dst->size;
|
||||
|
||||
if (zfs_lz4_compress_buf(ptr_start(src, 0), ptr_start(dst, 0),
|
||||
s_len, *c_len, level) == s_len) {
|
||||
return (KERNEL_OFFLOADER_ERROR);
|
||||
}
|
||||
|
||||
return (KERNEL_OFFLOADER_OK);
|
||||
}
|
||||
|
||||
int
|
||||
kernel_offloader_compress(dpusm_compress_t alg, int level,
|
||||
void *src, size_t s_len, void *dst, void *d_len)
|
||||
{
|
||||
int status = KERNEL_OFFLOADER_UNAVAILABLE;
|
||||
koh_t *src_koh = NULL;
|
||||
koh_t *dst_koh = NULL;
|
||||
koh_t *d_len_koh = NULL;
|
||||
if (!src || !dst || !d_len) {
|
||||
return (KERNEL_OFFLOADER_ERROR);
|
||||
}
|
||||
|
||||
kod_run(compress_down);
|
||||
|
||||
src_koh = (koh_t *)unswizzle(src);
|
||||
dst_koh = (koh_t *)unswizzle(dst);
|
||||
d_len_koh = (koh_t *)unswizzle(d_len);
|
||||
|
||||
if ((DPUSM_COMPRESS_GZIP_1 <= alg) &&
|
||||
(alg <= DPUSM_COMPRESS_GZIP_9)) {
|
||||
status = kernel_offloader_gzip_compress(src_koh, s_len,
|
||||
dst_koh, (size_t *)ptr_start(d_len_koh, 0), level);
|
||||
} else if (alg == DPUSM_COMPRESS_LZ4) {
|
||||
status = kernel_offloader_lz4_compress(src_koh, dst_koh, s_len,
|
||||
level, (size_t *)ptr_start(d_len_koh, 0));
|
||||
}
|
||||
|
||||
return (status);
|
||||
}
|
||||
|
||||
/* specific implementation */
|
||||
static int
|
||||
kernel_offloader_gzip_decompress(koh_t *src, size_t s_len,
|
||||
koh_t *dst, size_t *d_len, int level)
|
||||
{
|
||||
if (z_uncompress(ptr_start(dst, 0), d_len,
|
||||
ptr_start(src, 0), s_len) != Z_OK) {
|
||||
return (KERNEL_OFFLOADER_ERROR);
|
||||
}
|
||||
|
||||
return (KERNEL_OFFLOADER_OK);
|
||||
}
|
||||
|
||||
static int
|
||||
kernel_offloader_lz4_decompress(koh_t *src, size_t s_len,
|
||||
koh_t *dst, size_t *d_len, int level)
|
||||
{
|
||||
if (zfs_lz4_decompress_buf(ptr_start(src, 0), ptr_start(dst, 0),
|
||||
s_len, *d_len, level) != 0) {
|
||||
return (KERNEL_OFFLOADER_ERROR);
|
||||
}
|
||||
|
||||
return (KERNEL_OFFLOADER_OK);
|
||||
}
|
||||
|
||||
int
|
||||
kernel_offloader_decompress(dpusm_decompress_t alg, void *level,
|
||||
void *src, size_t s_len, void *dst, void *d_len)
|
||||
{
|
||||
int status = KERNEL_OFFLOADER_UNAVAILABLE;
|
||||
koh_t *level_koh = NULL;
|
||||
koh_t *src_koh = NULL;
|
||||
koh_t *dst_koh = NULL;
|
||||
koh_t *d_len_koh = NULL;
|
||||
if (!level || !src || !dst || !d_len) {
|
||||
return (KERNEL_OFFLOADER_ERROR);
|
||||
}
|
||||
|
||||
level_koh = (koh_t *)unswizzle(level);
|
||||
src_koh = (koh_t *)unswizzle(src);
|
||||
dst_koh = (koh_t *)unswizzle(dst);
|
||||
d_len_koh = (koh_t *)unswizzle(d_len);
|
||||
|
||||
if ((DPUSM_COMPRESS_GZIP_1 <= alg) &&
|
||||
(alg <= DPUSM_COMPRESS_GZIP_9)) {
|
||||
status = kernel_offloader_gzip_decompress(src_koh, s_len,
|
||||
dst_koh, (size_t *)ptr_start(d_len_koh, 0),
|
||||
*(int *)ptr_start(level_koh, 0));
|
||||
} else if (alg == DPUSM_COMPRESS_LZ4) {
|
||||
status = kernel_offloader_lz4_decompress(src_koh, s_len,
|
||||
dst_koh, (size_t *)ptr_start(d_len_koh, 0),
|
||||
*(int *)ptr_start(level_koh, 0));
|
||||
}
|
||||
|
||||
return (status);
|
||||
}
|
||||
|
||||
int
|
||||
kernel_offloader_checksum(dpusm_checksum_t alg,
|
||||
dpusm_checksum_byteorder_t order, void *data, size_t size,
|
||||
void *cksum, size_t cksum_size)
|
||||
{
|
||||
koh_t *data_koh = (koh_t *)unswizzle(data);
|
||||
if (!data_koh) {
|
||||
return (KERNEL_OFFLOADER_ERROR);
|
||||
}
|
||||
|
||||
zio_cksum_t zcp;
|
||||
if (cksum_size < sizeof (zcp.zc_word)) {
|
||||
return (KERNEL_OFFLOADER_ERROR);
|
||||
}
|
||||
|
||||
kod_run(checksum_down);
|
||||
|
||||
/* compute checksum */
|
||||
|
||||
void *buf = ptr_start(data_koh, 0);
|
||||
|
||||
if (alg == DPUSM_CHECKSUM_FLETCHER_2) {
|
||||
fletcher_init(&zcp);
|
||||
if (order == DPUSM_BYTEORDER_NATIVE) {
|
||||
fletcher_2_native(buf, size, NULL, &zcp);
|
||||
} else {
|
||||
fletcher_2_byteswap(buf, size, NULL, &zcp);
|
||||
}
|
||||
} else if (alg == DPUSM_CHECKSUM_FLETCHER_4) {
|
||||
fletcher_init(&zcp);
|
||||
if (order == DPUSM_BYTEORDER_NATIVE) {
|
||||
fletcher_4_native(buf, size, NULL, &zcp);
|
||||
} else {
|
||||
fletcher_4_byteswap(buf, size, NULL, &zcp);
|
||||
}
|
||||
} else {
|
||||
return (DPUSM_NOT_SUPPORTED);
|
||||
}
|
||||
|
||||
memcpy(cksum, zcp.zc_word, sizeof (zcp.zc_word));
|
||||
|
||||
return (DPUSM_OK);
|
||||
}
|
||||
|
||||
void *
|
||||
kernel_offloader_raidz_alloc(size_t nparity, size_t ndata)
|
||||
{
|
||||
const size_t ncols = nparity + ndata;
|
||||
|
||||
const size_t rr_size = offsetof(raidz_row_t, rr_col[ncols]);
|
||||
raidz_row_t *rr = kzalloc(rr_size, GFP_KERNEL);
|
||||
rr->rr_cols = ncols;
|
||||
rr->rr_firstdatacol = nparity;
|
||||
|
||||
write_lock(&rwlock);
|
||||
total_count++;
|
||||
active_count++;
|
||||
|
||||
/* the op struct does not contribute to buffer allocations */
|
||||
total_actual += rr_size;
|
||||
active_actual += rr_size;
|
||||
|
||||
write_unlock(&rwlock);
|
||||
|
||||
return (swizzle(rr));
|
||||
}
|
||||
|
||||
/* attaches a column to the raidz struct */
|
||||
int
|
||||
kernel_offloader_raidz_set_column(void *raidz, uint64_t c,
|
||||
void *col, size_t size)
|
||||
{
|
||||
raidz_row_t *rr = (raidz_row_t *)unswizzle(raidz);
|
||||
koh_t *koh = (koh_t *)unswizzle(col);
|
||||
|
||||
if (!rr || !koh) {
|
||||
return (DPUSM_ERROR);
|
||||
}
|
||||
|
||||
/* c is too big */
|
||||
if (c >= rr->rr_cols) {
|
||||
return (DPUSM_ERROR);
|
||||
}
|
||||
|
||||
/* "active" size is larger than allocated size */
|
||||
if (size > koh->size) {
|
||||
return (DPUSM_ERROR);
|
||||
}
|
||||
|
||||
raidz_col_t *rc = &rr->rr_col[c];
|
||||
|
||||
/* clean up old column */
|
||||
abd_free(rc->rc_abd);
|
||||
|
||||
/*
|
||||
* rc->rc_abd does not take ownership of koh->ptr,
|
||||
* so don't need to release ownership
|
||||
*/
|
||||
rc->rc_abd = abd_get_from_buf(koh->ptr, size);
|
||||
rc->rc_size = size;
|
||||
|
||||
return (DPUSM_OK);
|
||||
}
|
||||
|
||||
int
|
||||
kernel_offloader_raidz_free(void *raidz)
|
||||
{
|
||||
raidz_row_t *rr = (raidz_row_t *)unswizzle(raidz);
|
||||
if (!rr) {
|
||||
return (DPUSM_ERROR);
|
||||
}
|
||||
|
||||
for (int c = 0; c < rr->rr_cols; c++) {
|
||||
raidz_col_t *rc = &rr->rr_col[c];
|
||||
abd_free(rc->rc_abd);
|
||||
}
|
||||
kfree(rr);
|
||||
|
||||
const size_t rr_size = offsetof(raidz_row_t, rr_col[rr->rr_cols]);
|
||||
|
||||
write_lock(&rwlock);
|
||||
active_count--;
|
||||
active_actual -= rr_size;
|
||||
write_unlock(&rwlock);
|
||||
|
||||
return (DPUSM_OK);
|
||||
}
|
||||
|
||||
int
|
||||
kernel_offloader_raidz_gen(void *raidz)
|
||||
{
|
||||
raidz_row_t *rr = (raidz_row_t *)unswizzle(raidz);
|
||||
if (!rr) {
|
||||
return (KERNEL_OFFLOADER_ERROR);
|
||||
}
|
||||
|
||||
kod_run(raidz_gen_down);
|
||||
|
||||
switch (rr->rr_firstdatacol) {
|
||||
case 1:
|
||||
vdev_raidz_generate_parity_p(rr);
|
||||
break;
|
||||
case 2:
|
||||
vdev_raidz_generate_parity_pq(rr);
|
||||
break;
|
||||
case 3:
|
||||
vdev_raidz_generate_parity_pqr(rr);
|
||||
break;
|
||||
}
|
||||
|
||||
return (KERNEL_OFFLOADER_OK);
|
||||
}
|
||||
|
||||
int
|
||||
kernel_offloader_raidz_rec(void *raidz, int *tgts, int ntgts)
|
||||
{
|
||||
raidz_row_t *rr = (raidz_row_t *)unswizzle(raidz);
|
||||
if (!rr) {
|
||||
return (KERNEL_OFFLOADER_ERROR);
|
||||
}
|
||||
|
||||
kod_run(raidz_rec_down);
|
||||
|
||||
vdev_raidz_reconstruct_general(rr, tgts, ntgts);
|
||||
|
||||
return (KERNEL_OFFLOADER_OK);
|
||||
}
|
||||
|
||||
void *
|
||||
kernel_offloader_file_open(const char *path, int flags, int mode)
|
||||
{
|
||||
zfs_file_t *fp = NULL;
|
||||
/* on error, fp should still be NULL */
|
||||
zfs_file_open(path, flags, mode, &fp);
|
||||
return (swizzle(fp));
|
||||
}
|
||||
|
||||
int
|
||||
kernel_offloader_file_write(void *fp_handle, void *handle, size_t count,
|
||||
size_t trailing_zeros, loff_t offset, ssize_t *resid, int *err)
|
||||
{
|
||||
zfs_file_t *fp = (zfs_file_t *)unswizzle(fp_handle);
|
||||
if (!fp) {
|
||||
return (ENODEV);
|
||||
}
|
||||
|
||||
koh_t *koh = (koh_t *)unswizzle(handle);
|
||||
if (!koh) {
|
||||
return (EIO);
|
||||
}
|
||||
|
||||
if (!err) {
|
||||
return (EIO);
|
||||
}
|
||||
|
||||
*err = zfs_file_pwrite(fp, ptr_start(koh, 0),
|
||||
count, offset, resid);
|
||||
|
||||
if (*err == 0) {
|
||||
void *zeros = kzalloc(trailing_zeros, GFP_KERNEL);
|
||||
*err = zfs_file_pwrite(fp, zeros,
|
||||
trailing_zeros, offset + count, resid);
|
||||
kfree(zeros);
|
||||
}
|
||||
|
||||
return (*err);
|
||||
}
|
||||
|
||||
void
|
||||
kernel_offloader_file_close(void *fp_handle)
|
||||
{
|
||||
zfs_file_close(unswizzle(fp_handle));
|
||||
}
|
||||
|
||||
void *
|
||||
kernel_offloader_disk_open(dpusm_dd_t *disk_data)
|
||||
{
|
||||
return (swizzle(disk_data->bdev));
|
||||
}
|
||||
|
||||
int
|
||||
kernel_offloader_disk_invalidate(void *disk_handle)
|
||||
{
|
||||
struct block_device *bdev =
|
||||
(struct block_device *)unswizzle(disk_handle);
|
||||
invalidate_bdev(bdev);
|
||||
return (DPUSM_OK);
|
||||
}
|
||||
|
||||
int
|
||||
kernel_offloader_disk_write(void *disk_handle, void *handle, size_t data_size,
|
||||
size_t trailing_zeros, uint64_t io_offset, int flags,
|
||||
dpusm_disk_write_completion_t write_completion, void *wc_args)
|
||||
{
|
||||
struct block_device *bdev =
|
||||
(struct block_device *)unswizzle(disk_handle);
|
||||
koh_t *koh = (koh_t *)unswizzle(handle);
|
||||
|
||||
const size_t io_size = data_size + trailing_zeros;
|
||||
|
||||
kod_run(disk_write_down);
|
||||
|
||||
if (trailing_zeros) {
|
||||
/* create a copy of the data with the trailing zeros attached */
|
||||
void *copy = kzalloc(io_size, GFP_KERNEL);
|
||||
memcpy(copy, ptr_start(koh, 0), data_size);
|
||||
|
||||
write_lock(&rwlock);
|
||||
/* need to keep copy alive, so replace koh->ptr */
|
||||
if (koh->type == KOH_REAL) {
|
||||
/* subtract size of original koh->ptr */
|
||||
active_size -= koh->size;
|
||||
active_actual -= koh->size;
|
||||
|
||||
kfree(koh->ptr);
|
||||
}
|
||||
|
||||
koh->type = KOH_REAL;
|
||||
koh->ptr = copy;
|
||||
koh->size = io_size;
|
||||
|
||||
total_size += io_size;
|
||||
active_size += io_size;
|
||||
total_actual += io_size;
|
||||
active_actual += io_size;
|
||||
|
||||
/* wrapper struct size was not modified */
|
||||
write_unlock(&rwlock);
|
||||
}
|
||||
|
||||
abd_t *abd = abd_get_from_buf(koh->ptr, io_size);
|
||||
zio_push_transform(wc_args, abd, io_size, io_size, NULL);
|
||||
|
||||
/* __vdev_disk_physio already adds write_completion */
|
||||
(void) write_completion;
|
||||
|
||||
return (__vdev_classic_physio(bdev, wc_args,
|
||||
io_size, io_offset, WRITE, flags));
|
||||
}
|
||||
|
||||
int
|
||||
kernel_offloader_disk_flush(void *disk_handle,
|
||||
dpusm_disk_flush_completion_t flush_completion, void *fc_args)
|
||||
{
|
||||
struct block_device *bdev =
|
||||
(struct block_device *)unswizzle(disk_handle);
|
||||
|
||||
/* vdev_disk_io_flush already adds flush completion */
|
||||
(void) flush_completion;
|
||||
|
||||
return (vdev_disk_io_flush(bdev, fc_args));
|
||||
}
|
||||
|
||||
/* Nothing to release: kernel_offloader_disk_open() acquires nothing. */
void
kernel_offloader_disk_close(void *disk_handle)
{
	(void) disk_handle;
}
|
|
@ -0,0 +1,152 @@
|
|||
/*
|
||||
* © 2021. Triad National Security, LLC. All rights reserved.
|
||||
*
|
||||
* This program was produced under U.S. Government contract
|
||||
* 89233218CNA000001 for Los Alamos National Laboratory (LANL), which
|
||||
* is operated by Triad National Security, LLC for the U.S.
|
||||
* Department of Energy/National Nuclear Security Administration. All
|
||||
* rights in the program are reserved by Triad National Security, LLC,
|
||||
* and the U.S. Department of Energy/National Nuclear Security
|
||||
* Administration. The Government is granted for itself and others
|
||||
* acting on its behalf a nonexclusive, paid-up, irrevocable worldwide
|
||||
* license in this material to reproduce, prepare derivative works,
|
||||
* distribute copies to the public, perform publicly and display
|
||||
* publicly, and to permit others to do so.
|
||||
*
|
||||
* ----
|
||||
*
|
||||
* This program is open source under the BSD-3 License.
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from this
|
||||
* software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _KERNEL_OFFLOADER_H
|
||||
#define _KERNEL_OFFLOADER_H
|
||||
|
||||
#include <linux/blk_types.h>
|
||||
#include <linux/scatterlist.h>
|
||||
|
||||
#include <dpusm/provider_api.h>
|
||||
|
||||
/*
|
||||
* This file represents the API provided by a vendor to access their
|
||||
* offloader. The API can be anything the implementor chooses to
|
||||
* expose. There are no limitations on the function signature or
|
||||
* name. They just have to be called correctly in the Z.I.A. provider.
|
||||
* ZFS and Z.I.A. will not need direct access to any data located on
|
||||
* the offloader. Some raw pointers from Z.I.A. will be used directly,
|
||||
* but those will always contain information located in memory.
|
||||
*
|
||||
* -------------------------------------------------------------------
|
||||
*
|
||||
* The kernel offloader fakes offloads by copying data into memory
|
||||
* regions distinct from the calling process's memory space. The
|
||||
* corresponding C file conflates the driver and the "physical" device
|
||||
* since both memory spaces are in kernel space and run on the
|
||||
* CPU. This offloader provides opaque pointers to the provider to
|
||||
* simulate handles to inaccessible memory locations. In order to
|
||||
* prevent the handle from being dereferenced and used successfully by
|
||||
* ZFS or Z.I.A., the handle pointer is masked with a random value
|
||||
* generated at load-time. Other offloaders may choose to present
|
||||
* non-void handles.
|
||||
*/
|
||||
|
||||
/*
 * return values
 *
 * OK          - the operation completed
 * UNAVAILABLE - function is implemented, but the chosen operation
 *               is not implemented
 * ERROR       - ran, but could not complete
 * BAD_RESULT  - ran, but failed a check on a result
 * DOWN        - "hardware" went down for some reason (overheated,
 *               unplugged, etc.)
 */
#define	KERNEL_OFFLOADER_OK		0
#define	KERNEL_OFFLOADER_UNAVAILABLE	1
#define	KERNEL_OFFLOADER_ERROR		2
#define	KERNEL_OFFLOADER_BAD_RESULT	3
#define	KERNEL_OFFLOADER_DOWN		4
|
||||
|
||||
/*
|
||||
* init function - this should be the kernel module init, but
|
||||
* kernel offloader is not compiled as a separate kernel module
|
||||
*/
|
||||
void kernel_offloader_init(void);
|
||||
void kernel_offloader_fini(void);
|
||||
|
||||
/* offloader handle access */
|
||||
void *kernel_offloader_alloc(size_t size);
|
||||
void *kernel_offloader_alloc_ref(void *src, size_t offset, size_t size);
|
||||
int kernel_offloader_get_size(void *handle, size_t *size, size_t *actual);
|
||||
int kernel_offloader_free(void *handle);
|
||||
int kernel_offloader_copy_from_generic(void *handle, size_t offset,
|
||||
const void *src, size_t size);
|
||||
int kernel_offloader_copy_to_generic(void *handle, size_t offset,
|
||||
void *dst, size_t size);
|
||||
/* status check */
|
||||
int kernel_offloader_mem_stats(
|
||||
void *t_count_handle, void *t_size_handle, void *t_actual_handle,
|
||||
void *a_count_handle, void *a_size_handle, void *a_actual_handle);
|
||||
int kernel_offloader_cmp(void *lhs_handle, void *rhs_handle, int *diff);
|
||||
int kernel_offloader_zero_fill(void *handle, size_t offset, size_t size);
|
||||
int kernel_offloader_all_zeros(void *handle, size_t offset, size_t size);
|
||||
|
||||
/* ZIO Pipeline Stages */
|
||||
|
||||
int kernel_offloader_compress(dpusm_compress_t alg, int level,
|
||||
void *src, size_t s_len, void *dst, void *d_len);
|
||||
|
||||
int kernel_offloader_decompress(dpusm_compress_t alg, void *level,
|
||||
void *src, size_t s_len, void *dst, void *d_len);
|
||||
|
||||
int kernel_offloader_checksum(dpusm_checksum_t alg,
|
||||
dpusm_checksum_byteorder_t order, void *data, size_t size,
|
||||
void *cksum, size_t cksum_size);
|
||||
|
||||
void *kernel_offloader_raidz_alloc(size_t nparity, size_t ndata);
|
||||
int kernel_offloader_raidz_set_column(void *raidz, uint64_t c,
|
||||
void *col, size_t size);
|
||||
int kernel_offloader_raidz_free(void *raidz);
|
||||
int kernel_offloader_raidz_gen(void *raidz);
|
||||
int kernel_offloader_raidz_rec(void *raidz, int *tgts, int ntgts);
|
||||
|
||||
/* io */
|
||||
void *kernel_offloader_file_open(const char *path, int flags, int mode);
|
||||
int kernel_offloader_file_write(void *fp_handle, void *handle, size_t count,
|
||||
size_t trailing_zeros, loff_t offset, ssize_t *resid, int *err);
|
||||
void kernel_offloader_file_close(void *fp_handle);
|
||||
|
||||
void *kernel_offloader_disk_open(dpusm_dd_t *disk_data);
|
||||
int kernel_offloader_disk_reread_part(void *disk_handle);
|
||||
int kernel_offloader_disk_invalidate(void *disk_handle);
|
||||
int kernel_offloader_disk_write(void *disk_handle, void *handle,
|
||||
size_t data_size, size_t trailing_zeros, uint64_t io_offset, int flags,
|
||||
dpusm_disk_write_completion_t write_completion, void *wc_args);
|
||||
int kernel_offloader_disk_flush(void *disk_handle,
|
||||
dpusm_disk_flush_completion_t flush_completion, void *fc_args);
|
||||
void kernel_offloader_disk_close(void *disk_handle);
|
||||
|
||||
#endif
|
|
@ -0,0 +1,453 @@
|
|||
/*
|
||||
* © 2021. Triad National Security, LLC. All rights reserved.
|
||||
*
|
||||
* This program was produced under U.S. Government contract
|
||||
* 89233218CNA000001 for Los Alamos National Laboratory (LANL), which
|
||||
* is operated by Triad National Security, LLC for the U.S.
|
||||
* Department of Energy/National Nuclear Security Administration. All
|
||||
* rights in the program are reserved by Triad National Security, LLC,
|
||||
* and the U.S. Department of Energy/National Nuclear Security
|
||||
* Administration. The Government is granted for itself and others
|
||||
* acting on its behalf a nonexclusive, paid-up, irrevocable worldwide
|
||||
* license in this material to reproduce, prepare derivative works,
|
||||
* distribute copies to the public, perform publicly and display
|
||||
* publicly, and to permit others to do so.
|
||||
*
|
||||
* ----
|
||||
*
|
||||
* This program is open source under the BSD-3 License.
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from this
|
||||
* software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
*/
|
||||
|
||||
/*
|
||||
* This provider communicates with the "kernel offloader", which is
|
||||
* actually just software running on the local kernel.
|
||||
*
|
||||
* Providers and offloaders are usually separate entities. However, to
|
||||
* keep things simple, the kernel offloader is compiled into this
|
||||
* provider.
|
||||
*
|
||||
* Providers run at the same location as ZFS. They are intended to be
|
||||
* small shims that translate between the DPUSM provider API and an
|
||||
* offloader's API (probably a header file analogous to
|
||||
* kernel_offloader.h).
|
||||
*
|
||||
* The method used to communicate between the provider and offloader
|
||||
* is not prescribed by the DPUSM. This allows for vendors to place
|
||||
* their offloaders locally or remotely, and use whatever method they
|
||||
* wish to use to communicate with their offloaders e.g. NVMeOF. The
|
||||
* kernel offloader is local and the communication method to access
|
||||
* the kernel offloader is calling local functions.
|
||||
*
|
||||
* Offloaders are normally expected to be hardware with its own memory
|
||||
* space. In order to simulate copying data to an offloader's memory
|
||||
* space, the kernel offloader allocates new buffers and copies ZFS
|
||||
* data into them, rather than using ZFS data directly. In order to
|
||||
* simulate handles that the provider does not know how to manipulate
|
||||
* or have access to, pointers returned from the kernel offloader are
|
||||
* masked with a random value.
|
||||
*
|
||||
* Note that this provider has to be loaded after ZFS because it
|
||||
* depends on ZFS for its "offload" functionality.
|
||||
*
|
||||
* Usage:
|
||||
* 1. Reconfigure ZFS with --with-zia=<DPUSM root>
|
||||
*
|
||||
* 2. Create a zpool
|
||||
*
|
||||
* 3. Select this provider with
|
||||
* zpool set zia_provider=zia-software-provider <zpool>
|
||||
*
|
||||
* 4. Enable "offloading" of operations with
|
||||
* zpool set zia_compress=on <zpool>
|
||||
* zpool set zia_decompress=on <zpool>
|
||||
* zpool set zia_checksum=on <zpool>
|
||||
* zpool set zia_raidz1_gen=on <zpool>
|
||||
* zpool set zia_raidz2_gen=on <zpool>
|
||||
* zpool set zia_raidz3_gen=on <zpool>
|
||||
* zpool set zia_raidz1_rec=on <zpool>
|
||||
* zpool set zia_raidz2_rec=on <zpool>
|
||||
* zpool set zia_raidz3_rec=on <zpool>
|
||||
* zpool set zia_file_write=on <zpool>
|
||||
* zpool set zia_disk_write=on <zpool>
|
||||
*
|
||||
* 5. Use the zpool as you would normally
|
||||
*
|
||||
* Notes:
|
||||
* If a ZFS IO stage is not run, enabling a Z.I.A. offload
|
||||
* will have no effect.
|
||||
*
|
||||
* Resilvering requires both zia_checksum and zia_raidz*_rec
|
||||
* to be enabled. Not enabling checksums would cause offloaded
|
||||
* resilvering to fail, and perform the remaining operations
|
||||
* in memory. To avoid the cost of offloading data only to
|
||||
* fail, a check has been inserted to prevent offloading
|
||||
* altogether if zia_checksum is not enabled.
|
||||
*/
|
||||
|
||||
#include <linux/init.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/kernel.h>
|
||||
|
||||
#include <dpusm/provider_api.h> /* the DPUSM provider API */
|
||||
#include <kernel_offloader.h> /* provides access to the offloader */
|
||||
|
||||
/* translate from offloader values to DPUSM values */
|
||||
static int
|
||||
translate_rc(const int offloader_rc)
|
||||
{
|
||||
int dpusm_rc = DPUSM_NOT_IMPLEMENTED;
|
||||
switch (offloader_rc) {
|
||||
case KERNEL_OFFLOADER_OK:
|
||||
dpusm_rc = DPUSM_OK;
|
||||
break;
|
||||
case KERNEL_OFFLOADER_ERROR:
|
||||
dpusm_rc = DPUSM_ERROR;
|
||||
break;
|
||||
case KERNEL_OFFLOADER_UNAVAILABLE:
|
||||
dpusm_rc = DPUSM_NOT_IMPLEMENTED;
|
||||
break;
|
||||
case KERNEL_OFFLOADER_BAD_RESULT:
|
||||
dpusm_rc = DPUSM_BAD_RESULT;
|
||||
break;
|
||||
case KERNEL_OFFLOADER_DOWN:
|
||||
dpusm_rc = DPUSM_PROVIDER_INVALIDATED;
|
||||
break;
|
||||
default:
|
||||
/* only translate recognized values */
|
||||
dpusm_rc = offloader_rc;
|
||||
break;
|
||||
}
|
||||
return (dpusm_rc);
|
||||
}
|
||||
|
||||
static int
|
||||
sw_provider_algorithms(int *compress, int *decompress,
|
||||
int *checksum, int *checksum_byteorder, int *raid)
|
||||
{
|
||||
*compress =
|
||||
DPUSM_COMPRESS_GZIP_1 |
|
||||
DPUSM_COMPRESS_GZIP_2 |
|
||||
DPUSM_COMPRESS_GZIP_3 |
|
||||
DPUSM_COMPRESS_GZIP_4 |
|
||||
DPUSM_COMPRESS_GZIP_5 |
|
||||
DPUSM_COMPRESS_GZIP_6 |
|
||||
DPUSM_COMPRESS_GZIP_7 |
|
||||
DPUSM_COMPRESS_GZIP_8 |
|
||||
DPUSM_COMPRESS_GZIP_9 |
|
||||
DPUSM_COMPRESS_LZ4;
|
||||
|
||||
*decompress = *compress;
|
||||
|
||||
*checksum = DPUSM_CHECKSUM_FLETCHER_2 | DPUSM_CHECKSUM_FLETCHER_4;
|
||||
|
||||
*checksum_byteorder = DPUSM_BYTEORDER_NATIVE | DPUSM_BYTEORDER_BYTESWAP;
|
||||
|
||||
*raid =
|
||||
DPUSM_RAID_1_GEN |
|
||||
DPUSM_RAID_2_GEN |
|
||||
DPUSM_RAID_3_GEN |
|
||||
DPUSM_RAID_1_REC |
|
||||
DPUSM_RAID_2_REC |
|
||||
DPUSM_RAID_3_REC;
|
||||
|
||||
return (DPUSM_OK);
|
||||
}
|
||||
|
||||
/*
 * Forward a size query for an offloader handle to
 * kernel_offloader_get_size() and translate its return code.
 */
static int
sw_provider_get_size(void *handle, size_t *size, size_t *actual)
{
	const int ko_rc = kernel_offloader_get_size(handle, size, actual);

	return (translate_rc(ko_rc));
}
|
||||
|
||||
static int
|
||||
sw_provider_copy_from_generic(dpusm_mv_t *mv, const void *buf, size_t size)
|
||||
{
|
||||
return (translate_rc(kernel_offloader_copy_from_generic(mv->handle,
|
||||
mv->offset, buf, size)));
|
||||
}
|
||||
|
||||
static int
|
||||
sw_provider_copy_to_generic(dpusm_mv_t *mv, void *buf, size_t size)
|
||||
{
|
||||
return (translate_rc(kernel_offloader_copy_to_generic(mv->handle,
|
||||
mv->offset, buf, size)));
|
||||
}
|
||||
|
||||
static int
|
||||
sw_provider_mem_stats(size_t *t_count, size_t *t_size, size_t *t_actual,
|
||||
size_t *a_count, size_t *a_size, size_t *a_actual)
|
||||
{
|
||||
void *t_count_handle = NULL;
|
||||
void *t_size_handle = NULL;
|
||||
void *t_actual_handle = NULL;
|
||||
void *a_size_handle = NULL;
|
||||
void *a_count_handle = NULL;
|
||||
void *a_actual_handle = NULL;
|
||||
|
||||
if (t_count) {
|
||||
t_count_handle = kernel_offloader_alloc(sizeof (size_t));
|
||||
}
|
||||
|
||||
if (t_size) {
|
||||
t_size_handle = kernel_offloader_alloc(sizeof (size_t));
|
||||
}
|
||||
|
||||
if (t_actual) {
|
||||
t_actual_handle = kernel_offloader_alloc(sizeof (size_t));
|
||||
}
|
||||
|
||||
if (a_count) {
|
||||
a_count_handle = kernel_offloader_alloc(sizeof (size_t));
|
||||
}
|
||||
|
||||
if (a_size) {
|
||||
a_size_handle = kernel_offloader_alloc(sizeof (size_t));
|
||||
}
|
||||
|
||||
if (a_actual) {
|
||||
a_actual_handle = kernel_offloader_alloc(sizeof (size_t));
|
||||
}
|
||||
|
||||
const int rc = kernel_offloader_mem_stats(t_count, t_size, t_actual,
|
||||
a_count, a_size, a_actual);
|
||||
if (rc == KERNEL_OFFLOADER_OK) {
|
||||
/* should probably check for errors */
|
||||
kernel_offloader_copy_to_generic(t_count_handle, 0,
|
||||
t_count, sizeof (*t_count));
|
||||
kernel_offloader_copy_to_generic(t_size_handle, 0,
|
||||
t_size, sizeof (*t_size));
|
||||
kernel_offloader_copy_to_generic(t_actual_handle, 0,
|
||||
t_actual, sizeof (*t_actual));
|
||||
kernel_offloader_copy_to_generic(a_count_handle, 0,
|
||||
a_count, sizeof (*a_count));
|
||||
kernel_offloader_copy_to_generic(a_size_handle, 0,
|
||||
a_size, sizeof (*a_size));
|
||||
kernel_offloader_copy_to_generic(a_actual_handle, 0,
|
||||
a_actual, sizeof (*a_actual));
|
||||
}
|
||||
|
||||
kernel_offloader_free(t_size_handle);
|
||||
kernel_offloader_free(t_count_handle);
|
||||
kernel_offloader_free(t_actual_handle);
|
||||
kernel_offloader_free(a_size_handle);
|
||||
kernel_offloader_free(a_count_handle);
|
||||
kernel_offloader_free(a_actual_handle);
|
||||
|
||||
return (translate_rc(rc));
|
||||
}
|
||||
|
||||
/*
 * Forward a zero-fill request for [offset, offset + size) of handle
 * to kernel_offloader_zero_fill() and translate the return code.
 */
static int
sw_provider_zero_fill(void *handle, size_t offset, size_t size)
{
	const int ko_rc = kernel_offloader_zero_fill(handle, offset, size);

	return (translate_rc(ko_rc));
}
|
||||
|
||||
/*
 * Forward an all-zeros query for the given range of handle to
 * kernel_offloader_all_zeros() and translate the return code.
 */
static int
sw_provider_all_zeros(void *handle, size_t offset, size_t size)
{
	const int ko_rc = kernel_offloader_all_zeros(handle, offset, size);

	return (translate_rc(ko_rc));
}
|
||||
|
||||
static int
|
||||
sw_provider_compress(dpusm_compress_t alg, int level,
|
||||
void *src, size_t s_len, void *dst, size_t *d_len)
|
||||
{
|
||||
/* buffer that offloader fills out */
|
||||
void *d_len_handle = kernel_offloader_alloc(sizeof (size_t));
|
||||
|
||||
/* send original d_len to offloader */
|
||||
kernel_offloader_copy_from_generic(d_len_handle, 0,
|
||||
d_len, sizeof (*d_len));
|
||||
|
||||
const int kz_rc = kernel_offloader_compress(alg, level,
|
||||
src, s_len, dst, d_len_handle);
|
||||
if (kz_rc == KERNEL_OFFLOADER_OK) {
|
||||
/* get updated d_len back from offloader */
|
||||
kernel_offloader_copy_to_generic(d_len_handle, 0,
|
||||
d_len, sizeof (*d_len));
|
||||
}
|
||||
|
||||
kernel_offloader_free(d_len_handle);
|
||||
|
||||
return (translate_rc(kz_rc));
|
||||
}
|
||||
|
||||
static int
|
||||
sw_provider_decompress(dpusm_compress_t alg, int *level,
|
||||
void *src, size_t s_len, void *dst, size_t *d_len)
|
||||
{
|
||||
/* buffers that offloader fills out */
|
||||
void *level_handle = kernel_offloader_alloc(sizeof (*level));
|
||||
void *d_len_handle = kernel_offloader_alloc(sizeof (*d_len));
|
||||
|
||||
/* send original d_len to offloader */
|
||||
kernel_offloader_copy_from_generic(d_len_handle, 0,
|
||||
d_len, sizeof (*d_len));
|
||||
|
||||
const int kz_rc = kernel_offloader_decompress(alg, level_handle,
|
||||
src, s_len, dst, d_len_handle);
|
||||
if (kz_rc == KERNEL_OFFLOADER_OK) {
|
||||
/* get updated d_len back from offloader */
|
||||
kernel_offloader_copy_to_generic(d_len_handle, 0,
|
||||
d_len, sizeof (*d_len));
|
||||
kernel_offloader_copy_to_generic(level_handle, 0,
|
||||
level, sizeof (*level));
|
||||
}
|
||||
|
||||
kernel_offloader_free(d_len_handle);
|
||||
kernel_offloader_free(level_handle);
|
||||
|
||||
return (translate_rc(kz_rc));
|
||||
}
|
||||
|
||||
static int
|
||||
sw_provider_checksum(dpusm_checksum_t alg,
|
||||
dpusm_checksum_byteorder_t order, void *data, size_t size,
|
||||
void *cksum, size_t cksum_size)
|
||||
{
|
||||
/* maybe translate alg and order */
|
||||
|
||||
/* trigger offloader to do actual calculation */
|
||||
return (translate_rc(kernel_offloader_checksum(alg,
|
||||
order, data, size, cksum, cksum_size)));
|
||||
}
|
||||
|
||||
static int
|
||||
sw_provider_raid_can_compute(size_t nparity, size_t ndata,
|
||||
size_t *col_sizes, int rec)
|
||||
{
|
||||
if ((nparity < 1) || (nparity > 3)) {
|
||||
return (DPUSM_NOT_SUPPORTED);
|
||||
}
|
||||
|
||||
return (DPUSM_OK);
|
||||
}
|
||||
|
||||
/*
 * Forward a parity generation request to kernel_offloader_raidz_gen()
 * and translate the return code.
 */
static int
sw_provider_raid_gen(void *raid)
{
	const int ko_rc = kernel_offloader_raidz_gen(raid);

	return (translate_rc(ko_rc));
}
|
||||
|
||||
/*
 * Forward a comparison of two offloader handles to
 * kernel_offloader_cmp() and translate the return code; diff is
 * passed through for the offloader to fill in.
 */
static int
sw_provider_raid_cmp(void *lhs_handle, void *rhs_handle, int *diff)
{
	const int ko_rc = kernel_offloader_cmp(lhs_handle, rhs_handle, diff);

	return (translate_rc(ko_rc));
}
|
||||
|
||||
/*
 * Forward a reconstruction request for the ntgts targets listed in
 * tgts to kernel_offloader_raidz_rec() and translate the return code.
 */
static int
sw_provider_raid_rec(void *raid, int *tgts, int ntgts)
{
	const int ko_rc = kernel_offloader_raidz_rec(raid, tgts, ntgts);

	return (translate_rc(ko_rc));
}
|
||||
|
||||
static int
|
||||
sw_provider_file_write(void *fp_handle, void *handle, size_t count,
|
||||
size_t trailing_zeros, loff_t offset, ssize_t *resid, int *err)
|
||||
{
|
||||
return (translate_rc(kernel_offloader_file_write(fp_handle,
|
||||
handle, count, trailing_zeros, offset, resid, err)));
|
||||
}
|
||||
|
||||
/* BEGIN CSTYLED */
|
||||
static const dpusm_pf_t sw_provider_functions = {
|
||||
.algorithms = sw_provider_algorithms,
|
||||
.alloc = kernel_offloader_alloc,
|
||||
.alloc_ref = kernel_offloader_alloc_ref,
|
||||
.get_size = sw_provider_get_size,
|
||||
.free = kernel_offloader_free,
|
||||
.copy = {
|
||||
.from = {
|
||||
.generic = sw_provider_copy_from_generic,
|
||||
.ptr = NULL,
|
||||
.scatterlist = NULL,
|
||||
},
|
||||
.to = {
|
||||
.generic = sw_provider_copy_to_generic,
|
||||
.ptr = NULL,
|
||||
.scatterlist = NULL,
|
||||
},
|
||||
},
|
||||
.mem_stats = sw_provider_mem_stats,
|
||||
.zero_fill = sw_provider_zero_fill,
|
||||
.all_zeros = sw_provider_all_zeros,
|
||||
.compress = sw_provider_compress,
|
||||
.decompress = sw_provider_decompress,
|
||||
.checksum = sw_provider_checksum,
|
||||
.raid = {
|
||||
.can_compute = sw_provider_raid_can_compute,
|
||||
.alloc = kernel_offloader_raidz_alloc,
|
||||
.set_column = kernel_offloader_raidz_set_column,
|
||||
.free = kernel_offloader_raidz_free,
|
||||
.gen = sw_provider_raid_gen,
|
||||
.cmp = sw_provider_raid_cmp,
|
||||
.rec = sw_provider_raid_rec,
|
||||
},
|
||||
.file = {
|
||||
.open = kernel_offloader_file_open,
|
||||
.write = sw_provider_file_write,
|
||||
.close = kernel_offloader_file_close,
|
||||
},
|
||||
.disk = {
|
||||
.open = kernel_offloader_disk_open,
|
||||
.invalidate = kernel_offloader_disk_invalidate,
|
||||
.write = kernel_offloader_disk_write,
|
||||
.flush = kernel_offloader_disk_flush,
|
||||
.close = kernel_offloader_disk_close,
|
||||
},
|
||||
};
|
||||
/* END CSTYLED */
|
||||
|
||||
static int __init
|
||||
sw_provider_init(void)
|
||||
{
|
||||
/*
|
||||
* this should be a separate kernel module,
|
||||
* but is here for simplicity
|
||||
*/
|
||||
kernel_offloader_init();
|
||||
|
||||
return (dpusm_register_bsd(THIS_MODULE, &sw_provider_functions));
|
||||
}
|
||||
|
||||
static void __exit
|
||||
sw_provider_exit(void)
|
||||
{
|
||||
dpusm_unregister_bsd(THIS_MODULE);
|
||||
|
||||
kernel_offloader_fini();
|
||||
}
|
||||
|
||||
module_init(sw_provider_init);
|
||||
module_exit(sw_provider_exit);
|
||||
|
||||
MODULE_LICENSE("CDDL");
|
|
@ -38,6 +38,7 @@
|
|||
|
||||
%bcond_with debug
|
||||
%bcond_with debuginfo
|
||||
%bcond_with zia
|
||||
|
||||
|
||||
Name: %{module}-kmod
|
||||
|
@ -124,6 +125,12 @@ bash %{SOURCE10} --target %{_target_cpu} %{?repo:--repo %{?repo}} --kmodname %{
|
|||
%define debuginfo --disable-debuginfo
|
||||
%endif
|
||||
|
||||
%if %{with zia}
|
||||
%define zia --with-zia="%{?DPUSM_ROOT}"
|
||||
%else
|
||||
%define zia --without-zia
|
||||
%endif
|
||||
|
||||
# Leverage VPATH from configure to avoid making multiple copies.
|
||||
%define _configure ../%{module}-%{version}/configure
|
||||
|
||||
|
@ -144,7 +151,9 @@ for kernel_version in %{?kernel_versions}; do
|
|||
%{debuginfo} \
|
||||
%{?kernel_cc} \
|
||||
%{?kernel_ld} \
|
||||
%{?kernel_llvm}
|
||||
%{?kernel_llvm} \
|
||||
%{zia}
|
||||
|
||||
|
||||
# Pre-6.10 kernel builds didn't need to copy over the source files to the
|
||||
# build directory. However we do need to do it though post-6.10 due to
|
||||
|
|
|
@ -68,6 +68,7 @@
|
|||
%bcond_with systemd
|
||||
%bcond_with pam
|
||||
%bcond_without pyzfs
|
||||
%bcond_with zia
|
||||
|
||||
# Generic enable switch for systemd
|
||||
%if %{with systemd}
|
||||
|
@ -390,6 +391,12 @@ support for unlocking datasets on user login.
|
|||
%define pam --disable-pam
|
||||
%endif
|
||||
|
||||
# Pass the DPUSM location to configure when building with Z.I.A.
# %{?DPUSM_ROOT} expands to nothing when the macro is undefined,
# instead of leaving the literal macro text on the command line.
%if %{with zia}
%define zia --with-zia="%{?DPUSM_ROOT}"
%else
%define zia --without-zia
%endif
|
||||
|
||||
%setup -q
|
||||
|
||||
%build
|
||||
|
@ -409,7 +416,8 @@ support for unlocking datasets on user login.
|
|||
%{ubsan} \
|
||||
%{systemd} \
|
||||
%{pam} \
|
||||
%{pyzfs}
|
||||
%{pyzfs} \
|
||||
%{zia}
|
||||
make %{?_smp_mflags}
|
||||
|
||||
%install
|
||||
|
|
|
@ -59,6 +59,12 @@ fi
|
|||
%define debuginfo --disable-debuginfo
|
||||
%endif
|
||||
|
||||
%if %{with zia}
|
||||
%define zia --with-zia="%{?DPUSM_ROOT}"
|
||||
%else
|
||||
%define zia --without-zia
|
||||
%endif
|
||||
|
||||
%setup -n %{kmod_name}-%{version}
|
||||
%build
|
||||
%configure \
|
||||
|
@ -69,7 +75,8 @@ fi
|
|||
%{debuginfo} \
|
||||
%{?kernel_cc} \
|
||||
%{?kernel_ld} \
|
||||
%{?kernel_llvm}
|
||||
%{?kernel_llvm} \
|
||||
%{zia}
|
||||
make %{?_smp_mflags}
|
||||
|
||||
# Module signing (modsign)
|
||||
|
|
|
@ -222,3 +222,7 @@ tags = ['functional', 'zvol', 'zvol_misc']
|
|||
tests = ['idmap_mount_001', 'idmap_mount_002', 'idmap_mount_003',
|
||||
'idmap_mount_004', 'idmap_mount_005']
|
||||
tags = ['functional', 'idmap_mount']
|
||||
|
||||
[tests/functional/zia:Linux]
|
||||
tests = ['zia_props', 'zia_write_pipeline', 'zia_raidz_resilver']
|
||||
tags = ['functional', 'zia']
|
||||
|
|
|
@ -139,6 +139,7 @@ export SYSTEM_FILES_LINUX='attr
|
|||
groupdel
|
||||
groupmod
|
||||
hostid
|
||||
insmod
|
||||
logger
|
||||
losetup
|
||||
lsattr
|
||||
|
@ -154,6 +155,7 @@ export SYSTEM_FILES_LINUX='attr
|
|||
nsenter
|
||||
parted
|
||||
perf
|
||||
rmmod
|
||||
setfattr
|
||||
setpriv
|
||||
sha256sum
|
||||
|
|
|
@ -387,7 +387,9 @@ nobase_dist_datadir_zfs_tests_tests_DATA += \
|
|||
functional/zvol/zvol_misc/zvol_misc_common.kshlib \
|
||||
functional/zvol/zvol_swap/zvol_swap.cfg \
|
||||
functional/idmap_mount/idmap_mount.cfg \
|
||||
functional/idmap_mount/idmap_mount_common.kshlib
|
||||
functional/idmap_mount/idmap_mount_common.kshlib \
|
||||
functional/zia/zia.cfg \
|
||||
functional/zia/zia.kshlib
|
||||
|
||||
nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
|
||||
functional/acl/off/cleanup.ksh \
|
||||
|
@ -2141,4 +2143,9 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
|
|||
functional/idmap_mount/idmap_mount_002.ksh \
|
||||
functional/idmap_mount/idmap_mount_003.ksh \
|
||||
functional/idmap_mount/idmap_mount_004.ksh \
|
||||
functional/idmap_mount/idmap_mount_005.ksh
|
||||
functional/idmap_mount/idmap_mount_005.ksh \
|
||||
functional/zia/cleanup.ksh \
|
||||
functional/zia/setup.ksh \
|
||||
functional/zia/zia_props.ksh \
|
||||
functional/zia/zia_raidz_resilver.ksh \
|
||||
functional/zia/zia_write_pipeline.ksh
|
||||
|
|
|
@ -0,0 +1,34 @@
|
|||
#!/bin/ksh -p
|
||||
#
|
||||
# CDDL HEADER START
|
||||
#
|
||||
# The contents of this file are subject to the terms of the
|
||||
# Common Development and Distribution License (the "License").
|
||||
# You may not use this file except in compliance with the License.
|
||||
#
|
||||
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
# or https://opensource.org/licenses/CDDL-1.0.
|
||||
# See the License for the specific language governing permissions
|
||||
# and limitations under the License.
|
||||
#
|
||||
# When distributing Covered Code, include this CDDL HEADER in each
|
||||
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
# If applicable, add the following below this CDDL HEADER, with the
|
||||
# fields enclosed by brackets "[]" replaced with your own identifying
|
||||
# information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
#
|
||||
# CDDL HEADER END
|
||||
#
|
||||
|
||||
#
|
||||
# Copyright (c) 2021 by Lawrence Livermore National Security, LLC.
|
||||
#
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
|
||||
. $STF_SUITE/tests/functional/zia/zia.kshlib
|
||||
|
||||
verify_runnable "global"
|
||||
|
||||
log_must dpusm_loaded
|
||||
log_must unload_provider
|
||||
default_cleanup
|
|
@ -0,0 +1,40 @@
|
|||
#!/bin/ksh -p
|
||||
#
|
||||
# CDDL HEADER START
|
||||
#
|
||||
# The contents of this file are subject to the terms of the
|
||||
# Common Development and Distribution License (the "License").
|
||||
# You may not use this file except in compliance with the License.
|
||||
#
|
||||
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
# or https://opensource.org/licenses/CDDL-1.0.
|
||||
# See the License for the specific language governing permissions
|
||||
# and limitations under the License.
|
||||
#
|
||||
# When distributing Covered Code, include this CDDL HEADER in each
|
||||
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
# If applicable, add the following below this CDDL HEADER, with the
|
||||
# fields enclosed by brackets "[]" replaced with your own identifying
|
||||
# information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
#
|
||||
# CDDL HEADER END
|
||||
#
|
||||
|
||||
#
|
||||
# Copyright (c) 2021 by Lawrence Livermore National Security, LLC.
|
||||
#
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
|
||||
. $STF_SUITE/tests/functional/zia/zia.kshlib
|
||||
verify_runnable "global"
|
||||
|
||||
log_must default_zpool
|
||||
|
||||
# dpusm must be loaded before ZFS, but have to check
|
||||
# after creating the zpool because this function uses
|
||||
# the result of 'zpool get zia_available'
|
||||
log_must dpusm_loaded
|
||||
|
||||
log_must load_provider
|
||||
|
||||
log_pass
|
|
@ -0,0 +1,37 @@
|
|||
#
|
||||
# CDDL HEADER START
|
||||
#
|
||||
# The contents of this file are subject to the terms of the
|
||||
# Common Development and Distribution License (the "License").
|
||||
# You may not use this file except in compliance with the License.
|
||||
#
|
||||
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
# or https://opensource.org/licenses/CDDL-1.0.
|
||||
# See the License for the specific language governing permissions
|
||||
# and limitations under the License.
|
||||
#
|
||||
# When distributing Covered Code, include this CDDL HEADER in each
|
||||
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
# If applicable, add the following below this CDDL HEADER, with the
|
||||
# fields enclosed by brackets "[]" replaced with your own identifying
|
||||
# information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
#
|
||||
# CDDL HEADER END
|
||||
#
|
||||
|
||||
#
|
||||
# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
# Use is subject to license terms.
|
||||
#
|
||||
|
||||
#
|
||||
# Copyright (c) 2013 by Delphix. All rights reserved.
|
||||
#
|
||||
|
||||
export PROVIDER_MODULE="zia_software_provider"
|
||||
export PROVIDER="zia-software-provider"
|
||||
export BLOCKSZ=8192
|
||||
export NUM_WRITES=65536
|
||||
export DATA="R"
|
||||
export FILENAME="${TESTDIR}/file"
|
||||
export RESILVER_REPLACEMENT="${TEST_BASE_DIR}/replacement"
|
|
@ -0,0 +1,136 @@
|
|||
#!/bin/ksh -p
|
||||
#
|
||||
# CDDL HEADER START
|
||||
#
|
||||
# The contents of this file are subject to the terms of the
|
||||
# Common Development and Distribution License (the "License").
|
||||
# You may not use this file except in compliance with the License.
|
||||
#
|
||||
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
# or https://opensource.org/licenses/CDDL-1.0.
|
||||
# See the License for the specific language governing permissions
|
||||
# and limitations under the License.
|
||||
#
|
||||
# When distributing Covered Code, include this CDDL HEADER in each
|
||||
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
# If applicable, add the following below this CDDL HEADER, with the
|
||||
# fields enclosed by brackets "[]" replaced with your own identifying
|
||||
# information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
#
|
||||
# CDDL HEADER END
|
||||
#
|
||||
|
||||
#
|
||||
# Copyright (c) 2021 by Lawrence Livermore National Security, LLC.
|
||||
#
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
|
||||
. $STF_SUITE/tests/functional/redundancy/redundancy.kshlib
|
||||
. $STF_SUITE/tests/functional/zia/zia.cfg
|
||||
|
||||
#
# Create the default raidz test pool from ${DISKS} and turn on
# compression and checksumming for it.
#
function default_zpool
{
	typeset prop

	default_raidz_setup_noexit "${DISKS}"

	for prop in compression checksum
	do
		log_must zfs set "${prop}=on" "${TESTPOOL}"
	done
}
|
||||
|
||||
#
# Print the value of the zia_available property of the test pool
#
function zia_available
{
	zpool get -H -o value "zia_available" "${TESTPOOL}"
}
|
||||
|
||||
#
# When the pool reports that Z.I.A. is available, make sure the dpusm
# kernel module shows up in lsmod; otherwise mark the test unsupported.
#
# Always returns 0 so that "log_must dpusm_loaded" succeeds both when
# Z.I.A. is unavailable and when the dpusm is loaded.
#
function dpusm_loaded
{
	# nothing to check when Z.I.A. is not available
	if [[ "$(zia_available)" != "yes" ]]
	then
		return 0
	fi

	if ! lsmod | grep -q dpusm
	then
		log_unsupported "dpusm not loaded"
	fi

	return 0
}
|
||||
|
||||
#
# Providers can be loaded at any time after the dpusm is loaded.
#
# The software provider must be loaded after ZFS since
# it uses ZFS symbols.
#
# If Z.I.A. is not available, nothing is loaded and the tests
# should still pass.
#
function load_provider
{
	[[ "$(zia_available)" == "yes" ]] || return 0

	# insert the provider module, then select it for the test pool
	log_must insmod "${SBIN_DIR}/module/${PROVIDER}.ko"
	log_must zpool set zia_provider="${PROVIDER}" "${TESTPOOL}"
}
|
||||
|
||||
#
# Undo load_provider: clear the pool's zia_provider property and
# remove the provider module. Does nothing if Z.I.A. is not available.
#
function unload_provider
{
	[[ "$(zia_available)" == "yes" ]] || return 0

	# detach the provider from the pool before removing the module
	log_must zpool set zia_provider="" "${TESTPOOL}"
	log_must rmmod "${PROVIDER_MODULE}"
}
|
||||
|
||||
#
# Turn on the compress, checksum, raidz gen/rec and disk/file write
# offloads for the test pool
#
function offload_all
{
	typeset offload

	for offload in \
	    zia_compress zia_checksum \
	    zia_raidz1_gen zia_raidz2_gen zia_raidz3_gen \
	    zia_raidz1_rec zia_raidz2_rec zia_raidz3_rec \
	    zia_disk_write zia_file_write
	do
		log_must zpool set "${offload}=on" "${TESTPOOL}"
	done
}
|
||||
|
||||
#
# Walk every on/off combination of the compress, checksum, raidz gen
# and disk/file write offloads. For each combination write a file,
# verify the pool, check that it reports no data errors, and remove
# the file again.
#
function loop_offloads_and_write
{
	typeset gen_prop

	for comp in on off; do
		log_must zpool set zia_compress="${comp}" "${TESTPOOL}"

		for cksum in on off; do
			log_must zpool set zia_checksum="${cksum}" "${TESTPOOL}"

			for raidz in on off; do
				# all three parity generators share one setting
				for gen_prop in zia_raidz1_gen zia_raidz2_gen \
				    zia_raidz3_gen; do
					log_must zpool set \
					    "${gen_prop}=${raidz}" "${TESTPOOL}"
				done

				for diskfile in on off; do
					# disk and file writes are toggled together
					log_must zpool set \
					    zia_disk_write="${diskfile}" "${TESTPOOL}"
					log_must zpool set \
					    zia_file_write="${diskfile}" "${TESTPOOL}"

					log_must file_write -o create \
					    -f "${FILENAME}" -b "${BLOCKSZ}" \
					    -c "${NUM_WRITES}" -d "${DATA}"
					log_must ls -l "${FILENAME}"
					log_must verify_pool "${TESTPOOL}"
					log_must check_pool_status "${TESTPOOL}" \
					    "errors" "No known data errors"
					log_must rm "${FILENAME}"
				done
			done
		done
	done
}
|
||||
|
||||
#
# Print one randomly chosen entry from the whitespace-separated disk
# list given as arguments.
#
# copied from default_raidz_setup_noexit
#
function random_disk
{
	typeset disklist="$*"

	# split the list into an array and count its entries
	disks=(${disklist})
	count="${#disks[@]}"

	# random yields a value in 1..count; shift it to a 0-based index
	idx="$(( $(random 1 ${count}) - 1 ))"

	echo "${disks[${idx}]}"
}
|
|
@ -0,0 +1,54 @@
|
|||
#!/bin/ksh -p
|
||||
#
|
||||
# CDDL HEADER START
|
||||
#
|
||||
# The contents of this file are subject to the terms of the
|
||||
# Common Development and Distribution License (the "License").
|
||||
# You may not use this file except in compliance with the License.
|
||||
#
|
||||
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
# or https://opensource.org/licenses/CDDL-1.0.
|
||||
# See the License for the specific language governing permissions
|
||||
# and limitations under the License.
|
||||
#
|
||||
# When distributing Covered Code, include this CDDL HEADER in each
|
||||
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
# If applicable, add the following below this CDDL HEADER, with the
|
||||
# fields enclosed by brackets "[]" replaced with your own identifying
|
||||
# information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
#
|
||||
# CDDL HEADER END
|
||||
#
|
||||
|
||||
#
|
||||
# Copyright (c) 2021 by Lawrence Livermore National Security, LLC.
|
||||
#
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
|
||||
. $STF_SUITE/tests/functional/zia/zia.kshlib
|
||||
|
||||
#
|
||||
# DESCRIPTION:
|
||||
# Z.I.A. zpool settings work
|
||||
#
|
||||
# STRATEGY:
|
||||
# 1. Turn on all offloads
|
||||
# 2. Run zpool get on each property
|
||||
#
|
||||
|
||||
log_must offload_all

# every Z.I.A. property must be readable with zpool get
for prop in \
    zia_available zia_provider \
    zia_compress zia_checksum \
    zia_raidz1_gen zia_raidz2_gen zia_raidz3_gen \
    zia_raidz1_rec zia_raidz2_rec zia_raidz3_rec \
    zia_disk_write zia_file_write
do
	log_must zpool get "${prop}" "${TESTPOOL}"
done

log_pass
|
|
@ -0,0 +1,65 @@
|
|||
#!/bin/ksh -p
|
||||
#
|
||||
# CDDL HEADER START
|
||||
#
|
||||
# The contents of this file are subject to the terms of the
|
||||
# Common Development and Distribution License (the "License").
|
||||
# You may not use this file except in compliance with the License.
|
||||
#
|
||||
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
# or https://opensource.org/licenses/CDDL-1.0.
|
||||
# See the License for the specific language governing permissions
|
||||
# and limitations under the License.
|
||||
#
|
||||
# When distributing Covered Code, include this CDDL HEADER in each
|
||||
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
# If applicable, add the following below this CDDL HEADER, with the
|
||||
# fields enclosed by brackets "[]" replaced with your own identifying
|
||||
# information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
#
|
||||
# CDDL HEADER END
|
||||
#
|
||||
|
||||
#
|
||||
# Copyright (c) 2021 by Lawrence Livermore National Security, LLC.
|
||||
#
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
|
||||
. $STF_SUITE/tests/functional/zia/zia.kshlib
|
||||
|
||||
#
|
||||
# DESCRIPTION:
|
||||
# Z.I.A. RAIDZ Resilver works
|
||||
#
|
||||
# STRATEGY:
|
||||
# 1. Turn on all offloads
|
||||
# 2. Write data to the zpool
|
||||
# 3. Replace a drive
|
||||
# 4. Resilver the zpool with Z.I.A.
|
||||
# 5. Check for errors
|
||||
#
|
||||
|
||||
# Remove the file that backs the replacement vdev.
# -f keeps the exit handler from failing when the file was never created
# or has already been removed.
function cleanup
{
	log_must rm -f "${RESILVER_REPLACEMENT}"
}
# Register the handler before the file is created so that a failed or
# partial truncate cannot leave the file behind.
log_onexit cleanup

# Backing file for the replacement device.
log_must truncate -s 4G "${RESILVER_REPLACEMENT}"

log_must offload_all

# write a file
log_must file_write -o create -f "${FILENAME}" -b "${BLOCKSZ}" \
    -c "${NUM_WRITES}" -d "${DATA}"
log_must ls -l "${FILENAME}"

# pick a random backing device to offline and replace it
# NOTE(review): ${DISKS} is left unquoted, presumably so that each disk is
# passed to random_disk as a separate argument -- confirm against zia.kshlib.
bad="$(random_disk ${DISKS})"
log_must zpool offline "${TESTPOOL}" "${bad}"
log_must zpool replace "${TESTPOOL}" "${bad}" "${RESILVER_REPLACEMENT}"
log_must wait_replacing "${TESTPOOL}"

# check for errors after the replacement
log_must verify_pool "${TESTPOOL}"
log_must check_pool_status "${TESTPOOL}" "errors" "No known data errors"

log_pass
|
|
@ -0,0 +1,47 @@
|
|||
#!/bin/ksh -p
|
||||
#
|
||||
# CDDL HEADER START
|
||||
#
|
||||
# The contents of this file are subject to the terms of the
|
||||
# Common Development and Distribution License (the "License").
|
||||
# You may not use this file except in compliance with the License.
|
||||
#
|
||||
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
# or https://opensource.org/licenses/CDDL-1.0.
|
||||
# See the License for the specific language governing permissions
|
||||
# and limitations under the License.
|
||||
#
|
||||
# When distributing Covered Code, include this CDDL HEADER in each
|
||||
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
# If applicable, add the following below this CDDL HEADER, with the
|
||||
# fields enclosed by brackets "[]" replaced with your own identifying
|
||||
# information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
#
|
||||
# CDDL HEADER END
|
||||
#
|
||||
|
||||
#
|
||||
# Copyright (c) 2021 by Lawrence Livermore National Security, LLC.
|
||||
#
|
||||
|
||||
. $STF_SUITE/include/libtest.shlib
|
||||
. $STF_SUITE/tests/functional/zia/zia.kshlib
|
||||
|
||||
#
|
||||
# DESCRIPTION:
|
||||
# Z.I.A. Write Pipeline works
|
||||
#
|
||||
# STRATEGY:
|
||||
# 1. Turn each of the offloaded stages on and off
|
||||
# 1.1. Write data to the zpool
|
||||
# 1.2. Delete the file
|
||||
# 2. Disable the provider for the pool and unload the provider
|
||||
# 3. Repeat step 1 without a provider to verify Z.I.A. falls back to ZFS
|
||||
#
|
||||
|
||||
# Tracks whether this test has unloaded the provider and not yet reloaded it.
provider_unloaded=false

# Reload the provider if the test exits while it is still unloaded, so that
# a failure in the fallback pass does not leave the provider missing.
# NOTE(review): assumes load_provider undoes unload_provider -- confirm
# against zia.kshlib.
function cleanup
{
	if [[ "${provider_unloaded}" == "true" ]]; then
		log_must load_provider
	fi
}
log_onexit cleanup

# Step 1: toggle the offloaded stages and write with the provider present.
log_must loop_offloads_and_write

# Steps 2 and 3: repeat the same pass without a provider.
log_must unload_provider
provider_unloaded=true
log_must loop_offloads_and_write

# Restore the provider on the success path.
log_must load_provider
provider_unloaded=false

log_pass
|
Loading…
Reference in New Issue