Merge 3070faa798
into 1713aa7b4d
This commit is contained in:
commit
39a4002f3d
|
@ -57,6 +57,8 @@ dist_noinst_DATA += module/os/linux/spl/THIRDPARTYLICENSE.gplv2
|
||||||
dist_noinst_DATA += module/os/linux/spl/THIRDPARTYLICENSE.gplv2.descrip
|
dist_noinst_DATA += module/os/linux/spl/THIRDPARTYLICENSE.gplv2.descrip
|
||||||
dist_noinst_DATA += module/zfs/THIRDPARTYLICENSE.cityhash
|
dist_noinst_DATA += module/zfs/THIRDPARTYLICENSE.cityhash
|
||||||
dist_noinst_DATA += module/zfs/THIRDPARTYLICENSE.cityhash.descrip
|
dist_noinst_DATA += module/zfs/THIRDPARTYLICENSE.cityhash.descrip
|
||||||
|
dist_noinst_DATA += module/zfs/THIRDPARTYLICENSE.zia
|
||||||
|
dist_noinst_DATA += module/zfs/THIRDPARTYLICENSE.zia.descrip
|
||||||
|
|
||||||
@CODE_COVERAGE_RULES@
|
@CODE_COVERAGE_RULES@
|
||||||
|
|
||||||
|
|
|
@ -44,6 +44,7 @@ AM_CPPFLAGS += -DPKGDATADIR=\"$(pkgdatadir)\"
|
||||||
AM_CPPFLAGS += $(DEBUG_CPPFLAGS)
|
AM_CPPFLAGS += $(DEBUG_CPPFLAGS)
|
||||||
AM_CPPFLAGS += $(CODE_COVERAGE_CPPFLAGS)
|
AM_CPPFLAGS += $(CODE_COVERAGE_CPPFLAGS)
|
||||||
AM_CPPFLAGS += -DTEXT_DOMAIN=\"zfs-@ac_system_l@-user\"
|
AM_CPPFLAGS += -DTEXT_DOMAIN=\"zfs-@ac_system_l@-user\"
|
||||||
|
AM_CPPFLAGS += $(ZIA_CPPFLAGS)
|
||||||
|
|
||||||
if ASAN_ENABLED
|
if ASAN_ENABLED
|
||||||
AM_CPPFLAGS += -DZFS_ASAN_ENABLED
|
AM_CPPFLAGS += -DZFS_ASAN_ENABLED
|
||||||
|
|
|
@ -263,6 +263,8 @@ AC_DEFUN([ZFS_AC_CONFIG], [
|
||||||
AC_SUBST(TEST_JOBS)
|
AC_SUBST(TEST_JOBS)
|
||||||
])
|
])
|
||||||
|
|
||||||
|
ZFS_AC_ZIA
|
||||||
|
|
||||||
ZFS_INIT_SYSV=
|
ZFS_INIT_SYSV=
|
||||||
ZFS_INIT_SYSTEMD=
|
ZFS_INIT_SYSTEMD=
|
||||||
ZFS_WANT_MODULES_LOAD_D=
|
ZFS_WANT_MODULES_LOAD_D=
|
||||||
|
@ -294,7 +296,8 @@ AC_DEFUN([ZFS_AC_CONFIG], [
|
||||||
[test "x$qatsrc" != x ])
|
[test "x$qatsrc" != x ])
|
||||||
AM_CONDITIONAL([WANT_DEVNAME2DEVID], [test "x$user_libudev" = xyes ])
|
AM_CONDITIONAL([WANT_DEVNAME2DEVID], [test "x$user_libudev" = xyes ])
|
||||||
AM_CONDITIONAL([WANT_MMAP_LIBAIO], [test "x$user_libaio" = xyes ])
|
AM_CONDITIONAL([WANT_MMAP_LIBAIO], [test "x$user_libaio" = xyes ])
|
||||||
AM_CONDITIONAL([PAM_ZFS_ENABLED], [test "x$enable_pam" = xyes])
|
AM_CONDITIONAL([PAM_ZFS_ENABLED], [test "x$enable_pam" = xyes ])
|
||||||
|
AM_CONDITIONAL([ZIA_ENABLED], [test "x$enable_zia" = xyes ])
|
||||||
])
|
])
|
||||||
|
|
||||||
dnl #
|
dnl #
|
||||||
|
@ -342,6 +345,10 @@ AC_DEFUN([ZFS_AC_RPM], [
|
||||||
RPM_DEFINE_COMMON=${RPM_DEFINE_COMMON}' --define "__strip /bin/true"'
|
RPM_DEFINE_COMMON=${RPM_DEFINE_COMMON}' --define "__strip /bin/true"'
|
||||||
])
|
])
|
||||||
|
|
||||||
|
AS_IF([test "x$enable_zia" = xyes], [
|
||||||
|
RPM_DEFINE_COMMON=${RPM_DEFINE_COMMON}' --define "$(WITH_ZIA) 1" --define "DPUSM_ROOT $(DPUSM_ROOT)"'
|
||||||
|
])
|
||||||
|
|
||||||
RPM_DEFINE_UTIL=' --define "_initconfdir $(initconfdir)"'
|
RPM_DEFINE_UTIL=' --define "_initconfdir $(initconfdir)"'
|
||||||
|
|
||||||
dnl # Make the next three RPM_DEFINE_UTIL additions conditional, since
|
dnl # Make the next three RPM_DEFINE_UTIL additions conditional, since
|
||||||
|
|
|
@ -0,0 +1,45 @@
|
||||||
|
dnl # Adds --with-zia=PATH to configuration options
|
||||||
|
dnl # The path provided should point to the DPUSM
|
||||||
|
dnl # root and contain Module.symvers.
|
||||||
|
AC_DEFUN([ZFS_AC_ZIA], [
|
||||||
|
AC_ARG_WITH([zia],
|
||||||
|
AS_HELP_STRING([--with-zia=PATH],
|
||||||
|
[Path to the Data Processing Unit Services Module (DPUSM) root]),
|
||||||
|
[
|
||||||
|
DPUSM_ROOT="$withval"
|
||||||
|
AS_IF([test "x$DPUSM_ROOT" != "xno"],
|
||||||
|
[enable_zia=yes],
|
||||||
|
[enable_zia=no])
|
||||||
|
],
|
||||||
|
[enable_zia=no]
|
||||||
|
)
|
||||||
|
|
||||||
|
dnl # Validate the DPUSM root only when Z.I.A. was requested.
dnl # POSIX test(1) defines "=" for string equality; "==" is a bashism.
AS_IF([test "x$enable_zia" = "xyes"],
|
||||||
|
AS_IF([! test -d "$DPUSM_ROOT"],
|
||||||
|
[AC_MSG_ERROR([--with-zia=PATH requires the DPUSM root directory])]
|
||||||
|
)
|
||||||
|
|
||||||
|
DPUSM_SYMBOLS="$DPUSM_ROOT/Module.symvers"
|
||||||
|
|
||||||
|
dnl # Quote the path so a DPUSM root containing whitespace or glob
dnl # characters is still tested as a single word.
AS_IF([test -r "$DPUSM_SYMBOLS"],
|
||||||
|
[
|
||||||
|
AC_MSG_RESULT([$DPUSM_SYMBOLS])
|
||||||
|
ZIA_CPPFLAGS="-DZIA=1 -I$DPUSM_ROOT/include"
|
||||||
|
KERNEL_ZIA_CPPFLAGS="-DZIA=1 -I$DPUSM_ROOT/include"
|
||||||
|
WITH_ZIA="_with_zia"
|
||||||
|
|
||||||
|
AC_SUBST(WITH_ZIA)
|
||||||
|
AC_SUBST(KERNEL_ZIA_CPPFLAGS)
|
||||||
|
AC_SUBST(ZIA_CPPFLAGS)
|
||||||
|
AC_SUBST(DPUSM_SYMBOLS)
|
||||||
|
AC_SUBST(DPUSM_ROOT)
|
||||||
|
],
|
||||||
|
[
|
||||||
|
AC_MSG_ERROR([
|
||||||
|
*** Failed to find Module.symvers in:
|
||||||
|
$DPUSM_SYMBOLS
|
||||||
|
])
|
||||||
|
]
|
||||||
|
)
|
||||||
|
)
|
||||||
|
])
|
|
@ -143,6 +143,9 @@ COMMON_H = \
|
||||||
sys/zfs_vfsops.h \
|
sys/zfs_vfsops.h \
|
||||||
sys/zfs_vnops.h \
|
sys/zfs_vnops.h \
|
||||||
sys/zfs_znode.h \
|
sys/zfs_znode.h \
|
||||||
|
sys/zia.h \
|
||||||
|
sys/zia_cddl.h \
|
||||||
|
sys/zia_private.h \
|
||||||
sys/zil.h \
|
sys/zil.h \
|
||||||
sys/zil_impl.h \
|
sys/zil_impl.h \
|
||||||
sys/zio.h \
|
sys/zio.h \
|
||||||
|
|
|
@ -64,6 +64,7 @@ typedef struct abd {
|
||||||
list_t abd_gang_chain;
|
list_t abd_gang_chain;
|
||||||
} abd_gang;
|
} abd_gang;
|
||||||
} abd_u;
|
} abd_u;
|
||||||
|
void *abd_zia_handle;
|
||||||
} abd_t;
|
} abd_t;
|
||||||
|
|
||||||
typedef int abd_iter_func_t(void *buf, size_t len, void *priv);
|
typedef int abd_iter_func_t(void *buf, size_t len, void *priv);
|
||||||
|
|
|
@ -261,6 +261,19 @@ typedef enum {
|
||||||
ZPOOL_PROP_DEDUP_TABLE_SIZE,
|
ZPOOL_PROP_DEDUP_TABLE_SIZE,
|
||||||
ZPOOL_PROP_DEDUP_TABLE_QUOTA,
|
ZPOOL_PROP_DEDUP_TABLE_QUOTA,
|
||||||
ZPOOL_PROP_DEDUPCACHED,
|
ZPOOL_PROP_DEDUPCACHED,
|
||||||
|
ZPOOL_PROP_ZIA_AVAILABLE,
|
||||||
|
ZPOOL_PROP_ZIA_PROVIDER,
|
||||||
|
ZPOOL_PROP_ZIA_COMPRESS,
|
||||||
|
ZPOOL_PROP_ZIA_DECOMPRESS,
|
||||||
|
ZPOOL_PROP_ZIA_CHECKSUM,
|
||||||
|
ZPOOL_PROP_ZIA_RAIDZ1_GEN,
|
||||||
|
ZPOOL_PROP_ZIA_RAIDZ2_GEN,
|
||||||
|
ZPOOL_PROP_ZIA_RAIDZ3_GEN,
|
||||||
|
ZPOOL_PROP_ZIA_RAIDZ1_REC,
|
||||||
|
ZPOOL_PROP_ZIA_RAIDZ2_REC,
|
||||||
|
ZPOOL_PROP_ZIA_RAIDZ3_REC,
|
||||||
|
ZPOOL_PROP_ZIA_FILE_WRITE,
|
||||||
|
ZPOOL_PROP_ZIA_DISK_WRITE,
|
||||||
ZPOOL_NUM_PROPS
|
ZPOOL_NUM_PROPS
|
||||||
} zpool_prop_t;
|
} zpool_prop_t;
|
||||||
|
|
||||||
|
|
|
@ -52,6 +52,7 @@
|
||||||
#include <sys/zfeature.h>
|
#include <sys/zfeature.h>
|
||||||
#include <sys/zthr.h>
|
#include <sys/zthr.h>
|
||||||
#include <sys/dsl_deadlist.h>
|
#include <sys/dsl_deadlist.h>
|
||||||
|
#include <sys/zia.h>
|
||||||
#include <zfeature_common.h>
|
#include <zfeature_common.h>
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
@ -479,6 +480,8 @@ struct spa {
|
||||||
*/
|
*/
|
||||||
spa_config_lock_t spa_config_lock[SCL_LOCKS]; /* config changes */
|
spa_config_lock_t spa_config_lock[SCL_LOCKS]; /* config changes */
|
||||||
zfs_refcount_t spa_refcount; /* number of opens */
|
zfs_refcount_t spa_refcount; /* number of opens */
|
||||||
|
|
||||||
|
zia_props_t spa_zia_props;
|
||||||
};
|
};
|
||||||
|
|
||||||
extern char *spa_config_path;
|
extern char *spa_config_path;
|
||||||
|
|
|
@ -42,5 +42,13 @@
|
||||||
|
|
||||||
#ifdef _KERNEL
|
#ifdef _KERNEL
|
||||||
#include <sys/vdev.h>
|
#include <sys/vdev.h>
|
||||||
|
|
||||||
|
#ifdef __linux__
|
||||||
|
int __vdev_classic_physio(struct block_device *bdev, zio_t *zio,
|
||||||
|
size_t io_size, uint64_t io_offset, int rw, int flags);
|
||||||
|
int vdev_disk_io_flush(struct block_device *bdev, zio_t *zio);
|
||||||
|
void vdev_disk_error(zio_t *zio);
|
||||||
|
#endif /* __linux__ */
|
||||||
|
|
||||||
#endif /* _KERNEL */
|
#endif /* _KERNEL */
|
||||||
#endif /* _SYS_VDEV_DISK_H */
|
#endif /* _SYS_VDEV_DISK_H */
|
||||||
|
|
|
@ -40,6 +40,10 @@ typedef struct vdev_file {
|
||||||
extern void vdev_file_init(void);
|
extern void vdev_file_init(void);
|
||||||
extern void vdev_file_fini(void);
|
extern void vdev_file_fini(void);
|
||||||
|
|
||||||
|
#ifdef __linux__
|
||||||
|
extern mode_t vdev_file_open_mode(spa_mode_t spa_mode);
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -462,6 +462,8 @@ struct vdev {
|
||||||
uint64_t vdev_io_t;
|
uint64_t vdev_io_t;
|
||||||
uint64_t vdev_slow_io_n;
|
uint64_t vdev_slow_io_n;
|
||||||
uint64_t vdev_slow_io_t;
|
uint64_t vdev_slow_io_t;
|
||||||
|
|
||||||
|
void *vdev_zia_handle;
|
||||||
};
|
};
|
||||||
|
|
||||||
#define VDEV_PAD_SIZE (8 << 10)
|
#define VDEV_PAD_SIZE (8 << 10)
|
||||||
|
|
|
@ -169,6 +169,11 @@ extern int vdev_raidz_load(vdev_t *);
|
||||||
#define RAIDZ_EXPAND_PAUSE_SCRATCH_POST_REFLOW_1 6
|
#define RAIDZ_EXPAND_PAUSE_SCRATCH_POST_REFLOW_1 6
|
||||||
#define RAIDZ_EXPAND_PAUSE_SCRATCH_POST_REFLOW_2 7
|
#define RAIDZ_EXPAND_PAUSE_SCRATCH_POST_REFLOW_2 7
|
||||||
|
|
||||||
|
void vdev_raidz_generate_parity_p(struct raidz_row *);
|
||||||
|
void vdev_raidz_generate_parity_pq(struct raidz_row *);
|
||||||
|
void vdev_raidz_generate_parity_pqr(struct raidz_row *);
|
||||||
|
void vdev_raidz_reconstruct_general(struct raidz_row *, int *, int);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -136,6 +136,7 @@ typedef struct raidz_row {
|
||||||
uint64_t rr_offset; /* Logical offset for *_io_verify() */
|
uint64_t rr_offset; /* Logical offset for *_io_verify() */
|
||||||
uint64_t rr_size; /* Physical size for *_io_verify() */
|
uint64_t rr_size; /* Physical size for *_io_verify() */
|
||||||
#endif
|
#endif
|
||||||
|
void *rr_zia_handle;
|
||||||
raidz_col_t rr_col[]; /* Flexible array of I/O columns */
|
raidz_col_t rr_col[]; /* Flexible array of I/O columns */
|
||||||
} raidz_row_t;
|
} raidz_row_t;
|
||||||
|
|
||||||
|
|
|
@ -61,7 +61,7 @@ typedef struct mzap_phys {
|
||||||
uint64_t mz_salt;
|
uint64_t mz_salt;
|
||||||
uint64_t mz_normflags;
|
uint64_t mz_normflags;
|
||||||
uint64_t mz_pad[5];
|
uint64_t mz_pad[5];
|
||||||
mzap_ent_phys_t mz_chunk[1];
|
mzap_ent_phys_t mz_chunk[];
|
||||||
/* actually variable size depending on block size */
|
/* actually variable size depending on block size */
|
||||||
} mzap_phys_t;
|
} mzap_phys_t;
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,225 @@
|
||||||
|
/*
|
||||||
|
* © 2021. Triad National Security, LLC. All rights reserved.
|
||||||
|
*
|
||||||
|
* This program was produced under U.S. Government contract
|
||||||
|
* 89233218CNA000001 for Los Alamos National Laboratory (LANL), which
|
||||||
|
* is operated by Triad National Security, LLC for the U.S.
|
||||||
|
* Department of Energy/National Nuclear Security Administration. All
|
||||||
|
* rights in the program are reserved by Triad National Security, LLC,
|
||||||
|
* and the U.S. Department of Energy/National Nuclear Security
|
||||||
|
* Administration. The Government is granted for itself and others
|
||||||
|
* acting on its behalf a nonexclusive, paid-up, irrevocable worldwide
|
||||||
|
* license in this material to reproduce, prepare derivative works,
|
||||||
|
* distribute copies to the public, perform publicly and display
|
||||||
|
* publicly, and to permit others to do so.
|
||||||
|
*
|
||||||
|
* ----
|
||||||
|
*
|
||||||
|
* This program is open source under the BSD-3 License.
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* 1. Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer in the documentation
|
||||||
|
* and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* 3. Neither the name of the copyright holder nor the names of its
|
||||||
|
* contributors may be used to endorse or promote products derived from this
|
||||||
|
* software without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef _ZIA_H
|
||||||
|
#define _ZIA_H
|
||||||
|
|
||||||
|
#include <sys/abd.h>
|
||||||
|
#include <sys/fs/zfs.h> /* VDEV_RAIDZ_MAXPARITY */
|
||||||
|
#include <sys/vdev.h>
|
||||||
|
#include <sys/vdev_raidz_impl.h>
|
||||||
|
#include <sys/zio.h>
|
||||||
|
#include <sys/zio_checksum.h>
|
||||||
|
#include <sys/zio_compress.h>
|
||||||
|
|
||||||
|
/* ******************************************************** */
|
||||||
|
/* return values */
|
||||||
|
#define ZIA_OK 1000
|
||||||
|
|
||||||
|
/* something bad happened not related to missing functionality */
|
||||||
|
#define ZIA_ERROR 1001
|
||||||
|
|
||||||
|
/* error, fallback to zfs implementation */
|
||||||
|
#define ZIA_FALLBACK 1002
|
||||||
|
|
||||||
|
/* ran, but result is bad */
|
||||||
|
#define ZIA_BAD_RESULT 1003
|
||||||
|
|
||||||
|
/* expected provider and actual provider do not match */
|
||||||
|
#define ZIA_PROVIDER_MISMATCH 1004
|
||||||
|
|
||||||
|
/*
|
||||||
|
* error, returned when the provider can no longer
|
||||||
|
* communicate with the accelerator (providers are
|
||||||
|
* software, and are not expected to randomly go
|
||||||
|
* down)
|
||||||
|
*/
|
||||||
|
#define ZIA_ACCELERATOR_DOWN 1005
|
||||||
|
/* ******************************************************** */
|
||||||
|
|
||||||
|
/* DPUSM was not found by configure */
|
||||||
|
#define ZIA_DISABLED 1006
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This struct is normally set with
|
||||||
|
* zpool set zia_<property>=on/off/<value>
|
||||||
|
* and passed around in spa_t.
|
||||||
|
*/
|
||||||
|
typedef struct zia_props {
|
||||||
|
/* global state */
|
||||||
|
boolean_t can_offload; /* NOTE(review): presumably the pool-wide offload switch cleared by zia_disable_offloading() - confirm in zia.c */
|
||||||
|
void *provider; /* opaque provider handle; presumably obtained from zia_get_provider() - confirm */
|
||||||
|
|
||||||
|
/* minimum size allowed to offload - set by ashift */
|
||||||
|
size_t min_offload_size;
|
||||||
|
|
||||||
|
int compress; /* NOTE(review): presumably mirrors the zia_compress pool property - confirm */
|
||||||
|
int decompress; /* NOTE(review): presumably mirrors the zia_decompress pool property - confirm */
|
||||||
|
|
||||||
|
int checksum; /* NOTE(review): presumably mirrors the zia_checksum pool property - confirm */
|
||||||
|
|
||||||
|
struct {
|
||||||
|
int gen[VDEV_RAIDZ_MAXPARITY + 1]; /* NOTE(review): one extra slot; presumably indexed by parity level (zia_raidzN_gen) - confirm */
|
||||||
|
int rec[VDEV_RAIDZ_MAXPARITY + 1]; /* NOTE(review): one extra slot; presumably indexed by parity level (zia_raidzN_rec) - confirm */
|
||||||
|
} raidz;
|
||||||
|
|
||||||
|
int file_write; /* NOTE(review): presumably mirrors the zia_file_write pool property - confirm */
|
||||||
|
int disk_write; /* NOTE(review): presumably mirrors the zia_disk_write pool property - confirm */
|
||||||
|
} zia_props_t;
|
||||||
|
|
||||||
|
zia_props_t *zia_get_props(spa_t *spa);
|
||||||
|
void zia_prop_warn(boolean_t val, const char *name);
|
||||||
|
|
||||||
|
int zia_init(void);
|
||||||
|
int zia_fini(void);
|
||||||
|
|
||||||
|
void *zia_get_provider(const char *name, vdev_t *vdev);
|
||||||
|
const char *zia_get_provider_name(void *provider);
|
||||||
|
int zia_put_provider(void **provider, vdev_t *vdev);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* turn off offloading for this zio as well as
|
||||||
|
* all new zios created with the same spa
|
||||||
|
*/
|
||||||
|
int zia_disable_offloading(zio_t *zio, boolean_t reexecute);
|
||||||
|
|
||||||
|
/* check if offloading can occur */
|
||||||
|
boolean_t zia_is_used(zio_t *zio);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* check if a handle is associated with this pointer
|
||||||
|
*
|
||||||
|
* not exposing functions for different handles because
|
||||||
|
* only abd handles are checked outside of zia.c
|
||||||
|
*/
|
||||||
|
boolean_t zia_is_offloaded(abd_t *abd);
|
||||||
|
|
||||||
|
int zia_worst_error(const int lhs, const int rhs);
|
||||||
|
|
||||||
|
/* create a new offloader handle without copying data */
|
||||||
|
void *zia_alloc(void *provider, size_t size, size_t min_offload_size);
|
||||||
|
|
||||||
|
/* deallocate handle without onloading */
|
||||||
|
int zia_free(void **handle);
|
||||||
|
|
||||||
|
/* move linear data from the offloader to memory */
|
||||||
|
int zia_onload(void **handle, void *buf, size_t size);
|
||||||
|
|
||||||
|
/* calls abd_iterate_func on the abd to copy abd data back and forth */
|
||||||
|
int zia_offload_abd(void *provider, abd_t *abd,
|
||||||
|
size_t size, size_t min_offload_size,
|
||||||
|
boolean_t *local_offload, boolean_t lock);
|
||||||
|
int zia_onload_abd(abd_t *abd, size_t size,
|
||||||
|
boolean_t keep_handle);
|
||||||
|
int zia_free_abd(abd_t *abd, boolean_t lock);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* if offloaded locally, just free the handle
|
||||||
|
* if not, onload the data and free the handle
|
||||||
|
*/
|
||||||
|
int zia_cleanup_abd(abd_t *abd, size_t size,
|
||||||
|
boolean_t local_offload, boolean_t lock);
|
||||||
|
|
||||||
|
/* if the accelerator failed, restart the zio */
|
||||||
|
void zia_restart_before_vdev(zio_t *zio);
|
||||||
|
|
||||||
|
/* fill a buffer with zeros */
|
||||||
|
int zia_zero_fill(abd_t *abd, size_t offset, size_t size);
|
||||||
|
|
||||||
|
int
|
||||||
|
zia_compress(zia_props_t *props, enum zio_compress c,
|
||||||
|
abd_t *src, size_t s_len,
|
||||||
|
abd_t **dst, uint64_t *d_len,
|
||||||
|
uint8_t level, boolean_t *local_offload);
|
||||||
|
|
||||||
|
int
|
||||||
|
zia_decompress(zia_props_t *props, enum zio_compress c,
|
||||||
|
abd_t *src, size_t s_len, abd_t *dst, size_t d_len,
|
||||||
|
uint8_t *level);
|
||||||
|
|
||||||
|
int zia_checksum_compute(void *provider, zio_cksum_t *dst,
|
||||||
|
enum zio_checksum alg, zio_t *zio, uint64_t size,
|
||||||
|
boolean_t *local_offload);
|
||||||
|
int zia_checksum_error(enum zio_checksum alg, abd_t *abd,
|
||||||
|
uint64_t size, int byteswap, zio_cksum_t *actual_cksum);
|
||||||
|
|
||||||
|
/* raidz */
|
||||||
|
int zia_raidz_alloc(zio_t *zio, raidz_row_t *rr, boolean_t rec,
|
||||||
|
uint_t cksum, boolean_t *local_offload);
|
||||||
|
int zia_raidz_free(raidz_row_t *rr, boolean_t onload_parity);
|
||||||
|
int zia_raidz_gen(raidz_row_t *rr);
|
||||||
|
int zia_raidz_gen_cleanup(zio_t *zio, raidz_row_t *rr,
|
||||||
|
boolean_t local_offload);
|
||||||
|
int zia_raidz_new_parity(zio_t *zio, raidz_row_t *rr, uint64_t c);
|
||||||
|
/* compare the contents of offloaded abds (only used in resilver) */
|
||||||
|
int zia_raidz_cmp(abd_t *lhs, abd_t *rhs, int *diff);
|
||||||
|
int zia_raidz_rec(raidz_row_t *rr, int *t, int nt);
|
||||||
|
int zia_raidz_rec_cleanup(zio_t *zio, raidz_row_t *rr,
|
||||||
|
boolean_t local_offload, boolean_t onload_parity);
|
||||||
|
|
||||||
|
/* file I/O */
|
||||||
|
int zia_file_open(vdev_t *vdev, const char *path,
|
||||||
|
int flags, int mode);
|
||||||
|
int zia_file_write(vdev_t *vdev, abd_t *abd, ssize_t size,
|
||||||
|
loff_t offset, ssize_t *resid, int *err);
|
||||||
|
int zia_file_close(vdev_t *vdev);
|
||||||
|
|
||||||
|
#ifdef __linux__
|
||||||
|
#ifdef _KERNEL
|
||||||
|
#include <linux/blkdev.h>
|
||||||
|
|
||||||
|
/* disk I/O */
|
||||||
|
int zia_disk_open(vdev_t *vdev, const char *path,
|
||||||
|
struct block_device *bdev);
|
||||||
|
int zia_disk_invalidate(vdev_t *vdev);
|
||||||
|
int zia_disk_write(vdev_t *vdev, zio_t *zio,
|
||||||
|
size_t io_size, uint64_t io_offset, int flags);
|
||||||
|
int zia_disk_flush(vdev_t *vdev, zio_t *zio);
|
||||||
|
int zia_disk_close(vdev_t *vdev);
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
|
@ -0,0 +1,51 @@
|
||||||
|
/*
|
||||||
|
* CDDL HEADER START
|
||||||
|
*
|
||||||
|
* The contents of this file are subject to the terms of the
|
||||||
|
* Common Development and Distribution License (the "License").
|
||||||
|
* You may not use this file except in compliance with the License.
|
||||||
|
*
|
||||||
|
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||||
|
* or https://opensource.org/licenses/CDDL-1.0.
|
||||||
|
* See the License for the specific language governing permissions
|
||||||
|
* and limitations under the License.
|
||||||
|
*
|
||||||
|
* When distributing Covered Code, include this CDDL HEADER in each
|
||||||
|
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||||
|
* If applicable, add the following below this CDDL HEADER, with the
|
||||||
|
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||||
|
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||||
|
*
|
||||||
|
* CDDL HEADER END
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef _ZIA_CDDL_H
|
||||||
|
#define _ZIA_CDDL_H
|
||||||
|
|
||||||
|
#include <sys/abd.h>
|
||||||
|
#include <sys/zio.h>
|
||||||
|
#include <sys/zio_compress.h>
|
||||||
|
|
||||||
|
#ifdef ZIA
|
||||||
|
#include <dpusm/user_api.h>
|
||||||
|
int
|
||||||
|
zia_compress_impl(const dpusm_uf_t *dpusm, zia_props_t *props,
|
||||||
|
enum zio_compress c, abd_t *src, size_t s_len,
|
||||||
|
void **cbuf_handle, uint64_t *c_len,
|
||||||
|
uint8_t level, boolean_t *local_offload);
|
||||||
|
|
||||||
|
int
|
||||||
|
zia_raidz_rec_impl(const dpusm_uf_t *dpusm,
|
||||||
|
raidz_row_t *rr, int *t, int nt);
|
||||||
|
|
||||||
|
#ifdef _KERNEL
|
||||||
|
void
|
||||||
|
zia_disk_write_completion(void *zio_ptr, int error);
|
||||||
|
|
||||||
|
void
|
||||||
|
zia_disk_flush_completion(void *zio_ptr, int error);
|
||||||
|
#endif /* _KERNEL */
|
||||||
|
|
||||||
|
#endif /* ZIA */
|
||||||
|
|
||||||
|
#endif /* _ZIA_CDDL_H */
|
|
@ -0,0 +1,75 @@
|
||||||
|
/*
|
||||||
|
* © 2021. Triad National Security, LLC. All rights reserved.
|
||||||
|
*
|
||||||
|
* This program was produced under U.S. Government contract
|
||||||
|
* 89233218CNA000001 for Los Alamos National Laboratory (LANL), which
|
||||||
|
* is operated by Triad National Security, LLC for the U.S.
|
||||||
|
* Department of Energy/National Nuclear Security Administration. All
|
||||||
|
* rights in the program are reserved by Triad National Security, LLC,
|
||||||
|
* and the U.S. Department of Energy/National Nuclear Security
|
||||||
|
* Administration. The Government is granted for itself and others
|
||||||
|
* acting on its behalf a nonexclusive, paid-up, irrevocable worldwide
|
||||||
|
* license in this material to reproduce, prepare derivative works,
|
||||||
|
* distribute copies to the public, perform publicly and display
|
||||||
|
* publicly, and to permit others to do so.
|
||||||
|
*
|
||||||
|
* ----
|
||||||
|
*
|
||||||
|
* This program is open source under the BSD-3 License.
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* 1. Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer in the documentation
|
||||||
|
* and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* 3. Neither the name of the copyright holder nor the names of its
|
||||||
|
* contributors may be used to endorse or promote products derived from this
|
||||||
|
* software without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef _ZIA_PRIVATE_H
|
||||||
|
#define _ZIA_PRIVATE_H
|
||||||
|
|
||||||
|
/*
|
||||||
|
* needed by both zia.h and zia_cddl.h
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <sys/zio.h>
|
||||||
|
#include <sys/zio_compress.h>
|
||||||
|
#include <sys/zio_checksum.h>
|
||||||
|
|
||||||
|
/*
 * Access the abd_zia_handle member of the abd_t pointed to by `abd`.
 * The whole expansion is parenthesized so it composes safely in any
 * expression; it remains usable as an lvalue.
 */
#define ABD_HANDLE(abd) ((abd)->abd_zia_handle)
|
||||||
|
|
||||||
|
/*
 * Access the vdev_zia_handle member of the vdev pointed to by `vdev`.
 * The whole expansion is parenthesized so it composes safely in any
 * expression; it remains usable as an lvalue.
 */
#define VDEV_HANDLE(vdev) ((vdev)->vdev_zia_handle)
|
||||||
|
|
||||||
|
int
|
||||||
|
dpusm_to_ret(const int dpusm_ret);
|
||||||
|
|
||||||
|
#ifdef ZIA
|
||||||
|
#include <dpusm/user_api.h>
|
||||||
|
|
||||||
|
dpusm_compress_t
|
||||||
|
compress_to_dpusm(enum zio_compress c);
|
||||||
|
|
||||||
|
int zia_get_capabilities(void *provider, dpusm_pc_t **caps);
|
||||||
|
|
||||||
|
#endif /* ZIA */
|
||||||
|
|
||||||
|
#endif /* _ZIA_PRIVATE_H */
|
|
@ -226,6 +226,8 @@ typedef uint64_t zio_flag_t;
|
||||||
#define ZIO_FLAG_REEXECUTED (1ULL << 29)
|
#define ZIO_FLAG_REEXECUTED (1ULL << 29)
|
||||||
#define ZIO_FLAG_DELEGATED (1ULL << 30)
|
#define ZIO_FLAG_DELEGATED (1ULL << 30)
|
||||||
|
|
||||||
|
#define ZIO_FLAG_ZIA_REEXECUTE (1ULL << 32)
|
||||||
|
|
||||||
#define ZIO_ALLOCATOR_NONE (-1)
|
#define ZIO_ALLOCATOR_NONE (-1)
|
||||||
#define ZIO_HAS_ALLOCATOR(zio) ((zio)->io_allocator != ZIO_ALLOCATOR_NONE)
|
#define ZIO_HAS_ALLOCATOR(zio) ((zio)->io_allocator != ZIO_ALLOCATOR_NONE)
|
||||||
|
|
||||||
|
@ -532,6 +534,8 @@ struct zio {
|
||||||
|
|
||||||
/* Taskq dispatching state */
|
/* Taskq dispatching state */
|
||||||
taskq_ent_t io_tqent;
|
taskq_ent_t io_tqent;
|
||||||
|
|
||||||
|
boolean_t io_can_offload;
|
||||||
};
|
};
|
||||||
|
|
||||||
enum blk_verify_flag {
|
enum blk_verify_flag {
|
||||||
|
@ -622,6 +626,7 @@ extern void zio_data_buf_free(void *buf, size_t size);
|
||||||
|
|
||||||
extern void zio_push_transform(zio_t *zio, struct abd *abd, uint64_t size,
|
extern void zio_push_transform(zio_t *zio, struct abd *abd, uint64_t size,
|
||||||
uint64_t bufsize, zio_transform_func_t *transform);
|
uint64_t bufsize, zio_transform_func_t *transform);
|
||||||
|
extern zio_transform_t *zio_pop_transform(zio_t *zio);
|
||||||
extern void zio_pop_transforms(zio_t *zio);
|
extern void zio_pop_transforms(zio_t *zio);
|
||||||
|
|
||||||
extern void zio_resubmit_stage_async(void *);
|
extern void zio_resubmit_stage_async(void *);
|
||||||
|
|
|
@ -144,10 +144,18 @@ typedef const struct zio_compress_info {
|
||||||
|
|
||||||
extern zio_compress_info_t zio_compress_table[ZIO_COMPRESS_FUNCTIONS];
|
extern zio_compress_info_t zio_compress_table[ZIO_COMPRESS_FUNCTIONS];
|
||||||
|
|
||||||
|
extern int zio_compress_zeroed_cb(void *data, size_t len, void *private);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* lz4 compression init & free
|
* lz4 compression init & free
|
||||||
*/
|
*/
|
||||||
extern void lz4_init(void);
|
extern void lz4_init(void);
|
||||||
|
extern size_t
|
||||||
|
zfs_lz4_compress_buf(void *s_start, void *d_start, size_t s_len,
|
||||||
|
size_t d_len, int n);
|
||||||
|
extern int
|
||||||
|
zfs_lz4_decompress_buf(void *s_start, void *d_start, size_t s_len,
|
||||||
|
size_t d_len, int n);
|
||||||
extern void lz4_fini(void);
|
extern void lz4_fini(void);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -2963,7 +2963,20 @@
|
||||||
<enumerator name='ZPOOL_PROP_DEDUP_TABLE_SIZE' value='36'/>
|
<enumerator name='ZPOOL_PROP_DEDUP_TABLE_SIZE' value='36'/>
|
||||||
<enumerator name='ZPOOL_PROP_DEDUP_TABLE_QUOTA' value='37'/>
|
<enumerator name='ZPOOL_PROP_DEDUP_TABLE_QUOTA' value='37'/>
|
||||||
<enumerator name='ZPOOL_PROP_DEDUPCACHED' value='38'/>
|
<enumerator name='ZPOOL_PROP_DEDUPCACHED' value='38'/>
|
||||||
<enumerator name='ZPOOL_NUM_PROPS' value='39'/>
|
<enumerator name='ZPOOL_PROP_ZIA_AVAILABLE' value='39'/>
|
||||||
|
<enumerator name='ZPOOL_PROP_ZIA_PROVIDER' value='40'/>
|
||||||
|
<enumerator name='ZPOOL_PROP_ZIA_COMPRESS' value='41'/>
|
||||||
|
<enumerator name='ZPOOL_PROP_ZIA_DECOMPRESS' value='42'/>
|
||||||
|
<enumerator name='ZPOOL_PROP_ZIA_CHECKSUM' value='43'/>
|
||||||
|
<enumerator name='ZPOOL_PROP_ZIA_RAIDZ1_GEN' value='44'/>
|
||||||
|
<enumerator name='ZPOOL_PROP_ZIA_RAIDZ2_GEN' value='45'/>
|
||||||
|
<enumerator name='ZPOOL_PROP_ZIA_RAIDZ3_GEN' value='46'/>
|
||||||
|
<enumerator name='ZPOOL_PROP_ZIA_RAIDZ1_REC' value='47'/>
|
||||||
|
<enumerator name='ZPOOL_PROP_ZIA_RAIDZ2_REC' value='48'/>
|
||||||
|
<enumerator name='ZPOOL_PROP_ZIA_RAIDZ3_REC' value='49'/>
|
||||||
|
<enumerator name='ZPOOL_PROP_ZIA_FILE_WRITE' value='50'/>
|
||||||
|
<enumerator name='ZPOOL_PROP_ZIA_DISK_WRITE' value='51'/>
|
||||||
|
<enumerator name='ZPOOL_NUM_PROPS' value='52'/>
|
||||||
</enum-decl>
|
</enum-decl>
|
||||||
<typedef-decl name='zpool_prop_t' type-id='af1ba157' id='5d0c23fb'/>
|
<typedef-decl name='zpool_prop_t' type-id='af1ba157' id='5d0c23fb'/>
|
||||||
<typedef-decl name='regoff_t' type-id='95e97e5e' id='54a2a2a8'/>
|
<typedef-decl name='regoff_t' type-id='95e97e5e' id='54a2a2a8'/>
|
||||||
|
|
|
@ -184,6 +184,8 @@ nodist_libzpool_la_SOURCES = \
|
||||||
module/zfs/zfs_rlock.c \
|
module/zfs/zfs_rlock.c \
|
||||||
module/zfs/zfs_sa.c \
|
module/zfs/zfs_sa.c \
|
||||||
module/zfs/zil.c \
|
module/zfs/zil.c \
|
||||||
|
module/zfs/zia.c \
|
||||||
|
module/zfs/zia_cddl.c \
|
||||||
module/zfs/zio.c \
|
module/zfs/zio.c \
|
||||||
module/zfs/zio_checksum.c \
|
module/zfs/zio_checksum.c \
|
||||||
module/zfs/zio_compress.c \
|
module/zfs/zio_compress.c \
|
||||||
|
|
|
@ -464,6 +464,42 @@ command, though this property can be used when a specific version is needed for
|
||||||
backwards compatibility.
|
backwards compatibility.
|
||||||
Once feature flags are enabled on a pool this property will no longer have a
|
Once feature flags are enabled on a pool this property will no longer have a
|
||||||
value.
|
value.
|
||||||
|
.It Sy zia_checksum Ns = Ns Sy on Ns | Ns Sy off
|
||||||
|
Controls whether the pool should offload checksum computations.
|
||||||
|
Does not have any effect if the checksum stage is disabled.
|
||||||
|
Embedded checksums are onloaded, and will suffer a data movement penalty.
|
||||||
|
.It Sy zia_compress Ns = Ns Sy on Ns | Ns Sy off
|
||||||
|
Controls whether the pool should offload compression.
|
||||||
|
Does not have any effect if the compression stage is disabled.
|
||||||
|
Embedded data is onloaded, and will suffer a data movement penalty.
|
||||||
|
.It Sy zia_decompress Ns = Ns Sy on Ns | Ns Sy off
|
||||||
|
Controls whether the pool should offload decompression.
|
||||||
|
.It Sy zia_disk_write Ns = Ns Sy on Ns | Ns Sy off
|
||||||
|
Controls whether a pool should offload write I/Os to disks.
|
||||||
|
.It Sy zia_file_write Ns = Ns Sy on Ns | Ns Sy off
|
||||||
|
Controls whether a pool should offload write I/Os to files.
|
||||||
|
.It Sy zia_provider Ns = Ns Sy (unset) Ns | Ns Sy Z.I.A. Provider Name
|
||||||
|
Selects an accelerator registered in the Data Processing Unit Services
|
||||||
|
Module to offload data to.
|
||||||
|
Only one accelerator can be used by a pool at a time.
|
||||||
|
.It Sy zia_raidz1_gen Ns = Ns Sy on Ns | Ns Sy off
|
||||||
|
Controls whether the pool should offload RAIDZ1 parity generation.
|
||||||
|
Does not have any effect if RAIDZ1 is disabled.
|
||||||
|
.It Sy zia_raidz1_rec Ns = Ns Sy on Ns | Ns Sy off
|
||||||
|
Controls whether the pool should offload RAIDZ1 reconstruction.
|
||||||
|
Does not have any effect if RAIDZ1 is disabled.
|
||||||
|
.It Sy zia_raidz2_gen Ns = Ns Sy on Ns | Ns Sy off
|
||||||
|
Controls whether the pool should offload RAIDZ2 parity generation.
|
||||||
|
Does not have any effect if RAIDZ2 is disabled.
|
||||||
|
.It Sy zia_raidz2_rec Ns = Ns Sy on Ns | Ns Sy off
|
||||||
|
Controls whether the pool should offload RAIDZ2 reconstruction.
|
||||||
|
Does not have any effect if RAIDZ2 is disabled.
|
||||||
|
.It Sy zia_raidz3_gen Ns = Ns Sy on Ns | Ns Sy off
|
||||||
|
Controls whether the pool should offload RAIDZ3 parity generation.
|
||||||
|
Does not have any effect if RAIDZ3 is disabled.
|
||||||
|
.It Sy zia_raidz3_rec Ns = Ns Sy on Ns | Ns Sy off
|
||||||
|
Controls whether the pool should offload RAIDZ3 reconstruction.
|
||||||
|
Does not have any effect if RAIDZ3 is disabled.
|
||||||
.El
|
.El
|
||||||
.
|
.
|
||||||
.Ss User Properties
|
.Ss User Properties
|
||||||
|
|
|
@ -27,6 +27,7 @@ ZFS_MODULE_CFLAGS += -I$(zfs_include)/os/linux/zfs
|
||||||
ZFS_MODULE_CFLAGS += -I$(zfs_include)
|
ZFS_MODULE_CFLAGS += -I$(zfs_include)
|
||||||
ZFS_MODULE_CPPFLAGS += -D_KERNEL
|
ZFS_MODULE_CPPFLAGS += -D_KERNEL
|
||||||
ZFS_MODULE_CPPFLAGS += @KERNEL_DEBUG_CPPFLAGS@
|
ZFS_MODULE_CPPFLAGS += @KERNEL_DEBUG_CPPFLAGS@
|
||||||
|
ZFS_MODULE_CPPFLAGS += @KERNEL_ZIA_CPPFLAGS@
|
||||||
|
|
||||||
# KASAN enables -Werror=frame-larger-than=1024, which
|
# KASAN enables -Werror=frame-larger-than=1024, which
|
||||||
# breaks oh so many parts of our build.
|
# breaks oh so many parts of our build.
|
||||||
|
@ -424,6 +425,8 @@ ZFS_OBJS := \
|
||||||
zfs_sa.o \
|
zfs_sa.o \
|
||||||
zfs_vnops.o \
|
zfs_vnops.o \
|
||||||
zil.o \
|
zil.o \
|
||||||
|
zia.o \
|
||||||
|
zia_cddl.o \
|
||||||
zio.o \
|
zio.o \
|
||||||
zio_checksum.o \
|
zio_checksum.o \
|
||||||
zio_compress.o \
|
zio_compress.o \
|
||||||
|
@ -503,3 +506,19 @@ OBJECT_FILES_NON_STANDARD_vdev_raidz_math_avx512f.o := y
|
||||||
ifeq ($(CONFIG_ALTIVEC),y)
|
ifeq ($(CONFIG_ALTIVEC),y)
|
||||||
$(obj)/zfs/vdev_raidz_math_powerpc_altivec.o : c_flags += -maltivec
|
$(obj)/zfs/vdev_raidz_math_powerpc_altivec.o : c_flags += -maltivec
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifneq ("@DPUSM_SYMBOLS@","")
|
||||||
|
obj-$(CONFIG_ZFS) += zia-software-provider.o
|
||||||
|
|
||||||
|
ZIA_SOFTWARE_PROVIDER_OBJS := \
|
||||||
|
provider.o \
|
||||||
|
kernel_offloader.o
|
||||||
|
|
||||||
|
zia-software-provider-objs += $(addprefix zia-software-provider/,$(ZIA_SOFTWARE_PROVIDER_OBJS))
|
||||||
|
# zfs_file_os does not have any dependencies, so just link to it directly
|
||||||
|
zia-software-provider-objs += os/linux/zfs/zfs_file_os.o
|
||||||
|
|
||||||
|
$(addprefix $(obj)/zia-software-provider/,$(ZIA_SOFTWARE_PROVIDER_OBJS)) : ccflags-y += -I@abs_top_builddir@ $(ZFS_MODULE_CFLAGS) -I@abs_srcdir@/zia-software-provider/ -I@DPUSM_ROOT@/include
|
||||||
|
|
||||||
|
@ZIA_ENABLED_TRUE@KBUILD_EXTRA_SYMBOLS += @DPUSM_SYMBOLS@
|
||||||
|
endif
|
|
@ -80,7 +80,7 @@ clean: clean-@ac_system@
|
||||||
|
|
||||||
.PHONY: modules_uninstall-Linux-legacy
|
.PHONY: modules_uninstall-Linux-legacy
|
||||||
modules_uninstall-Linux-legacy:
|
modules_uninstall-Linux-legacy:
|
||||||
$(RM) -r $(addprefix $(KMODDIR)/$(INSTALL_MOD_DIR)/,spl/ avl/ icp/ lua/ nvpair/ unicode/ zcommon/ zfs/ zstd/)
|
$(RM) -r $(addprefix $(KMODDIR)/$(INSTALL_MOD_DIR)/,spl/ avl/ icp/ lua/ nvpair/ unicode/ zcommon/ zfs/ zstd/ zia-software-provider)
|
||||||
|
|
||||||
KMODDIR := $(INSTALL_MOD_PATH)/lib/modules/@LINUX_VERSION@
|
KMODDIR := $(INSTALL_MOD_PATH)/lib/modules/@LINUX_VERSION@
|
||||||
modules_install-Linux: modules_uninstall-Linux-legacy
|
modules_install-Linux: modules_uninstall-Linux-legacy
|
||||||
|
@ -123,7 +123,7 @@ data_install: data_install-@ac_system@
|
||||||
|
|
||||||
modules_uninstall-Linux: modules_uninstall-Linux-legacy
|
modules_uninstall-Linux: modules_uninstall-Linux-legacy
|
||||||
@# Uninstall the kernel modules
|
@# Uninstall the kernel modules
|
||||||
$(RM) $(addprefix $(KMODDIR)/$(INSTALL_MOD_DIR)/,zfs.ko spl.ko)
|
$(RM) $(addprefix $(KMODDIR)/$(INSTALL_MOD_DIR)/,zfs.ko spl.ko zia-software-provider.ko)
|
||||||
|
|
||||||
modules_uninstall-FreeBSD:
|
modules_uninstall-FreeBSD:
|
||||||
@false
|
@false
|
||||||
|
@ -153,7 +153,7 @@ cppcheck-Linux:
|
||||||
-I @top_srcdir@/include/os/linux/spl \
|
-I @top_srcdir@/include/os/linux/spl \
|
||||||
-I @top_srcdir@/include/os/linux/zfs \
|
-I @top_srcdir@/include/os/linux/zfs \
|
||||||
-I @top_srcdir@/include \
|
-I @top_srcdir@/include \
|
||||||
avl icp lua nvpair unicode zcommon zfs zstd os/linux
|
avl icp lua nvpair unicode zcommon zfs zstd os/linux zia-software-provider
|
||||||
|
|
||||||
cppcheck-FreeBSD:
|
cppcheck-FreeBSD:
|
||||||
@true
|
@true
|
||||||
|
|
|
@ -34,6 +34,7 @@
|
||||||
#include <sys/vdev_trim.h>
|
#include <sys/vdev_trim.h>
|
||||||
#include <sys/abd.h>
|
#include <sys/abd.h>
|
||||||
#include <sys/fs/zfs.h>
|
#include <sys/fs/zfs.h>
|
||||||
|
#include <sys/zia.h>
|
||||||
#include <sys/zio.h>
|
#include <sys/zio.h>
|
||||||
#include <linux/blkpg.h>
|
#include <linux/blkpg.h>
|
||||||
#include <linux/msdos_fs.h>
|
#include <linux/msdos_fs.h>
|
||||||
|
@ -224,7 +225,7 @@ bdev_max_capacity(struct block_device *bdev, uint64_t wholedisk)
|
||||||
return (psize);
|
return (psize);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
void
|
||||||
vdev_disk_error(zio_t *zio)
|
vdev_disk_error(zio_t *zio)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
|
@ -337,6 +338,7 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize,
|
||||||
reread_part = B_TRUE;
|
reread_part = B_TRUE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
zia_disk_close(v);
|
||||||
vdev_blkdev_put(bdh, smode, zfs_vdev_holder);
|
vdev_blkdev_put(bdh, smode, zfs_vdev_holder);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -461,6 +463,11 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize,
|
||||||
*logical_ashift = highbit64(MAX(logical_block_size,
|
*logical_ashift = highbit64(MAX(logical_block_size,
|
||||||
SPA_MINBLOCKSIZE)) - 1;
|
SPA_MINBLOCKSIZE)) - 1;
|
||||||
|
|
||||||
|
zia_get_props(v->vdev_spa)->min_offload_size = 2 << *physical_ashift;
|
||||||
|
|
||||||
|
/* open disk; ignore errors - will fall back to ZFS */
|
||||||
|
zia_disk_open(v, v->vdev_path, BDH_BDEV(vd->vd_bdh));
|
||||||
|
|
||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -472,9 +479,11 @@ vdev_disk_close(vdev_t *v)
|
||||||
if (v->vdev_reopening || vd == NULL)
|
if (v->vdev_reopening || vd == NULL)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
if (vd->vd_bdh != NULL)
|
if (vd->vd_bdh != NULL) {
|
||||||
|
zia_disk_close(v);
|
||||||
vdev_blkdev_put(vd->vd_bdh, spa_mode(v->vdev_spa),
|
vdev_blkdev_put(vd->vd_bdh, spa_mode(v->vdev_spa),
|
||||||
zfs_vdev_holder);
|
zfs_vdev_holder);
|
||||||
|
}
|
||||||
|
|
||||||
rw_destroy(&vd->vd_lock);
|
rw_destroy(&vd->vd_lock);
|
||||||
kmem_free(vd, sizeof (vdev_disk_t));
|
kmem_free(vd, sizeof (vdev_disk_t));
|
||||||
|
@ -1104,17 +1113,10 @@ vdev_classic_bio_max_segs(zio_t *zio, int bio_size, uint64_t abd_offset)
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
int
|
||||||
vdev_classic_physio(zio_t *zio)
|
__vdev_classic_physio(struct block_device *bdev, zio_t *zio,
|
||||||
|
size_t io_size, uint64_t io_offset, int rw, int flags)
|
||||||
{
|
{
|
||||||
vdev_t *v = zio->io_vd;
|
|
||||||
vdev_disk_t *vd = v->vdev_tsd;
|
|
||||||
struct block_device *bdev = BDH_BDEV(vd->vd_bdh);
|
|
||||||
size_t io_size = zio->io_size;
|
|
||||||
uint64_t io_offset = zio->io_offset;
|
|
||||||
int rw = zio->io_type == ZIO_TYPE_READ ? READ : WRITE;
|
|
||||||
int flags = 0;
|
|
||||||
|
|
||||||
dio_request_t *dr;
|
dio_request_t *dr;
|
||||||
uint64_t abd_offset;
|
uint64_t abd_offset;
|
||||||
uint64_t bio_offset;
|
uint64_t bio_offset;
|
||||||
|
@ -1221,6 +1223,23 @@ retry:
|
||||||
return (error);
|
return (error);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
EXPORT_SYMBOL(__vdev_classic_physio);
|
||||||
|
|
||||||
|
static int
|
||||||
|
vdev_classic_physio(zio_t *zio)
|
||||||
|
{
|
||||||
|
vdev_t *v = zio->io_vd;
|
||||||
|
vdev_disk_t *vd = v->vdev_tsd;
|
||||||
|
struct block_device *bdev = BDH_BDEV(vd->vd_bdh);
|
||||||
|
size_t io_size = zio->io_size;
|
||||||
|
uint64_t io_offset = zio->io_offset;
|
||||||
|
int rw = zio->io_type == ZIO_TYPE_READ ? READ : WRITE;
|
||||||
|
int flags = 0;
|
||||||
|
|
||||||
|
return __vdev_classic_physio(bdev, zio,
|
||||||
|
io_size, io_offset, rw, flags);
|
||||||
|
}
|
||||||
|
|
||||||
/* ========== */
|
/* ========== */
|
||||||
|
|
||||||
BIO_END_IO_PROTO(vdev_disk_io_flush_completion, bio, error)
|
BIO_END_IO_PROTO(vdev_disk_io_flush_completion, bio, error)
|
||||||
|
@ -1242,7 +1261,7 @@ BIO_END_IO_PROTO(vdev_disk_io_flush_completion, bio, error)
|
||||||
zio_interrupt(zio);
|
zio_interrupt(zio);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
int
|
||||||
vdev_disk_io_flush(struct block_device *bdev, zio_t *zio)
|
vdev_disk_io_flush(struct block_device *bdev, zio_t *zio)
|
||||||
{
|
{
|
||||||
struct request_queue *q;
|
struct request_queue *q;
|
||||||
|
@ -1265,6 +1284,8 @@ vdev_disk_io_flush(struct block_device *bdev, zio_t *zio)
|
||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
EXPORT_SYMBOL(vdev_disk_io_flush);
|
||||||
|
|
||||||
BIO_END_IO_PROTO(vdev_disk_discard_end_io, bio, error)
|
BIO_END_IO_PROTO(vdev_disk_discard_end_io, bio, error)
|
||||||
{
|
{
|
||||||
zio_t *zio = bio->bi_private;
|
zio_t *zio = bio->bi_private;
|
||||||
|
@ -1423,6 +1444,17 @@ vdev_disk_io_start(zio_t *zio)
|
||||||
* Issue the flush. If successful, the response will
|
* Issue the flush. If successful, the response will
|
||||||
* be handled in the completion callback, so we're done.
|
* be handled in the completion callback, so we're done.
|
||||||
*/
|
*/
|
||||||
|
error = zia_disk_flush(v, zio);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* have to return here in order to not dispatch
|
||||||
|
* this zio to multiple task queues
|
||||||
|
*/
|
||||||
|
if (error == 0) {
|
||||||
|
rw_exit(&vd->vd_lock);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
error = vdev_disk_io_flush(BDH_BDEV(vd->vd_bdh), zio);
|
error = vdev_disk_io_flush(BDH_BDEV(vd->vd_bdh), zio);
|
||||||
if (error == 0) {
|
if (error == 0) {
|
||||||
rw_exit(&vd->vd_lock);
|
rw_exit(&vd->vd_lock);
|
||||||
|
@ -1446,8 +1478,46 @@ vdev_disk_io_start(zio_t *zio)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
case ZIO_TYPE_READ:
|
case ZIO_TYPE_READ:
|
||||||
|
zio->io_target_timestamp = zio_handle_io_delay(zio);
|
||||||
|
error = vdev_disk_io_rw_fn(zio);
|
||||||
|
rw_exit(&vd->vd_lock);
|
||||||
|
if (error) {
|
||||||
|
zio->io_error = error;
|
||||||
|
zio_interrupt(zio);
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
|
||||||
case ZIO_TYPE_WRITE:
|
case ZIO_TYPE_WRITE:
|
||||||
zio->io_target_timestamp = zio_handle_io_delay(zio);
|
zio->io_target_timestamp = zio_handle_io_delay(zio);
|
||||||
|
error = EIO;
|
||||||
|
|
||||||
|
boolean_t local_offload = B_FALSE;
|
||||||
|
zia_props_t *zia_props = zia_get_props(zio->io_spa);
|
||||||
|
if ((zia_props->disk_write == 1) &&
|
||||||
|
(zio->io_can_offload == B_TRUE)) {
|
||||||
|
if (zia_offload_abd(zia_props->provider, zio->io_abd,
|
||||||
|
zio->io_size, zia_props->min_offload_size,
|
||||||
|
&local_offload, B_TRUE) == ZIA_OK) {
|
||||||
|
error = zia_disk_write(v, zio, zio->io_size,
|
||||||
|
zio->io_offset, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (error == 0) {
|
||||||
|
rw_exit(&vd->vd_lock);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
error = zia_cleanup_abd(zio->io_abd, zio->io_size,
|
||||||
|
local_offload, B_TRUE);
|
||||||
|
|
||||||
|
if (error == ZIA_ACCELERATOR_DOWN) {
|
||||||
|
zia_disable_offloading(zio, B_TRUE);
|
||||||
|
rw_exit(&vd->vd_lock);
|
||||||
|
zio_interrupt(zio);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
error = vdev_disk_io_rw_fn(zio);
|
error = vdev_disk_io_rw_fn(zio);
|
||||||
rw_exit(&vd->vd_lock);
|
rw_exit(&vd->vd_lock);
|
||||||
if (error) {
|
if (error) {
|
||||||
|
@ -1488,6 +1558,7 @@ vdev_disk_io_done(zio_t *zio)
|
||||||
vdev_disk_t *vd = v->vdev_tsd;
|
vdev_disk_t *vd = v->vdev_tsd;
|
||||||
|
|
||||||
if (!zfs_check_disk_status(BDH_BDEV(vd->vd_bdh))) {
|
if (!zfs_check_disk_status(BDH_BDEV(vd->vd_bdh))) {
|
||||||
|
zia_disk_invalidate(v);
|
||||||
invalidate_bdev(BDH_BDEV(vd->vd_bdh));
|
invalidate_bdev(BDH_BDEV(vd->vd_bdh));
|
||||||
v->vdev_remove_wanted = B_TRUE;
|
v->vdev_remove_wanted = B_TRUE;
|
||||||
spa_async_request(zio->io_spa, SPA_ASYNC_REMOVE);
|
spa_async_request(zio->io_spa, SPA_ASYNC_REMOVE);
|
||||||
|
|
|
@ -36,6 +36,7 @@
|
||||||
#include <sys/fcntl.h>
|
#include <sys/fcntl.h>
|
||||||
#include <sys/vnode.h>
|
#include <sys/vnode.h>
|
||||||
#include <sys/zfs_file.h>
|
#include <sys/zfs_file.h>
|
||||||
|
#include <sys/zia.h>
|
||||||
#ifdef _KERNEL
|
#ifdef _KERNEL
|
||||||
#include <linux/falloc.h>
|
#include <linux/falloc.h>
|
||||||
#endif
|
#endif
|
||||||
|
@ -68,7 +69,11 @@ vdev_file_rele(vdev_t *vd)
|
||||||
ASSERT(vd->vdev_path != NULL);
|
ASSERT(vd->vdev_path != NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef __linux__
|
||||||
|
mode_t
|
||||||
|
#else
|
||||||
static mode_t
|
static mode_t
|
||||||
|
#endif
|
||||||
vdev_file_open_mode(spa_mode_t spa_mode)
|
vdev_file_open_mode(spa_mode_t spa_mode)
|
||||||
{
|
{
|
||||||
mode_t mode = 0;
|
mode_t mode = 0;
|
||||||
|
@ -161,6 +166,12 @@ vdev_file_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
zia_get_props(vd->vdev_spa)->min_offload_size = 2 << *physical_ashift;
|
||||||
|
|
||||||
|
/* try to open the file; ignore errors - will fall back to ZFS */
|
||||||
|
zia_file_open(vd, vd->vdev_path,
|
||||||
|
vdev_file_open_mode(spa_mode(vd->vdev_spa)), 0);
|
||||||
|
|
||||||
skip_open:
|
skip_open:
|
||||||
|
|
||||||
error = zfs_file_getattr(vf->vf_file, &zfa);
|
error = zfs_file_getattr(vf->vf_file, &zfa);
|
||||||
|
@ -184,6 +195,8 @@ vdev_file_close(vdev_t *vd)
|
||||||
if (vd->vdev_reopening || vf == NULL)
|
if (vd->vdev_reopening || vf == NULL)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
zia_file_close(vd);
|
||||||
|
|
||||||
if (vf->vf_file != NULL) {
|
if (vf->vf_file != NULL) {
|
||||||
(void) zfs_file_close(vf->vf_file);
|
(void) zfs_file_close(vf->vf_file);
|
||||||
}
|
}
|
||||||
|
@ -203,18 +216,53 @@ vdev_file_io_strategy(void *arg)
|
||||||
void *buf;
|
void *buf;
|
||||||
loff_t off;
|
loff_t off;
|
||||||
ssize_t size;
|
ssize_t size;
|
||||||
int err;
|
int err = 0;
|
||||||
|
|
||||||
off = zio->io_offset;
|
off = zio->io_offset;
|
||||||
size = zio->io_size;
|
size = zio->io_size;
|
||||||
resid = 0;
|
resid = 0;
|
||||||
|
|
||||||
if (zio->io_type == ZIO_TYPE_READ) {
|
if (zio->io_type == ZIO_TYPE_READ) {
|
||||||
buf = abd_borrow_buf(zio->io_abd, zio->io_size);
|
buf = abd_borrow_buf(zio->io_abd, size);
|
||||||
err = zfs_file_pread(vf->vf_file, buf, size, off, &resid);
|
err = zfs_file_pread(vf->vf_file, buf, size, off, &resid);
|
||||||
abd_return_buf_copy(zio->io_abd, buf, size);
|
abd_return_buf_copy(zio->io_abd, buf, size);
|
||||||
} else {
|
} else {
|
||||||
buf = abd_borrow_buf_copy(zio->io_abd, zio->io_size);
|
err = EIO;
|
||||||
|
|
||||||
|
boolean_t local_offload = B_FALSE;
|
||||||
|
zia_props_t *zia_props = zia_get_props(zio->io_spa);
|
||||||
|
|
||||||
|
if ((zia_props->file_write == 1) &&
|
||||||
|
(zio->io_can_offload == B_TRUE)) {
|
||||||
|
if (zia_offload_abd(zia_props->provider, zio->io_abd,
|
||||||
|
size, zia_props->min_offload_size,
|
||||||
|
&local_offload, B_TRUE) == ZIA_OK) {
|
||||||
|
err = zia_file_write(vd, zio->io_abd, size, off,
|
||||||
|
&resid, &err);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* if offload and write succeeded, return here */
|
||||||
|
if (err == 0) {
|
||||||
|
zio->io_error = err;
|
||||||
|
if (resid != 0 && zio->io_error == 0)
|
||||||
|
zio->io_error = SET_ERROR(ENOSPC);
|
||||||
|
|
||||||
|
zio_delay_interrupt(zio);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* if offload or write failed, bring data back into memory */
|
||||||
|
err = zia_cleanup_abd(zio->io_abd, size, local_offload, B_TRUE);
|
||||||
|
|
||||||
|
/* if onload failed, restart the zio with offloading disabled */
|
||||||
|
if (err == ZIA_ACCELERATOR_DOWN) {
|
||||||
|
zia_disable_offloading(zio, B_TRUE);
|
||||||
|
zio_delay_interrupt(zio);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
buf = abd_borrow_buf_copy(zio->io_abd, size);
|
||||||
err = zfs_file_pwrite(vf->vf_file, buf, size, off, &resid);
|
err = zfs_file_pwrite(vf->vf_file, buf, size, off, &resid);
|
||||||
abd_return_buf(zio->io_abd, buf, size);
|
abd_return_buf(zio->io_abd, buf, size);
|
||||||
}
|
}
|
||||||
|
|
|
@ -187,6 +187,51 @@ zpool_prop_init(void)
|
||||||
ZPOOL_DEDUPCACHED_PROP_NAME, PROP_TYPE_NUMBER, PROP_READONLY,
|
ZPOOL_DEDUPCACHED_PROP_NAME, PROP_TYPE_NUMBER, PROP_READONLY,
|
||||||
ZFS_TYPE_POOL, "DEDUPCACHED", B_FALSE, sfeatures);
|
ZFS_TYPE_POOL, "DEDUPCACHED", B_FALSE, sfeatures);
|
||||||
|
|
||||||
|
zprop_register_string(ZPOOL_PROP_ZIA_AVAILABLE, "zia_available",
|
||||||
|
#ifdef ZIA
|
||||||
|
"yes",
|
||||||
|
#else
|
||||||
|
"no",
|
||||||
|
#endif
|
||||||
|
PROP_READONLY, ZFS_TYPE_POOL, "yes | no", "zia_available",
|
||||||
|
sfeatures);
|
||||||
|
zprop_register_string(ZPOOL_PROP_ZIA_PROVIDER, "zia_provider", NULL,
|
||||||
|
PROP_DEFAULT, ZFS_TYPE_POOL, "<Z.I.A. Provider Name>", "PROVIDER",
|
||||||
|
sfeatures);
|
||||||
|
zprop_register_index(ZPOOL_PROP_ZIA_COMPRESS, "zia_compress",
|
||||||
|
1, PROP_DEFAULT, ZFS_TYPE_POOL, "on | off",
|
||||||
|
"zia_compress", boolean_table, sfeatures);
|
||||||
|
zprop_register_index(ZPOOL_PROP_ZIA_DECOMPRESS, "zia_decompress",
|
||||||
|
1, PROP_DEFAULT, ZFS_TYPE_POOL, "on | off",
|
||||||
|
"zia_decompress", boolean_table, sfeatures);
|
||||||
|
zprop_register_index(ZPOOL_PROP_ZIA_CHECKSUM,
|
||||||
|
"zia_checksum", 1, PROP_DEFAULT, ZFS_TYPE_POOL,
|
||||||
|
"on | off", "zia_checksum", boolean_table, sfeatures);
|
||||||
|
zprop_register_index(ZPOOL_PROP_ZIA_RAIDZ1_GEN, "zia_raidz1_gen",
|
||||||
|
1, PROP_DEFAULT, ZFS_TYPE_POOL, "on | off",
|
||||||
|
"zia_raidz1_gen", boolean_table, sfeatures);
|
||||||
|
zprop_register_index(ZPOOL_PROP_ZIA_RAIDZ2_GEN, "zia_raidz2_gen",
|
||||||
|
1, PROP_DEFAULT, ZFS_TYPE_POOL, "on | off",
|
||||||
|
"zia_raidz2_gen", boolean_table, sfeatures);
|
||||||
|
zprop_register_index(ZPOOL_PROP_ZIA_RAIDZ3_GEN, "zia_raidz3_gen",
|
||||||
|
1, PROP_DEFAULT, ZFS_TYPE_POOL, "on | off",
|
||||||
|
"zia_raidz3_gen", boolean_table, sfeatures);
|
||||||
|
zprop_register_index(ZPOOL_PROP_ZIA_RAIDZ1_REC, "zia_raidz1_rec",
|
||||||
|
1, PROP_DEFAULT, ZFS_TYPE_POOL, "on | off",
|
||||||
|
"zia_raidz1_rec", boolean_table, sfeatures);
|
||||||
|
zprop_register_index(ZPOOL_PROP_ZIA_RAIDZ2_REC, "zia_raidz2_rec",
|
||||||
|
1, PROP_DEFAULT, ZFS_TYPE_POOL, "on | off",
|
||||||
|
"zia_raidz2_rec", boolean_table, sfeatures);
|
||||||
|
zprop_register_index(ZPOOL_PROP_ZIA_RAIDZ3_REC, "zia_raidz3_rec",
|
||||||
|
1, PROP_DEFAULT, ZFS_TYPE_POOL, "on | off",
|
||||||
|
"zia_raidz3_rec", boolean_table, sfeatures);
|
||||||
|
zprop_register_index(ZPOOL_PROP_ZIA_FILE_WRITE, "zia_file_write",
|
||||||
|
1, PROP_DEFAULT, ZFS_TYPE_POOL, "on | off",
|
||||||
|
"zia_file_write", boolean_table, sfeatures);
|
||||||
|
zprop_register_index(ZPOOL_PROP_ZIA_DISK_WRITE, "zia_disk_write",
|
||||||
|
1, PROP_DEFAULT, ZFS_TYPE_POOL, "on | off",
|
||||||
|
"zia_disk_write", boolean_table, sfeatures);
|
||||||
|
|
||||||
zfs_mod_list_supported_free(sfeatures);
|
zfs_mod_list_supported_free(sfeatures);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,42 @@
|
||||||
|
© 2021. Triad National Security, LLC. All rights reserved.
|
||||||
|
|
||||||
|
This program was produced under U.S. Government contract
|
||||||
|
89233218CNA000001 for Los Alamos National Laboratory (LANL), which
|
||||||
|
is operated by Triad National Security, LLC for the U.S.
|
||||||
|
Department of Energy/National Nuclear Security Administration. All
|
||||||
|
rights in the program are reserved by Triad National Security, LLC,
|
||||||
|
and the U.S. Department of Energy/National Nuclear Security
|
||||||
|
Administration. The Government is granted for itself and others
|
||||||
|
acting on its behalf a nonexclusive, paid-up, irrevocable worldwide
|
||||||
|
license in this material to reproduce, prepare derivative works,
|
||||||
|
distribute copies to the public, perform publicly and display
|
||||||
|
publicly, and to permit others to do so.
|
||||||
|
|
||||||
|
----
|
||||||
|
|
||||||
|
This program is open source under the BSD-3 License.
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
1. Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer in the documentation
|
||||||
|
and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
3. Neither the name of the copyright holder nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from this
|
||||||
|
software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
|
@ -0,0 +1 @@
|
||||||
|
Z.I.A. FUNCTIONALITY IN ZFS
|
|
@ -101,6 +101,7 @@
|
||||||
#include <sys/zio.h>
|
#include <sys/zio.h>
|
||||||
#include <sys/zfs_context.h>
|
#include <sys/zfs_context.h>
|
||||||
#include <sys/zfs_znode.h>
|
#include <sys/zfs_znode.h>
|
||||||
|
#include <sys/zia.h>
|
||||||
|
|
||||||
/* see block comment above for description */
|
/* see block comment above for description */
|
||||||
int zfs_abd_scatter_enabled = B_TRUE;
|
int zfs_abd_scatter_enabled = B_TRUE;
|
||||||
|
@ -147,11 +148,15 @@ abd_init_struct(abd_t *abd)
|
||||||
abd->abd_parent = NULL;
|
abd->abd_parent = NULL;
|
||||||
#endif
|
#endif
|
||||||
abd->abd_size = 0;
|
abd->abd_size = 0;
|
||||||
|
|
||||||
|
abd->abd_zia_handle = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
abd_fini_struct(abd_t *abd)
|
abd_fini_struct(abd_t *abd)
|
||||||
{
|
{
|
||||||
|
zia_free_abd(abd, B_TRUE);
|
||||||
|
|
||||||
mutex_destroy(&abd->abd_mtx);
|
mutex_destroy(&abd->abd_mtx);
|
||||||
ASSERT(!list_link_active(&abd->abd_gang_link));
|
ASSERT(!list_link_active(&abd->abd_gang_link));
|
||||||
#ifdef ZFS_DEBUG
|
#ifdef ZFS_DEBUG
|
||||||
|
@ -321,6 +326,8 @@ abd_free(abd_t *abd)
|
||||||
abd_free_struct_impl(abd);
|
abd_free_struct_impl(abd);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
EXPORT_SYMBOL(abd_free);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Allocate an ABD of the same format (same metadata flag, same scatterize
|
* Allocate an ABD of the same format (same metadata flag, same scatterize
|
||||||
* setting) as another ABD.
|
* setting) as another ABD.
|
||||||
|
@ -630,6 +637,8 @@ abd_get_from_buf(void *buf, size_t size)
|
||||||
return (abd_get_from_buf_impl(abd, buf, size));
|
return (abd_get_from_buf_impl(abd, buf, size));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
EXPORT_SYMBOL(abd_get_from_buf);
|
||||||
|
|
||||||
abd_t *
|
abd_t *
|
||||||
abd_get_from_buf_struct(abd_t *abd, void *buf, size_t size)
|
abd_get_from_buf_struct(abd_t *abd, void *buf, size_t size)
|
||||||
{
|
{
|
||||||
|
@ -736,7 +745,6 @@ abd_release_ownership_of_buf(abd_t *abd)
|
||||||
abd_update_linear_stats(abd, ABDSTAT_DECR);
|
abd_update_linear_stats(abd, ABDSTAT_DECR);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Give this ABD ownership of the buffer that it's storing. Can only be used on
|
* Give this ABD ownership of the buffer that it's storing. Can only be used on
|
||||||
* linear ABDs which were allocated via abd_get_from_buf(), or ones allocated
|
* linear ABDs which were allocated via abd_get_from_buf(), or ones allocated
|
||||||
|
|
|
@ -57,6 +57,7 @@
|
||||||
#include <sys/trace_zfs.h>
|
#include <sys/trace_zfs.h>
|
||||||
#include <sys/zfs_racct.h>
|
#include <sys/zfs_racct.h>
|
||||||
#include <sys/zfs_rlock.h>
|
#include <sys/zfs_rlock.h>
|
||||||
|
#include <sys/zia.h>
|
||||||
#ifdef _KERNEL
|
#ifdef _KERNEL
|
||||||
#include <sys/vmsystm.h>
|
#include <sys/vmsystm.h>
|
||||||
#include <sys/zfs_znode.h>
|
#include <sys/zfs_znode.h>
|
||||||
|
@ -2778,6 +2779,7 @@ byteswap_uint8_array(void *vbuf, size_t size)
|
||||||
void
|
void
|
||||||
dmu_init(void)
|
dmu_init(void)
|
||||||
{
|
{
|
||||||
|
zia_init();
|
||||||
abd_init();
|
abd_init();
|
||||||
zfs_dbgmsg_init();
|
zfs_dbgmsg_init();
|
||||||
sa_cache_init();
|
sa_cache_init();
|
||||||
|
@ -2793,6 +2795,7 @@ dmu_init(void)
|
||||||
void
|
void
|
||||||
dmu_fini(void)
|
dmu_fini(void)
|
||||||
{
|
{
|
||||||
|
zia_fini();
|
||||||
arc_fini(); /* arc depends on l2arc, so arc must go first */
|
arc_fini(); /* arc depends on l2arc, so arc must go first */
|
||||||
l2arc_fini();
|
l2arc_fini();
|
||||||
dmu_tx_fini();
|
dmu_tx_fini();
|
||||||
|
|
|
@ -52,7 +52,7 @@ int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
|
||||||
|
|
||||||
static kmem_cache_t *lz4_cache;
|
static kmem_cache_t *lz4_cache;
|
||||||
|
|
||||||
static size_t
|
size_t
|
||||||
zfs_lz4_compress_buf(void *s_start, void *d_start, size_t s_len,
|
zfs_lz4_compress_buf(void *s_start, void *d_start, size_t s_len,
|
||||||
size_t d_len, int n)
|
size_t d_len, int n)
|
||||||
{
|
{
|
||||||
|
@ -80,7 +80,7 @@ zfs_lz4_compress_buf(void *s_start, void *d_start, size_t s_len,
|
||||||
return (bufsiz + sizeof (bufsiz));
|
return (bufsiz + sizeof (bufsiz));
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
int
|
||||||
zfs_lz4_decompress_buf(void *s_start, void *d_start, size_t s_len,
|
zfs_lz4_decompress_buf(void *s_start, void *d_start, size_t s_len,
|
||||||
size_t d_len, int n)
|
size_t d_len, int n)
|
||||||
{
|
{
|
||||||
|
@ -103,6 +103,9 @@ zfs_lz4_decompress_buf(void *s_start, void *d_start, size_t s_len,
|
||||||
ZFS_COMPRESS_WRAP_DECL(zfs_lz4_compress)
|
ZFS_COMPRESS_WRAP_DECL(zfs_lz4_compress)
|
||||||
ZFS_DECOMPRESS_WRAP_DECL(zfs_lz4_decompress)
|
ZFS_DECOMPRESS_WRAP_DECL(zfs_lz4_decompress)
|
||||||
|
|
||||||
|
EXPORT_SYMBOL(zfs_lz4_compress_buf);
|
||||||
|
EXPORT_SYMBOL(zfs_lz4_decompress_buf);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* LZ4 API Description:
|
* LZ4 API Description:
|
||||||
*
|
*
|
||||||
|
|
198
module/zfs/spa.c
198
module/zfs/spa.c
|
@ -89,6 +89,7 @@
|
||||||
#include <sys/dsl_scan.h>
|
#include <sys/dsl_scan.h>
|
||||||
#include <sys/zfeature.h>
|
#include <sys/zfeature.h>
|
||||||
#include <sys/dsl_destroy.h>
|
#include <sys/dsl_destroy.h>
|
||||||
|
#include <sys/zia.h>
|
||||||
#include <sys/zvol.h>
|
#include <sys/zvol.h>
|
||||||
|
|
||||||
#ifdef _KERNEL
|
#ifdef _KERNEL
|
||||||
|
@ -532,6 +533,46 @@ spa_prop_get_config(spa_t *spa, nvlist_t *nv)
|
||||||
dp->scd_path, 0, ZPROP_SRC_LOCAL);
|
dp->scd_path, 0, ZPROP_SRC_LOCAL);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
zia_props_t *zia_props = zia_get_props(spa);
|
||||||
|
if (zia_props->provider != NULL) {
|
||||||
|
spa_prop_add_list(nv, ZPOOL_PROP_ZIA_PROVIDER,
|
||||||
|
(char *)zia_get_provider_name(zia_props->provider),
|
||||||
|
0, ZPROP_SRC_LOCAL);
|
||||||
|
}
|
||||||
|
|
||||||
|
spa_prop_add_list(nv, ZPOOL_PROP_ZIA_COMPRESS,
|
||||||
|
NULL, zia_props->compress, ZPROP_SRC_LOCAL);
|
||||||
|
|
||||||
|
spa_prop_add_list(nv, ZPOOL_PROP_ZIA_DECOMPRESS,
|
||||||
|
NULL, zia_props->decompress, ZPROP_SRC_LOCAL);
|
||||||
|
|
||||||
|
spa_prop_add_list(nv, ZPOOL_PROP_ZIA_CHECKSUM,
|
||||||
|
NULL, zia_props->checksum, ZPROP_SRC_LOCAL);
|
||||||
|
|
||||||
|
spa_prop_add_list(nv, ZPOOL_PROP_ZIA_RAIDZ1_GEN,
|
||||||
|
NULL, zia_props->raidz.gen[1], ZPROP_SRC_LOCAL);
|
||||||
|
|
||||||
|
spa_prop_add_list(nv, ZPOOL_PROP_ZIA_RAIDZ2_GEN,
|
||||||
|
NULL, zia_props->raidz.gen[2], ZPROP_SRC_LOCAL);
|
||||||
|
|
||||||
|
spa_prop_add_list(nv, ZPOOL_PROP_ZIA_RAIDZ3_GEN,
|
||||||
|
NULL, zia_props->raidz.gen[3], ZPROP_SRC_LOCAL);
|
||||||
|
|
||||||
|
spa_prop_add_list(nv, ZPOOL_PROP_ZIA_RAIDZ1_REC,
|
||||||
|
NULL, zia_props->raidz.rec[1], ZPROP_SRC_LOCAL);
|
||||||
|
|
||||||
|
spa_prop_add_list(nv, ZPOOL_PROP_ZIA_RAIDZ2_REC,
|
||||||
|
NULL, zia_props->raidz.rec[2], ZPROP_SRC_LOCAL);
|
||||||
|
|
||||||
|
spa_prop_add_list(nv, ZPOOL_PROP_ZIA_RAIDZ3_REC,
|
||||||
|
NULL, zia_props->raidz.rec[3], ZPROP_SRC_LOCAL);
|
||||||
|
|
||||||
|
spa_prop_add_list(nv, ZPOOL_PROP_ZIA_FILE_WRITE,
|
||||||
|
NULL, zia_props->file_write, ZPROP_SRC_LOCAL);
|
||||||
|
|
||||||
|
spa_prop_add_list(nv, ZPOOL_PROP_ZIA_DISK_WRITE,
|
||||||
|
NULL, zia_props->disk_write, ZPROP_SRC_LOCAL);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -844,6 +885,20 @@ spa_prop_validate(spa_t *spa, nvlist_t *props)
|
||||||
error = SET_ERROR(E2BIG);
|
error = SET_ERROR(E2BIG);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case ZPOOL_PROP_ZIA_PROVIDER:
|
||||||
|
case ZPOOL_PROP_ZIA_COMPRESS:
|
||||||
|
case ZPOOL_PROP_ZIA_DECOMPRESS:
|
||||||
|
case ZPOOL_PROP_ZIA_CHECKSUM:
|
||||||
|
case ZPOOL_PROP_ZIA_RAIDZ1_GEN:
|
||||||
|
case ZPOOL_PROP_ZIA_RAIDZ2_GEN:
|
||||||
|
case ZPOOL_PROP_ZIA_RAIDZ3_GEN:
|
||||||
|
case ZPOOL_PROP_ZIA_RAIDZ1_REC:
|
||||||
|
case ZPOOL_PROP_ZIA_RAIDZ2_REC:
|
||||||
|
case ZPOOL_PROP_ZIA_RAIDZ3_REC:
|
||||||
|
case ZPOOL_PROP_ZIA_FILE_WRITE:
|
||||||
|
case ZPOOL_PROP_ZIA_DISK_WRITE:
|
||||||
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -2190,6 +2245,11 @@ spa_unload(spa_t *spa)
|
||||||
|
|
||||||
spa->spa_raidz_expand = NULL;
|
spa->spa_raidz_expand = NULL;
|
||||||
|
|
||||||
|
if (zia_get_props(spa)->provider != NULL) {
|
||||||
|
zia_put_provider(&zia_get_props(spa)->provider,
|
||||||
|
spa->spa_root_vdev);
|
||||||
|
}
|
||||||
|
|
||||||
spa_config_exit(spa, SCL_ALL, spa);
|
spa_config_exit(spa, SCL_ALL, spa);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -6685,6 +6745,8 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
|
||||||
|
|
||||||
spa_import_os(spa);
|
spa_import_os(spa);
|
||||||
|
|
||||||
|
zia_get_props(spa)->can_offload = B_FALSE;
|
||||||
|
|
||||||
mutex_exit(&spa_namespace_lock);
|
mutex_exit(&spa_namespace_lock);
|
||||||
|
|
||||||
return (0);
|
return (0);
|
||||||
|
@ -9547,6 +9609,7 @@ spa_sync_props(void *arg, dmu_tx_t *tx)
|
||||||
spa_t *spa = dmu_tx_pool(tx)->dp_spa;
|
spa_t *spa = dmu_tx_pool(tx)->dp_spa;
|
||||||
objset_t *mos = spa->spa_meta_objset;
|
objset_t *mos = spa->spa_meta_objset;
|
||||||
nvpair_t *elem = NULL;
|
nvpair_t *elem = NULL;
|
||||||
|
zia_props_t *zia_props = zia_get_props(spa);
|
||||||
|
|
||||||
mutex_enter(&spa->spa_props_lock);
|
mutex_enter(&spa->spa_props_lock);
|
||||||
|
|
||||||
|
@ -9620,7 +9683,142 @@ spa_sync_props(void *arg, dmu_tx_t *tx)
|
||||||
spa_history_log_internal(spa, "set", tx,
|
spa_history_log_internal(spa, "set", tx,
|
||||||
"%s=%s", nvpair_name(elem), strval);
|
"%s=%s", nvpair_name(elem), strval);
|
||||||
break;
|
break;
|
||||||
|
case ZPOOL_PROP_ZIA_PROVIDER:
|
||||||
|
strval = fnvpair_value_string(elem);
|
||||||
|
if (zia_props->provider != NULL)
|
||||||
|
zia_put_provider(&zia_props->provider,
|
||||||
|
spa->spa_root_vdev);
|
||||||
|
zia_props->provider = zia_get_provider(strval,
|
||||||
|
spa->spa_root_vdev);
|
||||||
|
zia_props->can_offload = !!zia_props->provider;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Dirty the configuration on vdevs as above.
|
||||||
|
*/
|
||||||
|
if (tx->tx_txg != TXG_INITIAL) {
|
||||||
|
vdev_config_dirty(spa->spa_root_vdev);
|
||||||
|
spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* reopen devices so that provider is used
|
||||||
|
* copied from zfs_ioc_pool_reopen
|
||||||
|
*/
|
||||||
|
spa_vdev_state_enter(spa, SCL_NONE);
|
||||||
|
vdev_close(spa->spa_root_vdev);
|
||||||
|
(void) vdev_open(spa->spa_root_vdev);
|
||||||
|
(void) spa_vdev_state_exit(spa, NULL, 0);
|
||||||
|
|
||||||
|
spa_history_log_internal(spa, "set", tx,
|
||||||
|
"%s=%s", nvpair_name(elem), strval);
|
||||||
|
break;
|
||||||
|
case ZPOOL_PROP_ZIA_COMPRESS:
|
||||||
|
zia_props->compress =
|
||||||
|
fnvpair_value_uint64(elem);
|
||||||
|
zia_prop_warn(zia_props->compress,
|
||||||
|
"Compression");
|
||||||
|
break;
|
||||||
|
case ZPOOL_PROP_ZIA_DECOMPRESS:
|
||||||
|
zia_props->decompress =
|
||||||
|
fnvpair_value_uint64(elem);
|
||||||
|
zia_prop_warn(zia_props->decompress,
|
||||||
|
"Decompression");
|
||||||
|
break;
|
||||||
|
case ZPOOL_PROP_ZIA_CHECKSUM:
|
||||||
|
zia_props->checksum =
|
||||||
|
fnvpair_value_uint64(elem);
|
||||||
|
zia_prop_warn(zia_props->checksum,
|
||||||
|
"Checksum");
|
||||||
|
break;
|
||||||
|
case ZPOOL_PROP_ZIA_RAIDZ1_GEN:
|
||||||
|
zia_props->raidz.gen[1] =
|
||||||
|
fnvpair_value_uint64(elem);
|
||||||
|
zia_prop_warn(zia_props->raidz.gen[1],
|
||||||
|
"RAIDZ 1 Generation");
|
||||||
|
break;
|
||||||
|
case ZPOOL_PROP_ZIA_RAIDZ2_GEN:
|
||||||
|
zia_props->raidz.gen[2] =
|
||||||
|
fnvpair_value_uint64(elem);
|
||||||
|
zia_prop_warn(zia_props->raidz.gen[2],
|
||||||
|
"RAIDZ 2 Generation");
|
||||||
|
break;
|
||||||
|
case ZPOOL_PROP_ZIA_RAIDZ3_GEN:
|
||||||
|
zia_props->raidz.gen[3] =
|
||||||
|
fnvpair_value_uint64(elem);
|
||||||
|
zia_prop_warn(zia_props->raidz.gen[3],
|
||||||
|
"RAIDZ 3 Generation");
|
||||||
|
break;
|
||||||
|
case ZPOOL_PROP_ZIA_RAIDZ1_REC:
|
||||||
|
zia_props->raidz.rec[1] =
|
||||||
|
fnvpair_value_uint64(elem);
|
||||||
|
/* need checksum */
|
||||||
|
if (zia_props->raidz.rec[1]) {
|
||||||
|
if (!zia_props->checksum) {
|
||||||
|
zia_props->checksum = 1;
|
||||||
|
zia_prop_warn(
|
||||||
|
zia_props->checksum,
|
||||||
|
"Checksum");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
zia_prop_warn(zia_props->raidz.rec[1],
|
||||||
|
"RAIDZ 1 Reconstruction");
|
||||||
|
break;
|
||||||
|
case ZPOOL_PROP_ZIA_RAIDZ2_REC:
|
||||||
|
zia_props->raidz.rec[2] =
|
||||||
|
fnvpair_value_uint64(elem);
|
||||||
|
/* need checksum */
|
||||||
|
if (zia_props->raidz.rec[2]) {
|
||||||
|
if (!zia_props->checksum) {
|
||||||
|
zia_props->checksum = 1;
|
||||||
|
zia_prop_warn(
|
||||||
|
zia_props->checksum,
|
||||||
|
"Checksum");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
zia_prop_warn(zia_props->raidz.rec[2],
|
||||||
|
"RAIDZ 2 Reconstruction");
|
||||||
|
break;
|
||||||
|
case ZPOOL_PROP_ZIA_RAIDZ3_REC:
|
||||||
|
zia_props->raidz.rec[3] =
|
||||||
|
fnvpair_value_uint64(elem);
|
||||||
|
/* need checksum */
|
||||||
|
if (zia_props->raidz.rec[3]) {
|
||||||
|
if (!zia_props->checksum) {
|
||||||
|
zia_props->checksum = 1;
|
||||||
|
zia_prop_warn(
|
||||||
|
zia_props->checksum,
|
||||||
|
"Checksum");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
zia_prop_warn(zia_props->raidz.rec[3],
|
||||||
|
"RAIDZ 3 Reconstruction");
|
||||||
|
break;
|
||||||
|
case ZPOOL_PROP_ZIA_FILE_WRITE:
|
||||||
|
zia_props->file_write =
|
||||||
|
fnvpair_value_uint64(elem);
|
||||||
|
|
||||||
|
/* reopen devices so that provider is used */
|
||||||
|
spa_vdev_state_enter(spa, SCL_NONE);
|
||||||
|
vdev_close(spa->spa_root_vdev);
|
||||||
|
(void) vdev_open(spa->spa_root_vdev);
|
||||||
|
(void) spa_vdev_state_exit(spa, NULL, 0);
|
||||||
|
|
||||||
|
zia_prop_warn(zia_props->file_write,
|
||||||
|
"File Write");
|
||||||
|
break;
|
||||||
|
case ZPOOL_PROP_ZIA_DISK_WRITE:
|
||||||
|
zia_props->disk_write =
|
||||||
|
fnvpair_value_uint64(elem);
|
||||||
|
|
||||||
|
/* reopen devices so that provider is used */
|
||||||
|
spa_vdev_state_enter(spa, SCL_NONE);
|
||||||
|
vdev_close(spa->spa_root_vdev);
|
||||||
|
(void) vdev_open(spa->spa_root_vdev);
|
||||||
|
(void) spa_vdev_state_exit(spa, NULL, 0);
|
||||||
|
|
||||||
|
zia_prop_warn(zia_props->disk_write,
|
||||||
|
"Disk Write");
|
||||||
|
break;
|
||||||
case ZPOOL_PROP_INVAL:
|
case ZPOOL_PROP_INVAL:
|
||||||
if (zpool_prop_feature(elemname)) {
|
if (zpool_prop_feature(elemname)) {
|
||||||
fname = strchr(elemname, '@') + 1;
|
fname = strchr(elemname, '@') + 1;
|
||||||
|
|
|
@ -725,6 +725,8 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops)
|
||||||
vd->vdev_stat.vs_timestamp = gethrtime();
|
vd->vdev_stat.vs_timestamp = gethrtime();
|
||||||
vdev_queue_init(vd);
|
vdev_queue_init(vd);
|
||||||
|
|
||||||
|
vd->vdev_zia_handle = NULL;
|
||||||
|
|
||||||
return (vd);
|
return (vd);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1067,6 +1069,8 @@ vdev_free(vdev_t *vd)
|
||||||
*/
|
*/
|
||||||
vdev_close(vd);
|
vdev_close(vd);
|
||||||
|
|
||||||
|
ASSERT3P(vd->vdev_zia_handle, ==, NULL);
|
||||||
|
|
||||||
ASSERT(!list_link_active(&vd->vdev_config_dirty_node));
|
ASSERT(!list_link_active(&vd->vdev_config_dirty_node));
|
||||||
ASSERT(!list_link_active(&vd->vdev_state_dirty_node));
|
ASSERT(!list_link_active(&vd->vdev_state_dirty_node));
|
||||||
|
|
||||||
|
|
|
@ -1033,6 +1033,7 @@ vdev_draid_map_alloc_row(zio_t *zio, raidz_row_t **rrp, uint64_t io_offset,
|
||||||
rr->rr_offset = io_offset;
|
rr->rr_offset = io_offset;
|
||||||
rr->rr_size = io_size;
|
rr->rr_size = io_size;
|
||||||
#endif
|
#endif
|
||||||
|
rr->rr_zia_handle = NULL;
|
||||||
*rrp = rr;
|
*rrp = rr;
|
||||||
|
|
||||||
uint8_t *base;
|
uint8_t *base;
|
||||||
|
|
|
@ -43,6 +43,7 @@
|
||||||
#include <sys/vdev_draid.h>
|
#include <sys/vdev_draid.h>
|
||||||
#include <sys/uberblock_impl.h>
|
#include <sys/uberblock_impl.h>
|
||||||
#include <sys/dsl_scan.h>
|
#include <sys/dsl_scan.h>
|
||||||
|
#include <sys/zia.h>
|
||||||
|
|
||||||
#ifdef ZFS_DEBUG
|
#ifdef ZFS_DEBUG
|
||||||
#include <sys/vdev.h> /* For vdev_xlate() in vdev_raidz_io_verify() */
|
#include <sys/vdev.h> /* For vdev_xlate() in vdev_raidz_io_verify() */
|
||||||
|
@ -376,6 +377,8 @@ static int zfs_scrub_after_expand = 1;
|
||||||
static void
|
static void
|
||||||
vdev_raidz_row_free(raidz_row_t *rr)
|
vdev_raidz_row_free(raidz_row_t *rr)
|
||||||
{
|
{
|
||||||
|
zia_raidz_free(rr, B_FALSE);
|
||||||
|
|
||||||
for (int c = 0; c < rr->rr_cols; c++) {
|
for (int c = 0; c < rr->rr_cols; c++) {
|
||||||
raidz_col_t *rc = &rr->rr_col[c];
|
raidz_col_t *rc = &rr->rr_col[c];
|
||||||
|
|
||||||
|
@ -628,6 +631,7 @@ vdev_raidz_map_alloc(zio_t *zio, uint64_t ashift, uint64_t dcols,
|
||||||
rr->rr_offset = zio->io_offset;
|
rr->rr_offset = zio->io_offset;
|
||||||
rr->rr_size = zio->io_size;
|
rr->rr_size = zio->io_size;
|
||||||
#endif
|
#endif
|
||||||
|
rr->rr_zia_handle = NULL;
|
||||||
|
|
||||||
uint64_t asize = 0;
|
uint64_t asize = 0;
|
||||||
|
|
||||||
|
@ -1094,7 +1098,7 @@ vdev_raidz_pqr_func(void *buf, size_t size, void *private)
|
||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
void
|
||||||
vdev_raidz_generate_parity_p(raidz_row_t *rr)
|
vdev_raidz_generate_parity_p(raidz_row_t *rr)
|
||||||
{
|
{
|
||||||
uint64_t *p = abd_to_buf(rr->rr_col[VDEV_RAIDZ_P].rc_abd);
|
uint64_t *p = abd_to_buf(rr->rr_col[VDEV_RAIDZ_P].rc_abd);
|
||||||
|
@ -1112,7 +1116,9 @@ vdev_raidz_generate_parity_p(raidz_row_t *rr)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
EXPORT_SYMBOL(vdev_raidz_generate_parity_p);
|
||||||
|
|
||||||
|
void
|
||||||
vdev_raidz_generate_parity_pq(raidz_row_t *rr)
|
vdev_raidz_generate_parity_pq(raidz_row_t *rr)
|
||||||
{
|
{
|
||||||
uint64_t *p = abd_to_buf(rr->rr_col[VDEV_RAIDZ_P].rc_abd);
|
uint64_t *p = abd_to_buf(rr->rr_col[VDEV_RAIDZ_P].rc_abd);
|
||||||
|
@ -1154,7 +1160,9 @@ vdev_raidz_generate_parity_pq(raidz_row_t *rr)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
EXPORT_SYMBOL(vdev_raidz_generate_parity_pq);
|
||||||
|
|
||||||
|
void
|
||||||
vdev_raidz_generate_parity_pqr(raidz_row_t *rr)
|
vdev_raidz_generate_parity_pqr(raidz_row_t *rr)
|
||||||
{
|
{
|
||||||
uint64_t *p = abd_to_buf(rr->rr_col[VDEV_RAIDZ_P].rc_abd);
|
uint64_t *p = abd_to_buf(rr->rr_col[VDEV_RAIDZ_P].rc_abd);
|
||||||
|
@ -1202,6 +1210,8 @@ vdev_raidz_generate_parity_pqr(raidz_row_t *rr)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
EXPORT_SYMBOL(vdev_raidz_generate_parity_pqr);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Generate RAID parity in the first virtual columns according to the number of
|
* Generate RAID parity in the first virtual columns according to the number of
|
||||||
* parity columns available.
|
* parity columns available.
|
||||||
|
@ -1888,7 +1898,7 @@ vdev_raidz_matrix_reconstruct(raidz_row_t *rr, int n, int nmissing,
|
||||||
kmem_free(p, psize);
|
kmem_free(p, psize);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
void
|
||||||
vdev_raidz_reconstruct_general(raidz_row_t *rr, int *tgts, int ntgts)
|
vdev_raidz_reconstruct_general(raidz_row_t *rr, int *tgts, int ntgts)
|
||||||
{
|
{
|
||||||
int i, c, t, tt;
|
int i, c, t, tt;
|
||||||
|
@ -2029,6 +2039,8 @@ vdev_raidz_reconstruct_general(raidz_row_t *rr, int *tgts, int ntgts)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
EXPORT_SYMBOL(vdev_raidz_reconstruct_general);
|
||||||
|
|
||||||
static void
|
static void
|
||||||
vdev_raidz_reconstruct_row(raidz_map_t *rm, raidz_row_t *rr,
|
vdev_raidz_reconstruct_row(raidz_map_t *rm, raidz_row_t *rr,
|
||||||
const int *t, int nt)
|
const int *t, int nt)
|
||||||
|
@ -2333,7 +2345,24 @@ vdev_raidz_io_start_write(zio_t *zio, raidz_row_t *rr)
|
||||||
vdev_t *vd = zio->io_vd;
|
vdev_t *vd = zio->io_vd;
|
||||||
raidz_map_t *rm = zio->io_vsd;
|
raidz_map_t *rm = zio->io_vsd;
|
||||||
|
|
||||||
vdev_raidz_generate_parity_row(rm, rr);
|
/*
|
||||||
|
* here instead of vdev_raidz_map_alloc or
|
||||||
|
* vdev_raidz_generate_parity_row to not have to
|
||||||
|
* store local_offload and be able to use zio
|
||||||
|
*/
|
||||||
|
boolean_t local_offload = B_FALSE;
|
||||||
|
if ((zia_raidz_alloc(zio, rr, B_FALSE, 0, &local_offload) != ZIA_OK) ||
|
||||||
|
(zia_raidz_gen(rr) != ZIA_OK)) {
|
||||||
|
if (zia_raidz_gen_cleanup(zio, rr,
|
||||||
|
local_offload) == ZIA_ACCELERATOR_DOWN) {
|
||||||
|
zia_disable_offloading(zio, B_TRUE);
|
||||||
|
zio->io_stage = ZIO_STAGE_VDEV_IO_ASSESS >> 1;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
vdev_raidz_generate_parity_row(rm, rr);
|
||||||
|
} else {
|
||||||
|
zio->io_flags |= ZIO_FLAG_DONT_AGGREGATE;
|
||||||
|
}
|
||||||
|
|
||||||
for (int c = 0; c < rr->rr_scols; c++) {
|
for (int c = 0; c < rr->rr_scols; c++) {
|
||||||
raidz_col_t *rc = &rr->rr_col[c];
|
raidz_col_t *rc = &rr->rr_col[c];
|
||||||
|
@ -2631,14 +2660,69 @@ raidz_checksum_verify(zio_t *zio)
|
||||||
{
|
{
|
||||||
zio_bad_cksum_t zbc = {0};
|
zio_bad_cksum_t zbc = {0};
|
||||||
raidz_map_t *rm = zio->io_vsd;
|
raidz_map_t *rm = zio->io_vsd;
|
||||||
|
/*
|
||||||
|
* if the zio entered this function offloaded,
|
||||||
|
* need to onload the parity columns on error
|
||||||
|
*/
|
||||||
|
const boolean_t entered_offloaded = zia_is_offloaded(zio->io_abd);
|
||||||
|
|
||||||
int ret = zio_checksum_error(zio, &zbc);
|
int ret = zio_checksum_error(zio, &zbc);
|
||||||
if (ret != 0 && zbc.zbc_injected != 0)
|
if (ret != 0 && zbc.zbc_injected != 0)
|
||||||
rm->rm_ecksuminjected = 1;
|
rm->rm_ecksuminjected = 1;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* zio_checksum_error does not get access to
|
||||||
|
* rm, so only the abd is freed on error -
|
||||||
|
* clean up rm here
|
||||||
|
*/
|
||||||
|
if (zia_is_offloaded(zio->io_abd) != B_TRUE) {
|
||||||
|
for (int i = 0; i < rm->rm_nrows; i++) {
|
||||||
|
raidz_row_t *rr = rm->rm_row[i];
|
||||||
|
|
||||||
|
/*
|
||||||
|
* force onload, since data was modified
|
||||||
|
*
|
||||||
|
* ignore return value - will always return ZIA_ERROR
|
||||||
|
*/
|
||||||
|
zia_raidz_rec_cleanup(zio, rr, B_TRUE,
|
||||||
|
entered_offloaded);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return (ret);
|
return (ret);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
raidz_move_orig_parity(zio_t *zio, raidz_row_t *rr, abd_t **orig)
|
||||||
|
{
|
||||||
|
(void) zio;
|
||||||
|
|
||||||
|
for (uint64_t c = 0; c < rr->rr_firstdatacol; c++) {
|
||||||
|
raidz_col_t *rc = &rr->rr_col[c];
|
||||||
|
if (!rc->rc_tried || rc->rc_error != 0)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
orig[c] = rc->rc_abd;
|
||||||
|
ASSERT3U(abd_get_size(rc->rc_abd), ==, rc->rc_size);
|
||||||
|
rc->rc_abd = abd_alloc_linear(rc->rc_size, B_FALSE);
|
||||||
|
zia_raidz_new_parity(zio, rr, c);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
raidz_restore_orig_parity(raidz_row_t *rr, abd_t **orig)
|
||||||
|
{
|
||||||
|
for (uint64_t c = 0; c < rr->rr_firstdatacol; c++) {
|
||||||
|
raidz_col_t *rc = &rr->rr_col[c];
|
||||||
|
if (!rc->rc_tried || rc->rc_error != 0)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
abd_free(rc->rc_abd);
|
||||||
|
rc->rc_abd = orig[c];
|
||||||
|
orig[c] = NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Generate the parity from the data columns. If we tried and were able to
|
* Generate the parity from the data columns. If we tried and were able to
|
||||||
* read the parity without error, verify that the generated parity matches the
|
* read the parity without error, verify that the generated parity matches the
|
||||||
|
@ -2648,7 +2732,7 @@ raidz_checksum_verify(zio_t *zio)
|
||||||
static int
|
static int
|
||||||
raidz_parity_verify(zio_t *zio, raidz_row_t *rr)
|
raidz_parity_verify(zio_t *zio, raidz_row_t *rr)
|
||||||
{
|
{
|
||||||
abd_t *orig[VDEV_RAIDZ_MAXPARITY];
|
abd_t *orig[VDEV_RAIDZ_MAXPARITY] = { NULL };
|
||||||
int c, ret = 0;
|
int c, ret = 0;
|
||||||
raidz_map_t *rm = zio->io_vsd;
|
raidz_map_t *rm = zio->io_vsd;
|
||||||
raidz_col_t *rc;
|
raidz_col_t *rc;
|
||||||
|
@ -2660,15 +2744,7 @@ raidz_parity_verify(zio_t *zio, raidz_row_t *rr)
|
||||||
if (checksum == ZIO_CHECKSUM_NOPARITY)
|
if (checksum == ZIO_CHECKSUM_NOPARITY)
|
||||||
return (ret);
|
return (ret);
|
||||||
|
|
||||||
for (c = 0; c < rr->rr_firstdatacol; c++) {
|
raidz_move_orig_parity(zio, rr, orig);
|
||||||
rc = &rr->rr_col[c];
|
|
||||||
if (!rc->rc_tried || rc->rc_error != 0)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
orig[c] = rc->rc_abd;
|
|
||||||
ASSERT3U(abd_get_size(rc->rc_abd), ==, rc->rc_size);
|
|
||||||
rc->rc_abd = abd_alloc_linear(rc->rc_size, B_FALSE);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Verify any empty sectors are zero filled to ensure the parity
|
* Verify any empty sectors are zero filled to ensure the parity
|
||||||
|
@ -2682,7 +2758,29 @@ raidz_parity_verify(zio_t *zio, raidz_row_t *rr)
|
||||||
* isn't harmful but it does have the side effect of fixing stuff
|
* isn't harmful but it does have the side effect of fixing stuff
|
||||||
* we didn't realize was necessary (i.e. even if we return 0).
|
* we didn't realize was necessary (i.e. even if we return 0).
|
||||||
*/
|
*/
|
||||||
vdev_raidz_generate_parity_row(rm, rr);
|
if (zia_raidz_gen(rr) != ZIA_OK) {
|
||||||
|
/*
|
||||||
|
* restore original parity columns so
|
||||||
|
* that the reconstructed parity can
|
||||||
|
* be brought back with the data columns
|
||||||
|
*/
|
||||||
|
raidz_restore_orig_parity(rr, orig);
|
||||||
|
|
||||||
|
/* return reconstructed columns to memory */
|
||||||
|
const int zia_rc = zia_raidz_rec_cleanup(zio, rr,
|
||||||
|
B_FALSE, B_TRUE);
|
||||||
|
|
||||||
|
if (zia_rc == ZIA_ACCELERATOR_DOWN) {
|
||||||
|
return (VDEV_RAIDZ_MAXPARITY + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* continue to software, so redo the
|
||||||
|
* original moving of parity columns
|
||||||
|
*/
|
||||||
|
raidz_move_orig_parity(zio, rr, orig);
|
||||||
|
vdev_raidz_generate_parity_row(rm, rr);
|
||||||
|
}
|
||||||
|
|
||||||
for (c = 0; c < rr->rr_firstdatacol; c++) {
|
for (c = 0; c < rr->rr_firstdatacol; c++) {
|
||||||
rc = &rr->rr_col[c];
|
rc = &rr->rr_col[c];
|
||||||
|
@ -2690,7 +2788,70 @@ raidz_parity_verify(zio_t *zio, raidz_row_t *rr)
|
||||||
if (!rc->rc_tried || rc->rc_error != 0)
|
if (!rc->rc_tried || rc->rc_error != 0)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
if (abd_cmp(orig[c], rc->rc_abd) != 0) {
|
int cmp = 0;
|
||||||
|
if (zia_raidz_cmp(orig[c], rc->rc_abd, &cmp) != ZIA_OK) {
|
||||||
|
if (zia_is_offloaded(zio->io_abd) ||
|
||||||
|
rr->rr_zia_handle) {
|
||||||
|
/*
|
||||||
|
* should only need to onload orig[c] and
|
||||||
|
* rc but onloading everything to not create
|
||||||
|
* inconsistent rr state
|
||||||
|
*/
|
||||||
|
int zia_rc = zia_raidz_rec_cleanup(zio, rr,
|
||||||
|
B_FALSE, B_TRUE);
|
||||||
|
|
||||||
|
for (uint64_t i = 0; i < rr->rr_firstdatacol;
|
||||||
|
i++) {
|
||||||
|
if (orig[i]) {
|
||||||
|
zia_rc = zia_worst_error(zia_rc,
|
||||||
|
zia_onload_abd(orig[i],
|
||||||
|
orig[i]->abd_size,
|
||||||
|
B_FALSE));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (zia_rc == ZIA_ACCELERATOR_DOWN) {
|
||||||
|
/*
|
||||||
|
* get original parity columns back to
|
||||||
|
* get the original in-memory data
|
||||||
|
*/
|
||||||
|
raidz_restore_orig_parity(rr, orig);
|
||||||
|
return (VDEV_RAIDZ_MAXPARITY + 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
cmp = abd_cmp(orig[c], rc->rc_abd);
|
||||||
|
}
|
||||||
|
if (cmp != 0) {
|
||||||
|
if (zia_is_offloaded(zio->io_abd) ||
|
||||||
|
rr->rr_zia_handle) {
|
||||||
|
/*
|
||||||
|
* should only need to onload orig[c] and
|
||||||
|
* rc but onloading everything to not create
|
||||||
|
* inconsistent rr state
|
||||||
|
*/
|
||||||
|
int zia_rc = zia_raidz_rec_cleanup(zio, rr,
|
||||||
|
B_FALSE, B_TRUE);
|
||||||
|
|
||||||
|
for (uint64_t i = 0; i < rr->rr_firstdatacol;
|
||||||
|
i++) {
|
||||||
|
if (orig[i]) {
|
||||||
|
zia_rc = zia_worst_error(zia_rc,
|
||||||
|
zia_onload_abd(orig[i],
|
||||||
|
orig[i]->abd_size,
|
||||||
|
B_FALSE));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (zia_rc == ZIA_ACCELERATOR_DOWN) {
|
||||||
|
/*
|
||||||
|
* get original parity columns back to
|
||||||
|
* get the original in-memory data
|
||||||
|
*/
|
||||||
|
raidz_restore_orig_parity(rr, orig);
|
||||||
|
return (VDEV_RAIDZ_MAXPARITY + 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
zfs_dbgmsg("found error on col=%u devidx=%u off %llx",
|
zfs_dbgmsg("found error on col=%u devidx=%u off %llx",
|
||||||
c, (int)rc->rc_devidx, (u_longlong_t)rc->rc_offset);
|
c, (int)rc->rc_devidx, (u_longlong_t)rc->rc_offset);
|
||||||
vdev_raidz_checksum_error(zio, rc, orig[c]);
|
vdev_raidz_checksum_error(zio, rc, orig[c]);
|
||||||
|
@ -2716,7 +2877,7 @@ vdev_raidz_worst_error(raidz_row_t *rr)
|
||||||
return (error);
|
return (error);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static int
|
||||||
vdev_raidz_io_done_verified(zio_t *zio, raidz_row_t *rr)
|
vdev_raidz_io_done_verified(zio_t *zio, raidz_row_t *rr)
|
||||||
{
|
{
|
||||||
int unexpected_errors = 0;
|
int unexpected_errors = 0;
|
||||||
|
@ -2758,6 +2919,10 @@ vdev_raidz_io_done_verified(zio_t *zio, raidz_row_t *rr)
|
||||||
(zio->io_flags & ZIO_FLAG_RESILVER)) {
|
(zio->io_flags & ZIO_FLAG_RESILVER)) {
|
||||||
int n = raidz_parity_verify(zio, rr);
|
int n = raidz_parity_verify(zio, rr);
|
||||||
unexpected_errors += n;
|
unexpected_errors += n;
|
||||||
|
|
||||||
|
if (n != 0) {
|
||||||
|
return (n);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (zio->io_error == 0 && spa_writeable(zio->io_spa) &&
|
if (zio->io_error == 0 && spa_writeable(zio->io_spa) &&
|
||||||
|
@ -2826,6 +2991,7 @@ vdev_raidz_io_done_verified(zio_t *zio, raidz_row_t *rr)
|
||||||
zio_nowait(cio);
|
zio_nowait(cio);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
return (0);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
@ -2970,15 +3136,43 @@ raidz_reconstruct(zio_t *zio, int *ltgts, int ntgts, int nparity)
|
||||||
zfs_dbgmsg("reconstruction not possible; "
|
zfs_dbgmsg("reconstruction not possible; "
|
||||||
"too many failures");
|
"too many failures");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* drop offloaded data */
|
||||||
|
for (int i = 0; i < rm->rm_nrows; i++) {
|
||||||
|
raidz_row_t *rr = rm->rm_row[i];
|
||||||
|
zia_raidz_rec_cleanup(zio, rr, B_TRUE, B_FALSE);
|
||||||
|
/* no data movement, so errors don't matter */
|
||||||
|
}
|
||||||
raidz_restore_orig_data(rm);
|
raidz_restore_orig_data(rm);
|
||||||
return (EINVAL);
|
return (EINVAL);
|
||||||
}
|
}
|
||||||
if (dead_data > 0)
|
|
||||||
vdev_raidz_reconstruct_row(rm, rr, my_tgts, t);
|
if (dead_data > 0) {
|
||||||
|
/*
|
||||||
|
* here instead of vdev_raidz_reconstruct_row
|
||||||
|
* to be able to use zio
|
||||||
|
*/
|
||||||
|
if (zia_raidz_rec(rr, my_tgts, t) != ZIA_OK) {
|
||||||
|
int ret = ZIA_OK;
|
||||||
|
for (int i = 0; i < rm->rm_nrows; i++) {
|
||||||
|
raidz_row_t *rr = rm->rm_row[i];
|
||||||
|
ret = zia_worst_error(ret,
|
||||||
|
zia_raidz_rec_cleanup(zio, rr,
|
||||||
|
B_FALSE, B_TRUE));
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((ret != ZIA_OK) &&
|
||||||
|
(ret != ZIA_ACCELERATOR_DOWN)) {
|
||||||
|
vdev_raidz_reconstruct_row(rm, rr,
|
||||||
|
my_tgts, t);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Check for success */
|
/* Check for success */
|
||||||
if (raidz_checksum_verify(zio) == 0) {
|
if (raidz_checksum_verify(zio) == 0) {
|
||||||
|
int ret = 0;
|
||||||
|
|
||||||
/* Reconstruction succeeded - report errors */
|
/* Reconstruction succeeded - report errors */
|
||||||
for (int i = 0; i < rm->rm_nrows; i++) {
|
for (int i = 0; i < rm->rm_nrows; i++) {
|
||||||
|
@ -3008,19 +3202,32 @@ raidz_reconstruct(zio_t *zio, int *ltgts, int ntgts, int nparity)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
vdev_raidz_io_done_verified(zio, rr);
|
const int rc =
|
||||||
|
vdev_raidz_io_done_verified(zio, rr);
|
||||||
|
ret = zia_worst_error(ret, rc);
|
||||||
}
|
}
|
||||||
|
|
||||||
zio_checksum_verified(zio);
|
zio_checksum_verified(zio);
|
||||||
|
|
||||||
|
if (ret != ZIA_ACCELERATOR_DOWN) {
|
||||||
|
ret = 0;
|
||||||
|
}
|
||||||
|
|
||||||
if (dbgmsg) {
|
if (dbgmsg) {
|
||||||
zfs_dbgmsg("reconstruction successful "
|
zfs_dbgmsg("reconstruction successful "
|
||||||
"(checksum verified)");
|
"(checksum verified)");
|
||||||
}
|
}
|
||||||
return (0);
|
|
||||||
|
return (ret);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Reconstruction failed - restore original data */
|
/* Reconstruction failed - restore original data */
|
||||||
|
/* drop offloaded data */
|
||||||
|
for (int i = 0; i < rm->rm_nrows; i++) {
|
||||||
|
raidz_row_t *rr = rm->rm_row[i];
|
||||||
|
zia_raidz_rec_cleanup(zio, rr, B_TRUE, B_FALSE);
|
||||||
|
/* no data movement, so errors don't matter */
|
||||||
|
}
|
||||||
raidz_restore_orig_data(rm);
|
raidz_restore_orig_data(rm);
|
||||||
if (dbgmsg) {
|
if (dbgmsg) {
|
||||||
zfs_dbgmsg("raidz_reconstruct_expanded(zio=%px) checksum "
|
zfs_dbgmsg("raidz_reconstruct_expanded(zio=%px) checksum "
|
||||||
|
@ -3128,6 +3335,9 @@ vdev_raidz_combrec(zio_t *zio)
|
||||||
for (;;) {
|
for (;;) {
|
||||||
int err = raidz_reconstruct(zio, ltgts, num_failures,
|
int err = raidz_reconstruct(zio, ltgts, num_failures,
|
||||||
nparity);
|
nparity);
|
||||||
|
if (err == ZIA_ACCELERATOR_DOWN) {
|
||||||
|
return (err);
|
||||||
|
}
|
||||||
if (err == EINVAL) {
|
if (err == EINVAL) {
|
||||||
/*
|
/*
|
||||||
* Reconstruction not possible with this #
|
* Reconstruction not possible with this #
|
||||||
|
@ -3316,6 +3526,18 @@ vdev_raidz_io_done_reconstruct_known_missing(zio_t *zio, raidz_map_t *rm,
|
||||||
|
|
||||||
ASSERT(rr->rr_firstdatacol >= n);
|
ASSERT(rr->rr_firstdatacol >= n);
|
||||||
|
|
||||||
|
if (zia_raidz_rec(rr, tgts, n) == ZIA_OK) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* drop handles instead of onloading
|
||||||
|
*
|
||||||
|
* return value doesn't matter because
|
||||||
|
* the data hasn't changed yet
|
||||||
|
*/
|
||||||
|
zia_raidz_rec_cleanup(zio, rr,
|
||||||
|
B_TRUE, B_FALSE);
|
||||||
vdev_raidz_reconstruct_row(rm, rr, tgts, n);
|
vdev_raidz_reconstruct_row(rm, rr, tgts, n);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -3437,16 +3659,88 @@ vdev_raidz_io_done(zio_t *zio)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* the raidz rows should never enter here already offloaded */
|
||||||
|
for (int i = 0; i < rm->rm_nrows; i++) {
|
||||||
|
raidz_row_t *rr = rm->rm_row[i];
|
||||||
|
ASSERT(rr->rr_zia_handle == NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* offload once at beginning */
|
||||||
|
blkptr_t *bp = zio->io_bp;
|
||||||
|
if (bp && !BP_IS_METADATA(bp)) {
|
||||||
|
uint_t checksum = (BP_IS_GANG(bp) ?
|
||||||
|
ZIO_CHECKSUM_GANG_HEADER : BP_GET_CHECKSUM(bp));
|
||||||
|
zio_checksum_info_t *ci = &zio_checksum_table[checksum];
|
||||||
|
if (!(ci->ci_flags & ZCHECKSUM_FLAG_EMBEDDED)) {
|
||||||
|
for (int i = 0; i < rm->rm_nrows; i++) {
|
||||||
|
raidz_row_t *rr = rm->rm_row[i];
|
||||||
|
/*
|
||||||
|
* Allow unchecked failure since failure
|
||||||
|
* to offload means the software path
|
||||||
|
* will be taken. Whether or not the
|
||||||
|
* provider/offloader is valid
|
||||||
|
* becomes irrelevant.
|
||||||
|
*/
|
||||||
|
zia_raidz_alloc(zio, rr,
|
||||||
|
B_TRUE, checksum, NULL);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
for (int i = 0; i < rm->rm_nrows; i++) {
|
for (int i = 0; i < rm->rm_nrows; i++) {
|
||||||
raidz_row_t *rr = rm->rm_row[i];
|
raidz_row_t *rr = rm->rm_row[i];
|
||||||
vdev_raidz_io_done_reconstruct_known_missing(zio,
|
vdev_raidz_io_done_reconstruct_known_missing(zio,
|
||||||
rm, rr);
|
rm, rr);
|
||||||
|
/*
|
||||||
|
* Restarting here is unnecessary. If the offloader
|
||||||
|
* failed, the offloaded data is still in sync with
|
||||||
|
* the in-memory data, and falling back reconstructed
|
||||||
|
* using the correct data.
|
||||||
|
*/
|
||||||
}
|
}
|
||||||
|
|
||||||
if (raidz_checksum_verify(zio) == 0) {
|
int ret = raidz_checksum_verify(zio);
|
||||||
|
|
||||||
|
/* ZIA_ACCELERATOR_DOWN is a completely orthogonal error */
|
||||||
|
if (ret == ZIA_ACCELERATOR_DOWN) {
|
||||||
for (int i = 0; i < rm->rm_nrows; i++) {
|
for (int i = 0; i < rm->rm_nrows; i++) {
|
||||||
raidz_row_t *rr = rm->rm_row[i];
|
raidz_row_t *rr = rm->rm_row[i];
|
||||||
vdev_raidz_io_done_verified(zio, rr);
|
zia_raidz_rec_cleanup(zio, rr, B_TRUE, B_FALSE);
|
||||||
|
}
|
||||||
|
|
||||||
|
zio->io_can_offload = B_FALSE;
|
||||||
|
zio_vdev_io_redone(zio);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ret == 0) {
|
||||||
|
for (int i = 0; i < rm->rm_nrows; i++) {
|
||||||
|
raidz_row_t *rr = rm->rm_row[i];
|
||||||
|
ret =
|
||||||
|
vdev_raidz_io_done_verified(zio, rr);
|
||||||
|
if (ret == ZIA_ACCELERATOR_DOWN) {
|
||||||
|
for (int j = 0; j < rm->rm_nrows; j++) {
|
||||||
|
rr = rm->rm_row[j];
|
||||||
|
|
||||||
|
/*
|
||||||
|
* vdev_raidz_io_done_verified
|
||||||
|
* will have already attempted
|
||||||
|
* to load reconstructed data
|
||||||
|
* back into memory, so this
|
||||||
|
* line should just drop any
|
||||||
|
* remaining handles
|
||||||
|
*
|
||||||
|
* not sure why onload_parity
|
||||||
|
* has to be set to B_TRUE
|
||||||
|
*/
|
||||||
|
zia_raidz_rec_cleanup(zio, rr,
|
||||||
|
B_TRUE, B_TRUE);
|
||||||
|
}
|
||||||
|
|
||||||
|
zio->io_can_offload = B_FALSE;
|
||||||
|
zio_vdev_io_redone(zio);
|
||||||
|
return;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
zio_checksum_verified(zio);
|
zio_checksum_verified(zio);
|
||||||
} else {
|
} else {
|
||||||
|
@ -3473,6 +3767,12 @@ vdev_raidz_io_done(zio_t *zio)
|
||||||
rm->rm_row[i]);
|
rm->rm_row[i]);
|
||||||
}
|
}
|
||||||
if (nread != 0) {
|
if (nread != 0) {
|
||||||
|
/* drop handles */
|
||||||
|
for (int i = 0; i < rm->rm_nrows; i++) {
|
||||||
|
raidz_row_t *rr = rm->rm_row[i];
|
||||||
|
zia_raidz_rec_cleanup(zio, rr,
|
||||||
|
B_TRUE, B_FALSE);
|
||||||
|
}
|
||||||
/*
|
/*
|
||||||
* Normally our stage is VDEV_IO_DONE, but if
|
* Normally our stage is VDEV_IO_DONE, but if
|
||||||
* we've already called redone(), it will have
|
* we've already called redone(), it will have
|
||||||
|
@ -3532,6 +3832,14 @@ vdev_raidz_io_done(zio_t *zio)
|
||||||
* that is also a known failure, that's fine.
|
* that is also a known failure, that's fine.
|
||||||
*/
|
*/
|
||||||
zio->io_error = vdev_raidz_combrec(zio);
|
zio->io_error = vdev_raidz_combrec(zio);
|
||||||
|
|
||||||
|
if (zio->io_error == ZIA_ACCELERATOR_DOWN) {
|
||||||
|
zio->io_error = 0;
|
||||||
|
zio->io_can_offload = B_FALSE;
|
||||||
|
zio_vdev_io_redone(zio);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
if (zio->io_error == ECKSUM &&
|
if (zio->io_error == ECKSUM &&
|
||||||
!(zio->io_flags & ZIO_FLAG_SPECULATIVE)) {
|
!(zio->io_flags & ZIO_FLAG_SPECULATIVE)) {
|
||||||
vdev_raidz_io_done_unrecoverable(zio);
|
vdev_raidz_io_done_unrecoverable(zio);
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,208 @@
|
||||||
|
/*
|
||||||
|
* CDDL HEADER START
|
||||||
|
*
|
||||||
|
* The contents of this file are subject to the terms of the
|
||||||
|
* Common Development and Distribution License (the "License").
|
||||||
|
* You may not use this file except in compliance with the License.
|
||||||
|
*
|
||||||
|
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||||
|
* or https://opensource.org/licenses/CDDL-1.0.
|
||||||
|
* See the License for the specific language governing permissions
|
||||||
|
* and limitations under the License.
|
||||||
|
*
|
||||||
|
* When distributing Covered Code, include this CDDL HEADER in each
|
||||||
|
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||||
|
* If applicable, add the following below this CDDL HEADER, with the
|
||||||
|
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||||
|
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||||
|
*
|
||||||
|
* CDDL HEADER END
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifdef ZIA
|
||||||
|
|
||||||
|
#include <sys/vdev.h>
|
||||||
|
#include <sys/vdev_disk.h>
|
||||||
|
#include <sys/vdev_raidz_impl.h>
|
||||||
|
#include <sys/zia.h>
|
||||||
|
#include <sys/zia_cddl.h>
|
||||||
|
#include <sys/zia_private.h>
|
||||||
|
#include <sys/zio_compress.h>
|
||||||
|
|
||||||
|
/* basically a duplicate of zio_compress_data */
|
||||||
|
int
|
||||||
|
zia_compress_impl(const dpusm_uf_t *dpusm, zia_props_t *props,
|
||||||
|
enum zio_compress c, abd_t *src, size_t s_len,
|
||||||
|
void **cbuf_handle, uint64_t *c_len,
|
||||||
|
uint8_t level, boolean_t *local_offload)
|
||||||
|
{
|
||||||
|
size_t d_len;
|
||||||
|
uint8_t complevel;
|
||||||
|
zio_compress_info_t *ci = &zio_compress_table[c];
|
||||||
|
int ret = ZIA_OK;
|
||||||
|
|
||||||
|
ASSERT((uint_t)c < ZIO_COMPRESS_FUNCTIONS);
|
||||||
|
ASSERT((uint_t)c == ZIO_COMPRESS_EMPTY || ci->ci_compress != NULL);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If the data is all zeros, we don't even need to allocate
|
||||||
|
* a block for it. We indicate this by returning zero size.
|
||||||
|
*/
|
||||||
|
if (!ABD_HANDLE(src)) {
|
||||||
|
/* check in-memory buffer for zeros */
|
||||||
|
if (abd_cmp_zero(src, s_len) == 0) {
|
||||||
|
*c_len = 0;
|
||||||
|
return (ZIA_OK);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (c == ZIO_COMPRESS_EMPTY) {
|
||||||
|
*c_len = s_len;
|
||||||
|
return (ZIA_OK);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* check that compression can be done before offloading */
|
||||||
|
dpusm_pc_t *caps = NULL;
|
||||||
|
if ((zia_get_capabilities(props->provider, &caps) != ZIA_OK) ||
|
||||||
|
!(caps->compress & compress_to_dpusm(c))) {
|
||||||
|
return (ZIA_FALLBACK);
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = zia_offload_abd(props->provider, src, s_len,
|
||||||
|
props->min_offload_size, local_offload, B_FALSE);
|
||||||
|
if (ret != ZIA_OK) {
|
||||||
|
return (ret);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
/* came in offloaded */
|
||||||
|
void *old_provider = dpusm->extract(ABD_HANDLE(src));
|
||||||
|
if (old_provider != props->provider) {
|
||||||
|
return (ZIA_PROVIDER_MISMATCH);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* use provider to check for zero buffer */
|
||||||
|
ret = dpusm->all_zeros(ABD_HANDLE(src), 0, s_len);
|
||||||
|
if (ret == DPUSM_OK) {
|
||||||
|
*c_len = 0;
|
||||||
|
return (ZIA_OK);
|
||||||
|
} else if (ret != DPUSM_BAD_RESULT) {
|
||||||
|
return (dpusm_to_ret(ret));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (c == ZIO_COMPRESS_EMPTY) {
|
||||||
|
*c_len = s_len;
|
||||||
|
return (ZIA_OK);
|
||||||
|
}
|
||||||
|
|
||||||
|
dpusm_pc_t *caps = NULL;
|
||||||
|
ret = zia_get_capabilities(props->provider, &caps);
|
||||||
|
if (ret != ZIA_OK) {
|
||||||
|
return (ret);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!(caps->compress & compress_to_dpusm(c))) {
|
||||||
|
return (ZIA_FALLBACK);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Compress at least 12.5% */
|
||||||
|
d_len = s_len - (s_len >> 3);
|
||||||
|
|
||||||
|
complevel = ci->ci_level;
|
||||||
|
|
||||||
|
if (c == ZIO_COMPRESS_ZSTD) {
|
||||||
|
/* If we don't know the level, we can't compress it */
|
||||||
|
if (level == ZIO_COMPLEVEL_INHERIT) {
|
||||||
|
*c_len = s_len;
|
||||||
|
return (ZIA_OK);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (level == ZIO_COMPLEVEL_DEFAULT)
|
||||||
|
complevel = ZIO_ZSTD_LEVEL_DEFAULT;
|
||||||
|
else
|
||||||
|
complevel = level;
|
||||||
|
|
||||||
|
ASSERT3U(complevel, !=, ZIO_COMPLEVEL_INHERIT);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* nothing to offload, so just allocate space */
|
||||||
|
*cbuf_handle = zia_alloc(props->provider,
|
||||||
|
s_len, props->min_offload_size);
|
||||||
|
if (!*cbuf_handle) {
|
||||||
|
return (ZIA_ERROR);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* DPUSM interface takes in a size_t, not a uint64_t */
|
||||||
|
size_t zia_c_len = (size_t)s_len;
|
||||||
|
ret = dpusm->compress(compress_to_dpusm(c), (int8_t)level,
|
||||||
|
ABD_HANDLE(src), s_len, *cbuf_handle, &zia_c_len);
|
||||||
|
if (ret != DPUSM_OK) {
|
||||||
|
zia_free(cbuf_handle);
|
||||||
|
return (dpusm_to_ret(ret));
|
||||||
|
}
|
||||||
|
|
||||||
|
*c_len = zia_c_len;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Return ZIA_OK because this is not an error - it just didn't
|
||||||
|
* compress well. The data will be dropped later on (instead of
|
||||||
|
* onloaded) because c_len is too big.
|
||||||
|
*/
|
||||||
|
if (*c_len > d_len) {
|
||||||
|
*c_len = s_len;
|
||||||
|
}
|
||||||
|
|
||||||
|
return (ZIA_OK);
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
zia_raidz_rec_impl(const dpusm_uf_t *dpusm,
|
||||||
|
raidz_row_t *rr, int *t, int nt)
|
||||||
|
{
|
||||||
|
int tgts[VDEV_RAIDZ_MAXPARITY];
|
||||||
|
int ntgts = 0;
|
||||||
|
for (int i = 0, c = 0; c < rr->rr_cols; c++) {
|
||||||
|
if (i < nt && c == t[i]) {
|
||||||
|
tgts[ntgts++] = c;
|
||||||
|
i++;
|
||||||
|
} else if (rr->rr_col[c].rc_error != 0) {
|
||||||
|
tgts[ntgts++] = c;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ASSERT(ntgts >= nt);
|
||||||
|
|
||||||
|
return (dpusm->raid.rec(rr->rr_zia_handle,
|
||||||
|
tgts, ntgts));
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef _KERNEL
|
||||||
|
/* called by provider */
|
||||||
|
void
|
||||||
|
zia_disk_write_completion(void *zio_ptr, int error)
|
||||||
|
{
|
||||||
|
zio_t *zio = (zio_t *)zio_ptr;
|
||||||
|
zio->io_error = error;
|
||||||
|
ASSERT3S(zio->io_error, >=, 0);
|
||||||
|
if (zio->io_error)
|
||||||
|
vdev_disk_error(zio);
|
||||||
|
|
||||||
|
zio_delay_interrupt(zio);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* called by provider */
|
||||||
|
void
|
||||||
|
zia_disk_flush_completion(void *zio_ptr, int error)
|
||||||
|
{
|
||||||
|
zio_t *zio = (zio_t *)zio_ptr;
|
||||||
|
|
||||||
|
if (zio->io_error && (zio->io_error == EOPNOTSUPP))
|
||||||
|
zio->io_vd->vdev_nowritecache = B_TRUE;
|
||||||
|
|
||||||
|
ASSERT3S(zio->io_error, >=, 0);
|
||||||
|
if (zio->io_error)
|
||||||
|
vdev_disk_error(zio);
|
||||||
|
zio_interrupt(zio);
|
||||||
|
}
|
||||||
|
#endif /* _KERNEL */
|
||||||
|
|
||||||
|
#endif /* ZIA */
|
206
module/zfs/zio.c
206
module/zfs/zio.c
|
@ -51,6 +51,7 @@
|
||||||
#include <sys/trace_zfs.h>
|
#include <sys/trace_zfs.h>
|
||||||
#include <sys/abd.h>
|
#include <sys/abd.h>
|
||||||
#include <sys/dsl_crypt.h>
|
#include <sys/dsl_crypt.h>
|
||||||
|
#include <sys/zia.h>
|
||||||
#include <cityhash.h>
|
#include <cityhash.h>
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -448,12 +449,12 @@ zio_push_transform(zio_t *zio, abd_t *data, uint64_t size, uint64_t bufsize,
|
||||||
zio->io_size = size;
|
zio->io_size = size;
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
zio_transform_t *
|
||||||
zio_pop_transforms(zio_t *zio)
|
zio_pop_transform(zio_t *zio)
|
||||||
{
|
{
|
||||||
zio_transform_t *zt;
|
zio_transform_t *zt = zio->io_transform_stack;
|
||||||
|
|
||||||
while ((zt = zio->io_transform_stack) != NULL) {
|
if (zt != NULL) {
|
||||||
if (zt->zt_transform != NULL)
|
if (zt->zt_transform != NULL)
|
||||||
zt->zt_transform(zio,
|
zt->zt_transform(zio,
|
||||||
zt->zt_orig_abd, zt->zt_orig_size);
|
zt->zt_orig_abd, zt->zt_orig_size);
|
||||||
|
@ -467,6 +468,15 @@ zio_pop_transforms(zio_t *zio)
|
||||||
|
|
||||||
kmem_free(zt, sizeof (zio_transform_t));
|
kmem_free(zt, sizeof (zio_transform_t));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return (zt);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
zio_pop_transforms(zio_t *zio)
|
||||||
|
{
|
||||||
|
while (zio_pop_transform(zio)) {
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -487,9 +497,40 @@ static void
|
||||||
zio_decompress(zio_t *zio, abd_t *data, uint64_t size)
|
zio_decompress(zio_t *zio, abd_t *data, uint64_t size)
|
||||||
{
|
{
|
||||||
if (zio->io_error == 0) {
|
if (zio->io_error == 0) {
|
||||||
int ret = zio_decompress_data(BP_GET_COMPRESS(zio->io_bp),
|
int ret = ZIA_FALLBACK;
|
||||||
zio->io_abd, data, zio->io_size, size,
|
zia_props_t *zia_props = zia_get_props(zio->io_spa);
|
||||||
&zio->io_prop.zp_complevel);
|
if ((zia_props->decompress == 1) &&
|
||||||
|
(zio->io_can_offload == B_TRUE)) {
|
||||||
|
ret = zia_decompress(zia_props,
|
||||||
|
BP_GET_COMPRESS(zio->io_bp),
|
||||||
|
zio->io_abd, zio->io_size,
|
||||||
|
data, size,
|
||||||
|
&zio->io_prop.zp_complevel);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ret == ZIA_OK) {
|
||||||
|
ASSERT(zia_is_offloaded(zio->io_abd) == B_TRUE);
|
||||||
|
/*
|
||||||
|
* bring data back into memory since there
|
||||||
|
* are no subsequent offloaded stages
|
||||||
|
*/
|
||||||
|
ret = zia_onload_abd(data, size, B_FALSE);
|
||||||
|
}
|
||||||
|
|
||||||
|
ASSERT(zia_is_offloaded(data) != B_TRUE);
|
||||||
|
/* let abd_free clean up zio->io_abd */
|
||||||
|
|
||||||
|
if (ret == ZIA_OK) {
|
||||||
|
ret = 0;
|
||||||
|
} else {
|
||||||
|
if (ret == ZIA_ACCELERATOR_DOWN) {
|
||||||
|
zia_disable_offloading(zio, B_FALSE);
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = zio_decompress_data(BP_GET_COMPRESS(zio->io_bp),
|
||||||
|
zio->io_abd, data, zio->io_size, size,
|
||||||
|
&zio->io_prop.zp_complevel);
|
||||||
|
}
|
||||||
|
|
||||||
if (zio_injection_enabled && ret == 0)
|
if (zio_injection_enabled && ret == 0)
|
||||||
ret = zio_handle_fault_injection(zio, EINVAL);
|
ret = zio_handle_fault_injection(zio, EINVAL);
|
||||||
|
@ -801,6 +842,11 @@ zio_notify_parent(zio_t *pio, zio_t *zio, enum zio_wait_type wait,
|
||||||
if (zio->io_error && !(zio->io_flags & ZIO_FLAG_DONT_PROPAGATE))
|
if (zio->io_error && !(zio->io_flags & ZIO_FLAG_DONT_PROPAGATE))
|
||||||
*errorp = zio_worst_error(*errorp, zio->io_error);
|
*errorp = zio_worst_error(*errorp, zio->io_error);
|
||||||
pio->io_reexecute |= zio->io_reexecute;
|
pio->io_reexecute |= zio->io_reexecute;
|
||||||
|
if ((zio->io_flags & ZIO_FLAG_ZIA_REEXECUTE) &&
|
||||||
|
(zio->io_can_offload != B_TRUE)) {
|
||||||
|
pio->io_flags |= ZIO_FLAG_ZIA_REEXECUTE;
|
||||||
|
pio->io_can_offload = B_FALSE;
|
||||||
|
}
|
||||||
ASSERT3U(*countp, >, 0);
|
ASSERT3U(*countp, >, 0);
|
||||||
|
|
||||||
(*countp)--;
|
(*countp)--;
|
||||||
|
@ -853,6 +899,10 @@ zio_inherit_child_errors(zio_t *zio, enum zio_child c)
|
||||||
{
|
{
|
||||||
if (zio->io_child_error[c] != 0 && zio->io_error == 0)
|
if (zio->io_child_error[c] != 0 && zio->io_error == 0)
|
||||||
zio->io_error = zio->io_child_error[c];
|
zio->io_error = zio->io_child_error[c];
|
||||||
|
|
||||||
|
if (zio->io_flags & ZIO_FLAG_ZIA_REEXECUTE) {
|
||||||
|
zio->io_can_offload = B_FALSE;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
|
@ -974,7 +1024,13 @@ zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
|
||||||
if (zb != NULL)
|
if (zb != NULL)
|
||||||
zio->io_bookmark = *zb;
|
zio->io_bookmark = *zb;
|
||||||
|
|
||||||
|
zio->io_can_offload = zia_get_props(spa)->can_offload;
|
||||||
|
|
||||||
if (pio != NULL) {
|
if (pio != NULL) {
|
||||||
|
if ((pio->io_flags & ZIO_FLAG_ZIA_REEXECUTE) ||
|
||||||
|
(pio->io_can_offload != B_TRUE)) {
|
||||||
|
zio->io_can_offload = B_FALSE;
|
||||||
|
}
|
||||||
zio->io_metaslab_class = pio->io_metaslab_class;
|
zio->io_metaslab_class = pio->io_metaslab_class;
|
||||||
if (zio->io_logical == NULL)
|
if (zio->io_logical == NULL)
|
||||||
zio->io_logical = pio->io_logical;
|
zio->io_logical = pio->io_logical;
|
||||||
|
@ -983,6 +1039,13 @@ zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
|
||||||
zio_add_child_first(pio, zio);
|
zio_add_child_first(pio, zio);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* turn off encryption and dedup if Z.I.A. is used */
|
||||||
|
if (zia_is_used(zio) == B_TRUE) {
|
||||||
|
zio->io_prop.zp_dedup = B_FALSE;
|
||||||
|
zio->io_prop.zp_dedup_verify = B_FALSE;
|
||||||
|
zio->io_prop.zp_encrypt = B_FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
taskq_init_ent(&zio->io_tqent);
|
taskq_init_ent(&zio->io_tqent);
|
||||||
|
|
||||||
return (zio);
|
return (zio);
|
||||||
|
@ -1865,24 +1928,92 @@ zio_write_compress(zio_t *zio)
|
||||||
/* If it's a compressed write that is not raw, compress the buffer. */
|
/* If it's a compressed write that is not raw, compress the buffer. */
|
||||||
if (compress != ZIO_COMPRESS_OFF &&
|
if (compress != ZIO_COMPRESS_OFF &&
|
||||||
!(zio->io_flags & ZIO_FLAG_RAW_COMPRESS)) {
|
!(zio->io_flags & ZIO_FLAG_RAW_COMPRESS)) {
|
||||||
|
boolean_t ran_compress = B_FALSE;
|
||||||
|
boolean_t local_offload = B_FALSE;
|
||||||
|
|
||||||
abd_t *cabd = NULL;
|
abd_t *cabd = NULL;
|
||||||
if (abd_cmp_zero(zio->io_abd, lsize) == 0)
|
if (abd_cmp_zero(zio->io_abd, lsize) == 0) {
|
||||||
psize = 0;
|
psize = 0;
|
||||||
else if (compress == ZIO_COMPRESS_EMPTY)
|
} else if (compress == ZIO_COMPRESS_EMPTY) {
|
||||||
psize = lsize;
|
psize = lsize;
|
||||||
else
|
} else {
|
||||||
psize = zio_compress_data(compress, zio->io_abd, &cabd,
|
int zia_rc = ZIA_FALLBACK;
|
||||||
lsize, zp->zp_complevel);
|
zia_props_t *zia_props = zia_get_props(spa);
|
||||||
|
if ((zia_props->compress == 1) &&
|
||||||
|
(zio->io_can_offload == B_TRUE)) {
|
||||||
|
zia_rc = zia_compress(zia_props, compress,
|
||||||
|
zio->io_abd, lsize, &cabd, &psize,
|
||||||
|
zp->zp_complevel, &local_offload);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (zia_rc != ZIA_OK) {
|
||||||
|
ASSERT(zia_is_offloaded(cabd) == B_FALSE);
|
||||||
|
|
||||||
|
zia_rc = zia_cleanup_abd(zio->io_abd,
|
||||||
|
lsize, local_offload, B_FALSE);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* if data has to be brought back for cpu
|
||||||
|
* compression, but could not, restart the
|
||||||
|
* pipeline for this zio (not necessary
|
||||||
|
* in this case, but still doing it here
|
||||||
|
* in case a previous stage is offloaded)
|
||||||
|
*/
|
||||||
|
if (zia_rc == ZIA_ACCELERATOR_DOWN) {
|
||||||
|
zia_restart_before_vdev(zio);
|
||||||
|
return (zio);
|
||||||
|
}
|
||||||
|
|
||||||
|
psize = zio_compress_data(compress, zio->io_abd,
|
||||||
|
&cabd, lsize, zp->zp_complevel);
|
||||||
|
}
|
||||||
|
ran_compress = B_TRUE;
|
||||||
|
}
|
||||||
|
|
||||||
if (psize == 0) {
|
if (psize == 0) {
|
||||||
|
ASSERT(ran_compress == B_FALSE);
|
||||||
|
ASSERT(zia_is_offloaded(cabd) == B_FALSE);
|
||||||
compress = ZIO_COMPRESS_OFF;
|
compress = ZIO_COMPRESS_OFF;
|
||||||
} else if (psize >= lsize) {
|
} else if (psize >= lsize) {
|
||||||
compress = ZIO_COMPRESS_OFF;
|
compress = ZIO_COMPRESS_OFF;
|
||||||
if (cabd != NULL)
|
if (cabd != NULL) {
|
||||||
abd_free(cabd);
|
abd_free(cabd);
|
||||||
|
}
|
||||||
|
/* source abd is still offloaded */
|
||||||
} else if (!zp->zp_dedup && !zp->zp_encrypt &&
|
} else if (!zp->zp_dedup && !zp->zp_encrypt &&
|
||||||
psize <= BPE_PAYLOAD_SIZE &&
|
psize <= BPE_PAYLOAD_SIZE &&
|
||||||
zp->zp_level == 0 && !DMU_OT_HAS_FILL(zp->zp_type) &&
|
zp->zp_level == 0 && !DMU_OT_HAS_FILL(zp->zp_type) &&
|
||||||
spa_feature_is_enabled(spa, SPA_FEATURE_EMBEDDED_DATA)) {
|
spa_feature_is_enabled(spa, SPA_FEATURE_EMBEDDED_DATA)) {
|
||||||
|
ASSERT(cabd != NULL);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Remove offloaded source abd. Return value does not
|
||||||
|
* matter: if this fails, the data can't be brought back
|
||||||
|
* anyways, but the in-memory version is still valid
|
||||||
|
*
|
||||||
|
* Should onload cabd first, but that might error,
|
||||||
|
* leaving this zio offloaded unnecessarily (next
|
||||||
|
* attempt will run on CPU). Adding zia_cleanup_abd
|
||||||
|
* into the if block seems unnecessary.
|
||||||
|
*/
|
||||||
|
zia_cleanup_abd(zio->io_abd, lsize,
|
||||||
|
local_offload, B_FALSE);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* compressed enough, but not handling embedded
|
||||||
|
* data, so move compressed data back into memory
|
||||||
|
*
|
||||||
|
* if failed, recompress with cpu compression
|
||||||
|
*/
|
||||||
|
const int zia_rc = zia_onload_abd(cabd, psize, B_FALSE);
|
||||||
|
if ((zia_rc != ZIA_OK) &&
|
||||||
|
(zia_rc != ZIA_ERROR) &&
|
||||||
|
(zia_rc != ZIA_DISABLED)) {
|
||||||
|
abd_free(cabd);
|
||||||
|
zia_restart_before_vdev(zio);
|
||||||
|
return (zio);
|
||||||
|
}
|
||||||
|
|
||||||
void *cbuf = abd_borrow_buf_copy(cabd, lsize);
|
void *cbuf = abd_borrow_buf_copy(cabd, lsize);
|
||||||
encode_embedded_bp_compressed(bp,
|
encode_embedded_bp_compressed(bp,
|
||||||
cbuf, compress, lsize, psize);
|
cbuf, compress, lsize, psize);
|
||||||
|
@ -1897,6 +2028,8 @@ zio_write_compress(zio_t *zio)
|
||||||
SPA_FEATURE_EMBEDDED_DATA));
|
SPA_FEATURE_EMBEDDED_DATA));
|
||||||
return (zio);
|
return (zio);
|
||||||
} else {
|
} else {
|
||||||
|
ASSERT(cabd != NULL);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Round compressed size up to the minimum allocation
|
* Round compressed size up to the minimum allocation
|
||||||
* size of the smallest-ashift device, and zero the
|
* size of the smallest-ashift device, and zero the
|
||||||
|
@ -1910,9 +2043,33 @@ zio_write_compress(zio_t *zio)
|
||||||
if (rounded >= lsize) {
|
if (rounded >= lsize) {
|
||||||
compress = ZIO_COMPRESS_OFF;
|
compress = ZIO_COMPRESS_OFF;
|
||||||
abd_free(cabd);
|
abd_free(cabd);
|
||||||
|
zia_cleanup_abd(zio->io_abd, lsize,
|
||||||
|
local_offload, B_FALSE);
|
||||||
psize = lsize;
|
psize = lsize;
|
||||||
} else {
|
} else {
|
||||||
abd_zero_off(cabd, psize, rounded - psize);
|
if (zia_is_offloaded(cabd)) {
|
||||||
|
if (zia_zero_fill(cabd, psize,
|
||||||
|
rounded - psize) != ZIA_OK) {
|
||||||
|
if (zia_onload_abd(cabd, psize,
|
||||||
|
B_FALSE) != ZIA_OK) {
|
||||||
|
abd_free(cabd);
|
||||||
|
zia_cleanup_abd(
|
||||||
|
zio->io_abd,
|
||||||
|
lsize,
|
||||||
|
local_offload,
|
||||||
|
B_FALSE);
|
||||||
|
zia_restart_before_vdev(
|
||||||
|
zio);
|
||||||
|
return (zio);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
zio->io_flags |=
|
||||||
|
ZIO_FLAG_DONT_AGGREGATE;
|
||||||
|
} else {
|
||||||
|
abd_zero_off(cabd, psize,
|
||||||
|
rounded - psize);
|
||||||
|
}
|
||||||
psize = rounded;
|
psize = rounded;
|
||||||
zio_push_transform(zio, cabd,
|
zio_push_transform(zio, cabd,
|
||||||
psize, lsize, NULL);
|
psize, lsize, NULL);
|
||||||
|
@ -4280,6 +4437,13 @@ zio_vdev_io_start(zio_t *zio)
|
||||||
if (zio->io_type == ZIO_TYPE_WRITE) {
|
if (zio->io_type == ZIO_TYPE_WRITE) {
|
||||||
abd_copy(abuf, zio->io_abd, zio->io_size);
|
abd_copy(abuf, zio->io_abd, zio->io_size);
|
||||||
abd_zero_off(abuf, zio->io_size, asize - zio->io_size);
|
abd_zero_off(abuf, zio->io_size, asize - zio->io_size);
|
||||||
|
/*
|
||||||
|
* The Z.I.A. handles of the abds that come here
|
||||||
|
* were not modified and do not get associated with
|
||||||
|
* abuf during the transform. Instead of dropping
|
||||||
|
* the handle and delaying here, let abd_free clean
|
||||||
|
* it up later.
|
||||||
|
*/
|
||||||
}
|
}
|
||||||
zio_push_transform(zio, abuf, asize, asize, zio_subblock);
|
zio_push_transform(zio, abuf, asize, asize, zio_subblock);
|
||||||
}
|
}
|
||||||
|
@ -4484,6 +4648,8 @@ zio_vsd_default_cksum_report(zio_t *zio, zio_cksum_report_t *zcr)
|
||||||
{
|
{
|
||||||
void *abd = abd_alloc_sametype(zio->io_abd, zio->io_size);
|
void *abd = abd_alloc_sametype(zio->io_abd, zio->io_size);
|
||||||
|
|
||||||
|
zia_onload_abd(zio->io_abd, zio->io_size, B_FALSE);
|
||||||
|
|
||||||
abd_copy(abd, zio->io_abd, zio->io_size);
|
abd_copy(abd, zio->io_abd, zio->io_size);
|
||||||
|
|
||||||
zcr->zcr_cbinfo = zio->io_size;
|
zcr->zcr_cbinfo = zio->io_size;
|
||||||
|
@ -4518,7 +4684,9 @@ zio_vdev_io_assess(zio_t *zio)
|
||||||
* On retry, we cut in line in the issue queue, since we don't want
|
* On retry, we cut in line in the issue queue, since we don't want
|
||||||
* compression/checksumming/etc. work to prevent our (cheap) IO reissue.
|
* compression/checksumming/etc. work to prevent our (cheap) IO reissue.
|
||||||
*/
|
*/
|
||||||
if (zio->io_error && vd == NULL &&
|
if (zio->io_error &&
|
||||||
|
!(zio->io_flags & ZIO_FLAG_ZIA_REEXECUTE) &&
|
||||||
|
vd == NULL &&
|
||||||
!(zio->io_flags & (ZIO_FLAG_DONT_RETRY | ZIO_FLAG_IO_RETRY))) {
|
!(zio->io_flags & (ZIO_FLAG_DONT_RETRY | ZIO_FLAG_IO_RETRY))) {
|
||||||
ASSERT(!(zio->io_flags & ZIO_FLAG_DONT_QUEUE)); /* not a leaf */
|
ASSERT(!(zio->io_flags & ZIO_FLAG_DONT_QUEUE)); /* not a leaf */
|
||||||
ASSERT(!(zio->io_flags & ZIO_FLAG_IO_BYPASS)); /* not a leaf */
|
ASSERT(!(zio->io_flags & ZIO_FLAG_IO_BYPASS)); /* not a leaf */
|
||||||
|
@ -5145,6 +5313,8 @@ zio_done(zio_t *zio)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (zio->io_error) {
|
if (zio->io_error) {
|
||||||
|
ASSERT(!(zio->io_flags & ZIO_FLAG_ZIA_REEXECUTE));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If this I/O is attached to a particular vdev,
|
* If this I/O is attached to a particular vdev,
|
||||||
* generate an error message describing the I/O failure
|
* generate an error message describing the I/O failure
|
||||||
|
@ -5179,7 +5349,10 @@ zio_done(zio_t *zio)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (zio->io_error && zio == zio->io_logical) {
|
if ((zio->io_error ||
|
||||||
|
(zio->io_flags & ZIO_FLAG_ZIA_REEXECUTE) ||
|
||||||
|
0) &&
|
||||||
|
zio == zio->io_logical) {
|
||||||
/*
|
/*
|
||||||
* Determine whether zio should be reexecuted. This will
|
* Determine whether zio should be reexecuted. This will
|
||||||
* propagate all the way to the root via zio_notify_parent().
|
* propagate all the way to the root via zio_notify_parent().
|
||||||
|
@ -5550,6 +5723,7 @@ EXPORT_SYMBOL(zio_buf_alloc);
|
||||||
EXPORT_SYMBOL(zio_data_buf_alloc);
|
EXPORT_SYMBOL(zio_data_buf_alloc);
|
||||||
EXPORT_SYMBOL(zio_buf_free);
|
EXPORT_SYMBOL(zio_buf_free);
|
||||||
EXPORT_SYMBOL(zio_data_buf_free);
|
EXPORT_SYMBOL(zio_data_buf_free);
|
||||||
|
EXPORT_SYMBOL(zio_push_transform);
|
||||||
|
|
||||||
ZFS_MODULE_PARAM(zfs_zio, zio_, slow_io_ms, INT, ZMOD_RW,
|
ZFS_MODULE_PARAM(zfs_zio, zio_, slow_io_ms, INT, ZMOD_RW,
|
||||||
"Max I/O completion time (milliseconds) before marking it as slow");
|
"Max I/O completion time (milliseconds) before marking it as slow");
|
||||||
|
|
|
@ -31,6 +31,7 @@
|
||||||
#include <sys/zio_checksum.h>
|
#include <sys/zio_checksum.h>
|
||||||
#include <sys/zil.h>
|
#include <sys/zil.h>
|
||||||
#include <sys/abd.h>
|
#include <sys/abd.h>
|
||||||
|
#include <sys/zia.h>
|
||||||
#include <zfs_fletcher.h>
|
#include <zfs_fletcher.h>
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -357,6 +358,13 @@ zio_checksum_compute(zio_t *zio, enum zio_checksum checksum,
|
||||||
zio_eck_t eck;
|
zio_eck_t eck;
|
||||||
size_t eck_offset;
|
size_t eck_offset;
|
||||||
|
|
||||||
|
/* not handling embedded checksums, so bring back data */
|
||||||
|
const int zia_rc = zia_cleanup_abd(abd, size, B_FALSE, B_FALSE);
|
||||||
|
if (zia_rc == ZIA_ACCELERATOR_DOWN) {
|
||||||
|
zia_restart_before_vdev(zio);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
memset(&saved, 0, sizeof (zio_cksum_t));
|
memset(&saved, 0, sizeof (zio_cksum_t));
|
||||||
|
|
||||||
if (checksum == ZIO_CHECKSUM_ZILOG2) {
|
if (checksum == ZIO_CHECKSUM_ZILOG2) {
|
||||||
|
@ -403,8 +411,31 @@ zio_checksum_compute(zio_t *zio, enum zio_checksum checksum,
|
||||||
sizeof (zio_cksum_t));
|
sizeof (zio_cksum_t));
|
||||||
} else {
|
} else {
|
||||||
saved = bp->blk_cksum;
|
saved = bp->blk_cksum;
|
||||||
|
|
||||||
|
int zia_rc = ZIA_FALLBACK;
|
||||||
|
|
||||||
|
/* only offload non-embedded checksums */
|
||||||
|
boolean_t local_offload = B_FALSE;
|
||||||
|
zia_props_t *zia_props = zia_get_props(spa);
|
||||||
|
if ((zia_props->checksum == 1) &&
|
||||||
|
(zio->io_can_offload == B_TRUE)) {
|
||||||
|
zia_rc = zia_checksum_compute(zia_props->provider,
|
||||||
|
&cksum, checksum, zio, size, &local_offload);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* fall back to ZFS implementation */
|
||||||
|
if (zia_rc != ZIA_OK) {
|
||||||
|
zia_rc = zia_cleanup_abd(abd, size, local_offload,
|
||||||
|
B_FALSE);
|
||||||
|
if (zia_rc == ZIA_ACCELERATOR_DOWN) {
|
||||||
|
zia_restart_before_vdev(zio);
|
||||||
|
return;
|
||||||
|
}
|
||||||
ci->ci_func[0](abd, size, spa->spa_cksum_tmpls[checksum],
|
ci->ci_func[0](abd, size, spa->spa_cksum_tmpls[checksum],
|
||||||
&cksum);
|
&cksum);
|
||||||
|
} else {
|
||||||
|
zio->io_flags |= ZIO_FLAG_DONT_AGGREGATE;
|
||||||
|
}
|
||||||
if (BP_USES_CRYPT(bp) && BP_GET_TYPE(bp) != DMU_OT_OBJSET)
|
if (BP_USES_CRYPT(bp) && BP_GET_TYPE(bp) != DMU_OT_OBJSET)
|
||||||
zio_checksum_handle_crypt(&cksum, &saved, insecure);
|
zio_checksum_handle_crypt(&cksum, &saved, insecure);
|
||||||
bp->blk_cksum = cksum;
|
bp->blk_cksum = cksum;
|
||||||
|
@ -433,6 +464,12 @@ zio_checksum_error_impl(spa_t *spa, const blkptr_t *bp,
|
||||||
zio_cksum_t verifier;
|
zio_cksum_t verifier;
|
||||||
size_t eck_offset;
|
size_t eck_offset;
|
||||||
|
|
||||||
|
/* not handling embedded checksums, so bring back data */
|
||||||
|
const int zia_rc = zia_cleanup_abd(abd, size, B_FALSE, B_FALSE);
|
||||||
|
if (zia_rc == ZIA_ACCELERATOR_DOWN) {
|
||||||
|
return (zia_rc);
|
||||||
|
}
|
||||||
|
|
||||||
if (checksum == ZIO_CHECKSUM_ZILOG2) {
|
if (checksum == ZIO_CHECKSUM_ZILOG2) {
|
||||||
zil_chain_t zilc;
|
zil_chain_t zilc;
|
||||||
uint64_t nused;
|
uint64_t nused;
|
||||||
|
@ -494,8 +531,25 @@ zio_checksum_error_impl(spa_t *spa, const blkptr_t *bp,
|
||||||
} else {
|
} else {
|
||||||
byteswap = BP_SHOULD_BYTESWAP(bp);
|
byteswap = BP_SHOULD_BYTESWAP(bp);
|
||||||
expected_cksum = bp->blk_cksum;
|
expected_cksum = bp->blk_cksum;
|
||||||
ci->ci_func[byteswap](abd, size,
|
|
||||||
spa->spa_cksum_tmpls[checksum], &actual_cksum);
|
zia_props_t *zia_props = zia_get_props(spa);
|
||||||
|
int error = ZIA_FALLBACK;
|
||||||
|
if ((zia_props->can_offload == B_TRUE) &&
|
||||||
|
(zia_props->checksum == 1)) {
|
||||||
|
error = zia_checksum_error(checksum, abd, size,
|
||||||
|
byteswap, &actual_cksum);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* fall back to ZFS implementation */
|
||||||
|
if ((error != ZIA_OK) && (error != ECKSUM)) {
|
||||||
|
/* data was modified by reconstruction */
|
||||||
|
error = zia_onload_abd(abd, size, B_FALSE);
|
||||||
|
if (error == ZIA_ACCELERATOR_DOWN) {
|
||||||
|
return (error);
|
||||||
|
}
|
||||||
|
ci->ci_func[byteswap](abd, size,
|
||||||
|
spa->spa_cksum_tmpls[checksum], &actual_cksum);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -0,0 +1,921 @@
|
||||||
|
/*
|
||||||
|
* © 2021. Triad National Security, LLC. All rights reserved.
|
||||||
|
*
|
||||||
|
* This program was produced under U.S. Government contract
|
||||||
|
* 89233218CNA000001 for Los Alamos National Laboratory (LANL), which
|
||||||
|
* is operated by Triad National Security, LLC for the U.S.
|
||||||
|
* Department of Energy/National Nuclear Security Administration. All
|
||||||
|
* rights in the program are reserved by Triad National Security, LLC,
|
||||||
|
* and the U.S. Department of Energy/National Nuclear Security
|
||||||
|
* Administration. The Government is granted for itself and others
|
||||||
|
* acting on its behalf a nonexclusive, paid-up, irrevocable worldwide
|
||||||
|
* license in this material to reproduce, prepare derivative works,
|
||||||
|
* distribute copies to the public, perform publicly and display
|
||||||
|
* publicly, and to permit others to do so.
|
||||||
|
*
|
||||||
|
* ----
|
||||||
|
*
|
||||||
|
* This program is open source under the BSD-3 License.
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* 1. Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer in the documentation
|
||||||
|
* and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* 3. Neither the name of the copyright holder nor the names of its
|
||||||
|
* contributors may be used to endorse or promote products derived from this
|
||||||
|
* software without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <linux/kernel.h>
|
||||||
|
#include <linux/random.h>
|
||||||
|
#include <linux/spinlock.h>
|
||||||
|
#include <linux/types.h>
|
||||||
|
|
||||||
|
#include <sys/abd.h>
|
||||||
|
#include <sys/spa_checksum.h>
|
||||||
|
#include <sys/vdev_disk.h>
|
||||||
|
#include <sys/vdev_raidz.h>
|
||||||
|
#include <sys/vdev_raidz_impl.h>
|
||||||
|
#include <sys/zfs_file.h>
|
||||||
|
#include <sys/zio.h>
|
||||||
|
#include <sys/zmod.h>
|
||||||
|
#include <zfs_fletcher.h>
|
||||||
|
|
||||||
|
#include "kernel_offloader.h"
|
||||||
|
|
||||||
|
static const char NAME[] = "Kernel Offloader";
|
||||||
|
static const size_t NAME_LEN = sizeof (NAME);
|
||||||
|
|
||||||
|
/* Kinds of handle the kernel offloader hands out. */
typedef enum kernel_offloader_handle_type {
	/* default type - convert all data into a single blob */
	KOH_REAL = 0,
	KOH_REFERENCE = 1,

	KOH_INVALID = 2,
} koht_t;
|
||||||
|
|
||||||
|
/* offloaded data (not defined outside of "hardware") */
|
||||||
|
typedef struct kernel_offloader_handle {
|
||||||
|
koht_t type;
|
||||||
|
void *ptr;
|
||||||
|
size_t size;
|
||||||
|
} koh_t;
|
||||||
|
|
||||||
|
/* **************************************** */
|
||||||
|
/* memory bookkeeping */
|
||||||
|
rwlock_t rwlock; /* atomic ints are not big enough */
|
||||||
|
|
||||||
|
/* never decreases */
|
||||||
|
static size_t total_count; /* number of times alloc/alloc_ref was called */
|
||||||
|
static size_t total_size; /* buffer size */
|
||||||
|
static size_t total_actual; /* buffer size + any extra memory */
|
||||||
|
|
||||||
|
/* currently active */
|
||||||
|
static size_t active_count; /* number of times alloc/alloc_ref was called */
|
||||||
|
static size_t active_size; /* buffer size */
|
||||||
|
static size_t active_actual; /* buffer size + any extra memory */
|
||||||
|
/* **************************************** */
|
||||||
|
|
||||||
|
/* **************************************** */
|
||||||
|
/* set kernel offloader to DOWN state */
|
||||||
|
typedef struct kernel_offloader_down {
|
||||||
|
rwlock_t rwlock;
|
||||||
|
int count;
|
||||||
|
int max;
|
||||||
|
int printed;
|
||||||
|
} kod_t;
|
||||||
|
|
||||||
|
/*
 * Reset the kod_t `name`: initialize its lock, clear count and printed,
 * and set the threshold to max_val.  Arguments are parenthesized so
 * that any lvalue / expression can be passed safely.
 */
#define	kod_init(name, max_val) \
	do { \
		rwlock_init(&(name).rwlock); \
		(name).count = 0; \
		(name).max = (max_val); \
		(name).printed = 0; \
	} while (0)
|
||||||
|
|
||||||
|
/*
 * Increment the call counter of the kod_t `name` under its write lock.
 * The argument is parenthesized so that any lvalue expression works.
 */
#define	kod_inc(name) \
	do { \
		write_lock(&(name).rwlock); \
		(name).count++; \
		write_unlock(&(name).rwlock); \
	} while (0)
|
||||||
|
|
||||||
|
/*
 * If the kod_t `name` has a non-zero max and its count has exceeded
 * it, log the name once and make the ENCLOSING FUNCTION return
 * DPUSM_PROVIDER_INVALIDATED.  This macro contains a hidden return, so
 * it may only be used in functions returning a DPUSM status.
 *
 * `name` is stringified for the log message and parenthesized wherever
 * it is used as an expression.
 */
#define	kod_ret(name) \
	do { \
		if ((name).max) { \
			write_lock(&(name).rwlock); \
			if ((name).count > (name).max) { \
				if (!(name).printed) { \
					printk("%s\n", #name); \
					(name).printed = 1; \
				} \
				write_unlock(&(name).rwlock); \
				return (DPUSM_PROVIDER_INVALIDATED); \
			} \
			write_unlock(&(name).rwlock); \
		} \
	} while (0)
|
||||||
|
|
||||||
|
/*
 * Count one call against the kod_t `name` (kod_inc) and then apply the
 * threshold check (kod_ret), which may return from the enclosing
 * function.
 */
#define	kod_run(name) \
	do { \
		kod_inc(name); \
		kod_ret(name); \
	} while (0)
|
||||||
|
|
||||||
|
/* can probably do with macros */
|
||||||
|
static kod_t copy_from_generic_down; static int copy_from_generic_down_max = 0;
|
||||||
|
module_param(copy_from_generic_down_max, int, 0660);
|
||||||
|
|
||||||
|
static kod_t copy_to_generic_down; static int copy_to_generic_down_max = 0;
|
||||||
|
module_param(copy_to_generic_down_max, int, 0660);
|
||||||
|
|
||||||
|
static kod_t cmp_down; static int cmp_down_max = 0;
|
||||||
|
module_param(cmp_down_max, int, 0660);
|
||||||
|
|
||||||
|
static kod_t compress_down; static int compress_down_max = 0;
|
||||||
|
module_param(compress_down_max, int, 0660);
|
||||||
|
|
||||||
|
static kod_t checksum_down; static int checksum_down_max = 0;
|
||||||
|
module_param(checksum_down_max, int, 0660);
|
||||||
|
|
||||||
|
static kod_t raidz_gen_down; static int raidz_gen_down_max = 0;
|
||||||
|
module_param(raidz_gen_down_max, int, 0660);
|
||||||
|
|
||||||
|
static kod_t raidz_rec_down; static int raidz_rec_down_max = 0;
|
||||||
|
module_param(raidz_rec_down_max, int, 0660);
|
||||||
|
|
||||||
|
static kod_t disk_write_down; static int disk_write_down_max = 0;
|
||||||
|
module_param(disk_write_down_max, int, 0660);
|
||||||
|
/* **************************************** */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* value used to swizzle the pointer so that
|
||||||
|
* dereferencing the handle will fail
|
||||||
|
*/
|
||||||
|
static void *mask = NULL;
|
||||||
|
void
|
||||||
|
kernel_offloader_init(void)
|
||||||
|
{
|
||||||
|
get_random_bytes(&mask, sizeof (mask));
|
||||||
|
rwlock_init(&rwlock);
|
||||||
|
total_count = 0;
|
||||||
|
total_size = 0;
|
||||||
|
total_actual = 0;
|
||||||
|
active_count = 0;
|
||||||
|
active_size = 0;
|
||||||
|
active_actual = 0;
|
||||||
|
|
||||||
|
kod_init(copy_from_generic_down, copy_from_generic_down_max);
|
||||||
|
kod_init(copy_to_generic_down, copy_to_generic_down_max);
|
||||||
|
kod_init(cmp_down, cmp_down_max);
|
||||||
|
kod_init(compress_down, compress_down_max);
|
||||||
|
kod_init(checksum_down, checksum_down_max);
|
||||||
|
kod_init(raidz_gen_down, raidz_gen_down_max);
|
||||||
|
kod_init(raidz_rec_down, raidz_rec_down_max);
|
||||||
|
kod_init(disk_write_down, disk_write_down_max);
|
||||||
|
|
||||||
|
printk("kernel offloader init: %p\n", mask);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
kernel_offloader_fini(void)
|
||||||
|
{
|
||||||
|
mask = NULL;
|
||||||
|
|
||||||
|
printk("kernel offloader fini with "
|
||||||
|
"%zu/%zu (actual %zu/%zu) bytes "
|
||||||
|
"in %zu/%zu allocations remaining\n",
|
||||||
|
active_size, total_size,
|
||||||
|
active_actual, total_actual,
|
||||||
|
active_count, total_count);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* get a starting address of a linear koh_t */
|
||||||
|
static void *
|
||||||
|
ptr_start(koh_t *koh, size_t offset)
|
||||||
|
{
|
||||||
|
return (void *)(((uintptr_t)koh->ptr) + offset);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* convert the actual pointer to a handle (pretend
|
||||||
|
* the data is not accessible from the Z.I.A. base)
|
||||||
|
*/
|
||||||
|
static void *
|
||||||
|
swizzle(void *ptr)
|
||||||
|
{
|
||||||
|
return (ptr?((void *)(((uintptr_t)ptr) ^ ((uintptr_t)mask))):NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Turn a handle back into a usable pointer. The mask is applied with
 * XOR, so the same operation that creates a handle also undoes it.
 */
static void *
unswizzle(void *handle)
{
	void *ptr = swizzle(handle);

	return (ptr);
}
|
||||||
|
|
||||||
|
static koh_t *
|
||||||
|
koh_alloc(size_t size)
|
||||||
|
{
|
||||||
|
koh_t *koh = kmalloc(sizeof (koh_t), GFP_KERNEL);
|
||||||
|
if (koh) {
|
||||||
|
koh->type = KOH_REAL;
|
||||||
|
koh->ptr = kmalloc(size, GFP_KERNEL);
|
||||||
|
koh->size = size;
|
||||||
|
|
||||||
|
write_lock(&rwlock);
|
||||||
|
total_count++;
|
||||||
|
active_count++;
|
||||||
|
|
||||||
|
/* the allocation itself */
|
||||||
|
total_size += size;
|
||||||
|
active_size += size;
|
||||||
|
total_actual += size;
|
||||||
|
active_actual += size;
|
||||||
|
|
||||||
|
/* the wrapper struct */
|
||||||
|
total_actual += sizeof (koh_t);
|
||||||
|
active_actual += sizeof (koh_t);
|
||||||
|
|
||||||
|
write_unlock(&rwlock);
|
||||||
|
}
|
||||||
|
|
||||||
|
return (koh);
|
||||||
|
}
|
||||||
|
|
||||||
|
static koh_t *
|
||||||
|
koh_alloc_ref(koh_t *src, size_t offset, size_t size)
|
||||||
|
{
|
||||||
|
koh_t *ref = NULL;
|
||||||
|
if (src) {
|
||||||
|
koh_t *src_koh = (koh_t *)src;
|
||||||
|
|
||||||
|
if ((offset + size) > src_koh->size) {
|
||||||
|
printk("Error: Cannot reference handle of size %zu "
|
||||||
|
"starting at offset %zu with size %zu\n",
|
||||||
|
src_koh->size, offset, size);
|
||||||
|
return (NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
ref = kmalloc(sizeof (koh_t), GFP_KERNEL);
|
||||||
|
if (ref) {
|
||||||
|
ref->type = KOH_REFERENCE;
|
||||||
|
ref->ptr = ptr_start(src, offset);
|
||||||
|
ref->size = size;
|
||||||
|
|
||||||
|
write_lock(&rwlock);
|
||||||
|
total_count++;
|
||||||
|
active_count++;
|
||||||
|
|
||||||
|
/* no new requested space */
|
||||||
|
|
||||||
|
/* the wrapper struct */
|
||||||
|
total_actual += sizeof (koh_t);
|
||||||
|
active_actual += sizeof (koh_t);
|
||||||
|
write_unlock(&rwlock);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return (ref);
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
kernel_offloader_get_size(void *handle, size_t *size, size_t *actual)
|
||||||
|
{
|
||||||
|
koh_t *koh = (koh_t *)unswizzle(handle);
|
||||||
|
|
||||||
|
if (size) {
|
||||||
|
*size = koh->size;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (actual) {
|
||||||
|
*actual = koh->size;
|
||||||
|
}
|
||||||
|
|
||||||
|
return (KERNEL_OFFLOADER_OK);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
koh_free(koh_t *koh)
|
||||||
|
{
|
||||||
|
if (koh) {
|
||||||
|
write_lock(&rwlock);
|
||||||
|
switch (koh->type) {
|
||||||
|
case KOH_REAL:
|
||||||
|
/* the allocation itself */
|
||||||
|
active_size -= koh->size;
|
||||||
|
active_actual -= koh->size;
|
||||||
|
kfree(koh->ptr);
|
||||||
|
break;
|
||||||
|
case KOH_REFERENCE:
|
||||||
|
case KOH_INVALID:
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* the wrapper struct */
|
||||||
|
active_actual -= sizeof (koh_t);
|
||||||
|
|
||||||
|
active_count--;
|
||||||
|
write_unlock(&rwlock);
|
||||||
|
|
||||||
|
kfree(koh);
|
||||||
|
}
|
||||||
|
|
||||||
|
return (KERNEL_OFFLOADER_OK);
|
||||||
|
}
|
||||||
|
|
||||||
|
void *
|
||||||
|
kernel_offloader_alloc(size_t size)
|
||||||
|
{
|
||||||
|
return (swizzle(koh_alloc(size)));
|
||||||
|
}
|
||||||
|
|
||||||
|
void *
|
||||||
|
kernel_offloader_alloc_ref(void *src_handle, size_t offset, size_t size)
|
||||||
|
{
|
||||||
|
return swizzle(koh_alloc_ref(unswizzle(src_handle),
|
||||||
|
offset, size));
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
kernel_offloader_free(void *handle)
|
||||||
|
{
|
||||||
|
koh_free(unswizzle(handle));
|
||||||
|
return (DPUSM_OK);
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
kernel_offloader_copy_from_generic(void *handle, size_t offset,
|
||||||
|
const void *src, size_t size)
|
||||||
|
{
|
||||||
|
koh_t *koh = (koh_t *)unswizzle(handle);
|
||||||
|
if (!koh) {
|
||||||
|
return (KERNEL_OFFLOADER_ERROR);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((offset + size) > koh->size) {
|
||||||
|
return (KERNEL_OFFLOADER_ERROR);
|
||||||
|
}
|
||||||
|
|
||||||
|
kod_run(copy_from_generic_down);
|
||||||
|
|
||||||
|
void *dst = ptr_start(koh, offset);
|
||||||
|
if (memcpy(dst, src, size) != dst) {
|
||||||
|
return (KERNEL_OFFLOADER_ERROR);
|
||||||
|
}
|
||||||
|
return (KERNEL_OFFLOADER_OK);
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
kernel_offloader_copy_to_generic(void *handle, size_t offset,
|
||||||
|
void *dst, size_t size)
|
||||||
|
{
|
||||||
|
koh_t *koh = (koh_t *)unswizzle(handle);
|
||||||
|
if (!koh) {
|
||||||
|
return (KERNEL_OFFLOADER_ERROR);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((offset + size) > koh->size) {
|
||||||
|
return (KERNEL_OFFLOADER_ERROR);
|
||||||
|
}
|
||||||
|
|
||||||
|
kod_run(copy_to_generic_down);
|
||||||
|
|
||||||
|
if (memcpy(dst, ptr_start(koh, offset), size) != dst) {
|
||||||
|
return (KERNEL_OFFLOADER_ERROR);
|
||||||
|
}
|
||||||
|
|
||||||
|
return (KERNEL_OFFLOADER_OK);
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
kernel_offloader_cmp(void *lhs_handle, void *rhs_handle, int *diff)
|
||||||
|
{
|
||||||
|
koh_t *lhs = (koh_t *)unswizzle(lhs_handle);
|
||||||
|
koh_t *rhs = (koh_t *)unswizzle(rhs_handle);
|
||||||
|
|
||||||
|
if (!lhs || !rhs || !diff) {
|
||||||
|
return (KERNEL_OFFLOADER_ERROR);
|
||||||
|
}
|
||||||
|
|
||||||
|
kod_run(cmp_down);
|
||||||
|
|
||||||
|
size_t len = rhs->size;
|
||||||
|
if (lhs->size != rhs->size) {
|
||||||
|
len =
|
||||||
|
(lhs->size < rhs->size)?lhs->size:rhs->size;
|
||||||
|
}
|
||||||
|
|
||||||
|
*diff = memcmp(ptr_start(lhs, 0),
|
||||||
|
ptr_start(rhs, 0), len);
|
||||||
|
|
||||||
|
return (KERNEL_OFFLOADER_OK);
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
kernel_offloader_zero_fill(void *handle, size_t offset, size_t size)
|
||||||
|
{
|
||||||
|
koh_t *koh = (koh_t *)unswizzle(handle);
|
||||||
|
memset(ptr_start(koh, offset), 0, size);
|
||||||
|
return (KERNEL_OFFLOADER_OK);
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
kernel_offloader_all_zeros(void *handle, size_t offset, size_t size)
|
||||||
|
{
|
||||||
|
koh_t *koh = (koh_t *)unswizzle(handle);
|
||||||
|
if (koh->size - offset < size) {
|
||||||
|
return (KERNEL_OFFLOADER_ERROR);
|
||||||
|
}
|
||||||
|
|
||||||
|
uint64_t *array = ptr_start(koh, offset);
|
||||||
|
size_t i;
|
||||||
|
for (i = 0; i < size / sizeof (uint64_t); i++) {
|
||||||
|
if (array[i]) {
|
||||||
|
return (KERNEL_OFFLOADER_BAD_RESULT);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
char *remaining = ptr_start(koh, offset);
|
||||||
|
for (i *= sizeof (uint64_t); i < size; i++) {
|
||||||
|
if (remaining[i]) {
|
||||||
|
return (KERNEL_OFFLOADER_BAD_RESULT);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return (KERNEL_OFFLOADER_OK);
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
kernel_offloader_mem_stats(
|
||||||
|
void *t_count_handle, void *t_size_handle, void *t_actual_handle,
|
||||||
|
void *a_count_handle, void *a_size_handle, void *a_actual_handle)
|
||||||
|
{
|
||||||
|
read_lock(&rwlock);
|
||||||
|
|
||||||
|
if (t_count_handle) {
|
||||||
|
*(size_t *)ptr_start(t_count_handle, 0) =
|
||||||
|
total_count;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (t_size_handle) {
|
||||||
|
*(size_t *)ptr_start(t_size_handle, 0) =
|
||||||
|
total_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (t_actual_handle) {
|
||||||
|
*(size_t *)ptr_start(t_actual_handle, 0) =
|
||||||
|
total_actual;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (a_count_handle) {
|
||||||
|
*(size_t *)ptr_start(a_count_handle, 0) =
|
||||||
|
active_count;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (a_size_handle) {
|
||||||
|
*(size_t *)ptr_start(a_size_handle, 0) =
|
||||||
|
active_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (a_actual_handle) {
|
||||||
|
*(size_t *)ptr_start(a_actual_handle, 0) =
|
||||||
|
active_actual;
|
||||||
|
}
|
||||||
|
|
||||||
|
read_unlock(&rwlock);
|
||||||
|
|
||||||
|
return (KERNEL_OFFLOADER_OK);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* specific implementation */
|
||||||
|
static int
|
||||||
|
kernel_offloader_gzip_compress(koh_t *src, size_t s_len,
|
||||||
|
koh_t *dst, size_t *d_len, int level)
|
||||||
|
{
|
||||||
|
if (z_compress_level(ptr_start(dst, 0), d_len,
|
||||||
|
ptr_start(src, 0), s_len, level) != Z_OK) {
|
||||||
|
if (*d_len != src->size) {
|
||||||
|
return (KERNEL_OFFLOADER_ERROR);
|
||||||
|
}
|
||||||
|
return (KERNEL_OFFLOADER_OK);
|
||||||
|
}
|
||||||
|
|
||||||
|
return (KERNEL_OFFLOADER_OK);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
kernel_offloader_lz4_compress(koh_t *src, koh_t *dst,
|
||||||
|
size_t s_len, int level, size_t *c_len)
|
||||||
|
{
|
||||||
|
*c_len = dst->size;
|
||||||
|
|
||||||
|
if (zfs_lz4_compress_buf(ptr_start(src, 0), ptr_start(dst, 0),
|
||||||
|
s_len, *c_len, level) == s_len) {
|
||||||
|
return (KERNEL_OFFLOADER_ERROR);
|
||||||
|
}
|
||||||
|
|
||||||
|
return (KERNEL_OFFLOADER_OK);
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
kernel_offloader_compress(dpusm_compress_t alg, int level,
|
||||||
|
void *src, size_t s_len, void *dst, void *d_len)
|
||||||
|
{
|
||||||
|
int status = KERNEL_OFFLOADER_UNAVAILABLE;
|
||||||
|
koh_t *src_koh = NULL;
|
||||||
|
koh_t *dst_koh = NULL;
|
||||||
|
koh_t *d_len_koh = NULL;
|
||||||
|
if (!src || !dst || !d_len) {
|
||||||
|
return (KERNEL_OFFLOADER_ERROR);
|
||||||
|
}
|
||||||
|
|
||||||
|
kod_run(compress_down);
|
||||||
|
|
||||||
|
src_koh = (koh_t *)unswizzle(src);
|
||||||
|
dst_koh = (koh_t *)unswizzle(dst);
|
||||||
|
d_len_koh = (koh_t *)unswizzle(d_len);
|
||||||
|
|
||||||
|
if ((DPUSM_COMPRESS_GZIP_1 <= alg) &&
|
||||||
|
(alg <= DPUSM_COMPRESS_GZIP_9)) {
|
||||||
|
status = kernel_offloader_gzip_compress(src_koh, s_len,
|
||||||
|
dst_koh, (size_t *)ptr_start(d_len_koh, 0), level);
|
||||||
|
} else if (alg == DPUSM_COMPRESS_LZ4) {
|
||||||
|
status = kernel_offloader_lz4_compress(src_koh, dst_koh, s_len,
|
||||||
|
level, (size_t *)ptr_start(d_len_koh, 0));
|
||||||
|
}
|
||||||
|
|
||||||
|
return (status);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* specific implementation */
|
||||||
|
static int
|
||||||
|
kernel_offloader_gzip_decompress(koh_t *src, size_t s_len,
|
||||||
|
koh_t *dst, size_t *d_len, int level)
|
||||||
|
{
|
||||||
|
if (z_uncompress(ptr_start(dst, 0), d_len,
|
||||||
|
ptr_start(src, 0), s_len) != Z_OK) {
|
||||||
|
return (KERNEL_OFFLOADER_ERROR);
|
||||||
|
}
|
||||||
|
|
||||||
|
return (KERNEL_OFFLOADER_OK);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
kernel_offloader_lz4_decompress(koh_t *src, size_t s_len,
|
||||||
|
koh_t *dst, size_t *d_len, int level)
|
||||||
|
{
|
||||||
|
if (zfs_lz4_decompress_buf(ptr_start(src, 0), ptr_start(dst, 0),
|
||||||
|
s_len, *d_len, level) != 0) {
|
||||||
|
return (KERNEL_OFFLOADER_ERROR);
|
||||||
|
}
|
||||||
|
|
||||||
|
return (KERNEL_OFFLOADER_OK);
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
kernel_offloader_decompress(dpusm_decompress_t alg, void *level,
|
||||||
|
void *src, size_t s_len, void *dst, void *d_len)
|
||||||
|
{
|
||||||
|
int status = KERNEL_OFFLOADER_UNAVAILABLE;
|
||||||
|
koh_t *level_koh = NULL;
|
||||||
|
koh_t *src_koh = NULL;
|
||||||
|
koh_t *dst_koh = NULL;
|
||||||
|
koh_t *d_len_koh = NULL;
|
||||||
|
if (!level || !src || !dst || !d_len) {
|
||||||
|
return (KERNEL_OFFLOADER_ERROR);
|
||||||
|
}
|
||||||
|
|
||||||
|
level_koh = (koh_t *)unswizzle(level);
|
||||||
|
src_koh = (koh_t *)unswizzle(src);
|
||||||
|
dst_koh = (koh_t *)unswizzle(dst);
|
||||||
|
d_len_koh = (koh_t *)unswizzle(d_len);
|
||||||
|
|
||||||
|
if ((DPUSM_COMPRESS_GZIP_1 <= alg) &&
|
||||||
|
(alg <= DPUSM_COMPRESS_GZIP_9)) {
|
||||||
|
status = kernel_offloader_gzip_decompress(src_koh, s_len,
|
||||||
|
dst_koh, (size_t *)ptr_start(d_len_koh, 0),
|
||||||
|
*(int *)ptr_start(level_koh, 0));
|
||||||
|
} else if (alg == DPUSM_COMPRESS_LZ4) {
|
||||||
|
status = kernel_offloader_lz4_decompress(src_koh, s_len,
|
||||||
|
dst_koh, (size_t *)ptr_start(d_len_koh, 0),
|
||||||
|
*(int *)ptr_start(level_koh, 0));
|
||||||
|
}
|
||||||
|
|
||||||
|
return (status);
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
kernel_offloader_checksum(dpusm_checksum_t alg,
|
||||||
|
dpusm_checksum_byteorder_t order, void *data, size_t size,
|
||||||
|
void *cksum, size_t cksum_size)
|
||||||
|
{
|
||||||
|
koh_t *data_koh = (koh_t *)unswizzle(data);
|
||||||
|
if (!data_koh) {
|
||||||
|
return (KERNEL_OFFLOADER_ERROR);
|
||||||
|
}
|
||||||
|
|
||||||
|
zio_cksum_t zcp;
|
||||||
|
if (cksum_size < sizeof (zcp.zc_word)) {
|
||||||
|
return (KERNEL_OFFLOADER_ERROR);
|
||||||
|
}
|
||||||
|
|
||||||
|
kod_run(checksum_down);
|
||||||
|
|
||||||
|
/* compute checksum */
|
||||||
|
|
||||||
|
void *buf = ptr_start(data_koh, 0);
|
||||||
|
|
||||||
|
if (alg == DPUSM_CHECKSUM_FLETCHER_2) {
|
||||||
|
fletcher_init(&zcp);
|
||||||
|
if (order == DPUSM_BYTEORDER_NATIVE) {
|
||||||
|
fletcher_2_native(buf, size, NULL, &zcp);
|
||||||
|
} else {
|
||||||
|
fletcher_2_byteswap(buf, size, NULL, &zcp);
|
||||||
|
}
|
||||||
|
} else if (alg == DPUSM_CHECKSUM_FLETCHER_4) {
|
||||||
|
fletcher_init(&zcp);
|
||||||
|
if (order == DPUSM_BYTEORDER_NATIVE) {
|
||||||
|
fletcher_4_native(buf, size, NULL, &zcp);
|
||||||
|
} else {
|
||||||
|
fletcher_4_byteswap(buf, size, NULL, &zcp);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
return (DPUSM_NOT_SUPPORTED);
|
||||||
|
}
|
||||||
|
|
||||||
|
memcpy(cksum, zcp.zc_word, sizeof (zcp.zc_word));
|
||||||
|
|
||||||
|
return (DPUSM_OK);
|
||||||
|
}
|
||||||
|
|
||||||
|
void *
|
||||||
|
kernel_offloader_raidz_alloc(size_t nparity, size_t ndata)
|
||||||
|
{
|
||||||
|
const size_t ncols = nparity + ndata;
|
||||||
|
|
||||||
|
const size_t rr_size = offsetof(raidz_row_t, rr_col[ncols]);
|
||||||
|
raidz_row_t *rr = kzalloc(rr_size, GFP_KERNEL);
|
||||||
|
rr->rr_cols = ncols;
|
||||||
|
rr->rr_firstdatacol = nparity;
|
||||||
|
|
||||||
|
write_lock(&rwlock);
|
||||||
|
total_count++;
|
||||||
|
active_count++;
|
||||||
|
|
||||||
|
/* the op struct does not contribute to buffer allocations */
|
||||||
|
total_actual += rr_size;
|
||||||
|
active_actual += rr_size;
|
||||||
|
|
||||||
|
write_unlock(&rwlock);
|
||||||
|
|
||||||
|
return (swizzle(rr));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* attaches a column to the raidz struct */
|
||||||
|
int
|
||||||
|
kernel_offloader_raidz_set_column(void *raidz, uint64_t c,
|
||||||
|
void *col, size_t size)
|
||||||
|
{
|
||||||
|
raidz_row_t *rr = (raidz_row_t *)unswizzle(raidz);
|
||||||
|
koh_t *koh = (koh_t *)unswizzle(col);
|
||||||
|
|
||||||
|
if (!rr || !koh) {
|
||||||
|
return (DPUSM_ERROR);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* c is too big */
|
||||||
|
if (c >= rr->rr_cols) {
|
||||||
|
return (DPUSM_ERROR);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* "active" size is larger than allocated size */
|
||||||
|
if (size > koh->size) {
|
||||||
|
return (DPUSM_ERROR);
|
||||||
|
}
|
||||||
|
|
||||||
|
raidz_col_t *rc = &rr->rr_col[c];
|
||||||
|
|
||||||
|
/* clean up old column */
|
||||||
|
abd_free(rc->rc_abd);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* rc->rc_abd does not take ownership of koh->ptr,
|
||||||
|
* so don't need to release ownership
|
||||||
|
*/
|
||||||
|
rc->rc_abd = abd_get_from_buf(koh->ptr, size);
|
||||||
|
rc->rc_size = size;
|
||||||
|
|
||||||
|
return (DPUSM_OK);
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
kernel_offloader_raidz_free(void *raidz)
|
||||||
|
{
|
||||||
|
raidz_row_t *rr = (raidz_row_t *)unswizzle(raidz);
|
||||||
|
if (!rr) {
|
||||||
|
return (DPUSM_ERROR);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int c = 0; c < rr->rr_cols; c++) {
|
||||||
|
raidz_col_t *rc = &rr->rr_col[c];
|
||||||
|
abd_free(rc->rc_abd);
|
||||||
|
}
|
||||||
|
kfree(rr);
|
||||||
|
|
||||||
|
const size_t rr_size = offsetof(raidz_row_t, rr_col[rr->rr_cols]);
|
||||||
|
|
||||||
|
write_lock(&rwlock);
|
||||||
|
active_count--;
|
||||||
|
active_actual -= rr_size;
|
||||||
|
write_unlock(&rwlock);
|
||||||
|
|
||||||
|
return (DPUSM_OK);
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
kernel_offloader_raidz_gen(void *raidz)
|
||||||
|
{
|
||||||
|
raidz_row_t *rr = (raidz_row_t *)unswizzle(raidz);
|
||||||
|
if (!rr) {
|
||||||
|
return (KERNEL_OFFLOADER_ERROR);
|
||||||
|
}
|
||||||
|
|
||||||
|
kod_run(raidz_gen_down);
|
||||||
|
|
||||||
|
switch (rr->rr_firstdatacol) {
|
||||||
|
case 1:
|
||||||
|
vdev_raidz_generate_parity_p(rr);
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
vdev_raidz_generate_parity_pq(rr);
|
||||||
|
break;
|
||||||
|
case 3:
|
||||||
|
vdev_raidz_generate_parity_pqr(rr);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
return (KERNEL_OFFLOADER_OK);
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
kernel_offloader_raidz_rec(void *raidz, int *tgts, int ntgts)
|
||||||
|
{
|
||||||
|
raidz_row_t *rr = (raidz_row_t *)unswizzle(raidz);
|
||||||
|
if (!rr) {
|
||||||
|
return (KERNEL_OFFLOADER_ERROR);
|
||||||
|
}
|
||||||
|
|
||||||
|
kod_run(raidz_rec_down);
|
||||||
|
|
||||||
|
vdev_raidz_reconstruct_general(rr, tgts, ntgts);
|
||||||
|
|
||||||
|
return (KERNEL_OFFLOADER_OK);
|
||||||
|
}
|
||||||
|
|
||||||
|
void *
|
||||||
|
kernel_offloader_file_open(const char *path, int flags, int mode)
|
||||||
|
{
|
||||||
|
zfs_file_t *fp = NULL;
|
||||||
|
/* on error, fp should still be NULL */
|
||||||
|
zfs_file_open(path, flags, mode, &fp);
|
||||||
|
return (swizzle(fp));
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
kernel_offloader_file_write(void *fp_handle, void *handle, size_t count,
|
||||||
|
size_t trailing_zeros, loff_t offset, ssize_t *resid, int *err)
|
||||||
|
{
|
||||||
|
zfs_file_t *fp = (zfs_file_t *)unswizzle(fp_handle);
|
||||||
|
if (!fp) {
|
||||||
|
return (ENODEV);
|
||||||
|
}
|
||||||
|
|
||||||
|
koh_t *koh = (koh_t *)unswizzle(handle);
|
||||||
|
if (!koh) {
|
||||||
|
return (EIO);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!err) {
|
||||||
|
return (EIO);
|
||||||
|
}
|
||||||
|
|
||||||
|
*err = zfs_file_pwrite(fp, ptr_start(koh, 0),
|
||||||
|
count, offset, resid);
|
||||||
|
|
||||||
|
if (*err == 0) {
|
||||||
|
void *zeros = kzalloc(trailing_zeros, GFP_KERNEL);
|
||||||
|
*err = zfs_file_pwrite(fp, zeros,
|
||||||
|
trailing_zeros, offset + count, resid);
|
||||||
|
kfree(zeros);
|
||||||
|
}
|
||||||
|
|
||||||
|
return (*err);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
kernel_offloader_file_close(void *fp_handle)
|
||||||
|
{
|
||||||
|
zfs_file_close(unswizzle(fp_handle));
|
||||||
|
}
|
||||||
|
|
||||||
|
void *
|
||||||
|
kernel_offloader_disk_open(dpusm_dd_t *disk_data)
|
||||||
|
{
|
||||||
|
return (swizzle(disk_data->bdev));
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
kernel_offloader_disk_invalidate(void *disk_handle)
|
||||||
|
{
|
||||||
|
struct block_device *bdev =
|
||||||
|
(struct block_device *)unswizzle(disk_handle);
|
||||||
|
invalidate_bdev(bdev);
|
||||||
|
return (DPUSM_OK);
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
kernel_offloader_disk_write(void *disk_handle, void *handle, size_t data_size,
|
||||||
|
size_t trailing_zeros, uint64_t io_offset, int flags,
|
||||||
|
dpusm_disk_write_completion_t write_completion, void *wc_args)
|
||||||
|
{
|
||||||
|
struct block_device *bdev =
|
||||||
|
(struct block_device *)unswizzle(disk_handle);
|
||||||
|
koh_t *koh = (koh_t *)unswizzle(handle);
|
||||||
|
|
||||||
|
const size_t io_size = data_size + trailing_zeros;
|
||||||
|
|
||||||
|
kod_run(disk_write_down);
|
||||||
|
|
||||||
|
if (trailing_zeros) {
|
||||||
|
/* create a copy of the data with the trailing zeros attached */
|
||||||
|
void *copy = kzalloc(io_size, GFP_KERNEL);
|
||||||
|
memcpy(copy, ptr_start(koh, 0), data_size);
|
||||||
|
|
||||||
|
write_lock(&rwlock);
|
||||||
|
/* need to keep copy alive, so replace koh->ptr */
|
||||||
|
if (koh->type == KOH_REAL) {
|
||||||
|
/* subtract size of original koh->ptr */
|
||||||
|
active_size -= koh->size;
|
||||||
|
active_actual -= koh->size;
|
||||||
|
|
||||||
|
kfree(koh->ptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
koh->type = KOH_REAL;
|
||||||
|
koh->ptr = copy;
|
||||||
|
koh->size = io_size;
|
||||||
|
|
||||||
|
total_size += io_size;
|
||||||
|
active_size += io_size;
|
||||||
|
total_actual += io_size;
|
||||||
|
active_actual += io_size;
|
||||||
|
|
||||||
|
/* wrapper struct size was not modified */
|
||||||
|
write_unlock(&rwlock);
|
||||||
|
}
|
||||||
|
|
||||||
|
abd_t *abd = abd_get_from_buf(koh->ptr, io_size);
|
||||||
|
zio_push_transform(wc_args, abd, io_size, io_size, NULL);
|
||||||
|
|
||||||
|
/* __vdev_disk_physio already adds write_completion */
|
||||||
|
(void) write_completion;
|
||||||
|
|
||||||
|
return (__vdev_classic_physio(bdev, wc_args,
|
||||||
|
io_size, io_offset, WRITE, flags));
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
kernel_offloader_disk_flush(void *disk_handle,
|
||||||
|
dpusm_disk_flush_completion_t flush_completion, void *fc_args)
|
||||||
|
{
|
||||||
|
struct block_device *bdev =
|
||||||
|
(struct block_device *)unswizzle(disk_handle);
|
||||||
|
|
||||||
|
/* vdev_disk_io_flush already adds flush completion */
|
||||||
|
(void) flush_completion;
|
||||||
|
|
||||||
|
return (vdev_disk_io_flush(bdev, fc_args));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Nothing to do: kernel_offloader_disk_open() only hands out an
 * existing block device pointer and acquires no resources.
 */
void
kernel_offloader_disk_close(void *disk_handle)
{
	(void) disk_handle;
}
|
|
@ -0,0 +1,152 @@
|
||||||
|
/*
|
||||||
|
* © 2021. Triad National Security, LLC. All rights reserved.
|
||||||
|
*
|
||||||
|
* This program was produced under U.S. Government contract
|
||||||
|
* 89233218CNA000001 for Los Alamos National Laboratory (LANL), which
|
||||||
|
* is operated by Triad National Security, LLC for the U.S.
|
||||||
|
* Department of Energy/National Nuclear Security Administration. All
|
||||||
|
* rights in the program are reserved by Triad National Security, LLC,
|
||||||
|
* and the U.S. Department of Energy/National Nuclear Security
|
||||||
|
* Administration. The Government is granted for itself and others
|
||||||
|
* acting on its behalf a nonexclusive, paid-up, irrevocable worldwide
|
||||||
|
* license in this material to reproduce, prepare derivative works,
|
||||||
|
* distribute copies to the public, perform publicly and display
|
||||||
|
* publicly, and to permit others to do so.
|
||||||
|
*
|
||||||
|
* ----
|
||||||
|
*
|
||||||
|
* This program is open source under the BSD-3 License.
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* 1. Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer in the documentation
|
||||||
|
* and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* 3. Neither the name of the copyright holder nor the names of its
|
||||||
|
* contributors may be used to endorse or promote products derived from this
|
||||||
|
* software without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef _KERNEL_OFFLOADER_H
|
||||||
|
#define _KERNEL_OFFLOADER_H
|
||||||
|
|
||||||
|
#include <linux/blk_types.h>
|
||||||
|
#include <linux/scatterlist.h>
|
||||||
|
|
||||||
|
#include <dpusm/provider_api.h>
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This file represents the API provided by a vendor to access their
|
||||||
|
* offloader. The API can be anything the implementor chooses to
|
||||||
|
* expose. There are no limitations on the function signature or
|
||||||
|
* name. They just have to be called correctly in the Z.I.A. provider.
|
||||||
|
* ZFS and Z.I.A. will not need direct access to any data located on
|
||||||
|
* the offloader. Some raw pointers from Z.I.A. will be used directly,
|
||||||
|
* but those will always contain information located in memory.
|
||||||
|
*
|
||||||
|
* -------------------------------------------------------------------
|
||||||
|
*
|
||||||
|
* The kernel offloader fakes offloads by copying data into memory
|
||||||
|
* regions distinct from the calling process's memory space. The
|
||||||
|
* corresponding C file conflates the driver and the "physical" device
|
||||||
|
* since both memory spaces are in kernel space and run on the
|
||||||
|
* CPU. This offloader provides opaque pointers to the provider to
|
||||||
|
* simulate handles to inaccessible memory locations. In order to
|
||||||
|
* prevent the handle from being dereferenced and used successfully by
|
||||||
|
* ZFS or Z.I.A., the handle pointer is masked with a random value
|
||||||
|
* generated at load-time. Other offloaders may choose to present
|
||||||
|
* non-void handles.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
 * Return values of the kernel offloader API.
 */

/* the call succeeded */
#define	KERNEL_OFFLOADER_OK		0

/* the function exists, but the requested operation is not implemented */
#define	KERNEL_OFFLOADER_UNAVAILABLE	1

/* the operation ran, but could not complete */
#define	KERNEL_OFFLOADER_ERROR		2

/* the operation ran, but a check on its result failed */
#define	KERNEL_OFFLOADER_BAD_RESULT	3

/* the "hardware" went down for some reason (overheated, unplugged, etc.) */
#define	KERNEL_OFFLOADER_DOWN		4
|
||||||
|
|
||||||
|
/*
|
||||||
|
* init function - this should be the kernel module init, but
|
||||||
|
* kernel offloader is not compiled as a separate kernel module
|
||||||
|
*/
|
||||||
|
void kernel_offloader_init(void);
|
||||||
|
void kernel_offloader_fini(void);
|
||||||
|
|
||||||
|
/* offloader handle access */
|
||||||
|
void *kernel_offloader_alloc(size_t size);
|
||||||
|
void *kernel_offloader_alloc_ref(void *src, size_t offset, size_t size);
|
||||||
|
int kernel_offloader_get_size(void *handle, size_t *size, size_t *actual);
|
||||||
|
int kernel_offloader_free(void *handle);
|
||||||
|
int kernel_offloader_copy_from_generic(void *handle, size_t offset,
|
||||||
|
const void *src, size_t size);
|
||||||
|
int kernel_offloader_copy_to_generic(void *handle, size_t offset,
|
||||||
|
void *dst, size_t size);
|
||||||
|
/* status check */
|
||||||
|
int kernel_offloader_mem_stats(
|
||||||
|
void *t_count_handle, void *t_size_handle, void *t_actual_handle,
|
||||||
|
void *a_count_handle, void *a_size_handle, void *a_actual_handle);
|
||||||
|
int kernel_offloader_cmp(void *lhs_handle, void *rhs_handle, int *diff);
|
||||||
|
int kernel_offloader_zero_fill(void *handle, size_t offset, size_t size);
|
||||||
|
int kernel_offloader_all_zeros(void *handle, size_t offset, size_t size);
|
||||||
|
|
||||||
|
/* ZIO Pipeline Stages */
|
||||||
|
|
||||||
|
int kernel_offloader_compress(dpusm_compress_t alg, int level,
|
||||||
|
void *src, size_t s_len, void *dst, void *d_len);
|
||||||
|
|
||||||
|
int kernel_offloader_decompress(dpusm_compress_t alg, void *level,
|
||||||
|
void *src, size_t s_len, void *dst, void *d_len);
|
||||||
|
|
||||||
|
int kernel_offloader_checksum(dpusm_checksum_t alg,
|
||||||
|
dpusm_checksum_byteorder_t order, void *data, size_t size,
|
||||||
|
void *cksum, size_t cksum_size);
|
||||||
|
|
||||||
|
void *kernel_offloader_raidz_alloc(size_t nparity, size_t ndata);
|
||||||
|
int kernel_offloader_raidz_set_column(void *raidz, uint64_t c,
|
||||||
|
void *col, size_t size);
|
||||||
|
int kernel_offloader_raidz_free(void *raidz);
|
||||||
|
int kernel_offloader_raidz_gen(void *raidz);
|
||||||
|
int kernel_offloader_raidz_rec(void *raidz, int *tgts, int ntgts);
|
||||||
|
|
||||||
|
/* io */
|
||||||
|
void *kernel_offloader_file_open(const char *path, int flags, int mode);
|
||||||
|
int kernel_offloader_file_write(void *fp_handle, void *handle, size_t count,
|
||||||
|
size_t trailing_zeros, loff_t offset, ssize_t *resid, int *err);
|
||||||
|
void kernel_offloader_file_close(void *fp_handle);
|
||||||
|
|
||||||
|
void *kernel_offloader_disk_open(dpusm_dd_t *disk_data);
|
||||||
|
int kernel_offloader_disk_reread_part(void *disk_handle);
|
||||||
|
int kernel_offloader_disk_invalidate(void *disk_handle);
|
||||||
|
int kernel_offloader_disk_write(void *disk_handle, void *handle,
|
||||||
|
size_t data_size, size_t trailing_zeros, uint64_t io_offset, int flags,
|
||||||
|
dpusm_disk_write_completion_t write_completion, void *wc_args);
|
||||||
|
int kernel_offloader_disk_flush(void *disk_handle,
|
||||||
|
dpusm_disk_flush_completion_t flush_completion, void *fc_args);
|
||||||
|
void kernel_offloader_disk_close(void *disk_handle);
|
||||||
|
|
||||||
|
#endif
|
|
@ -0,0 +1,453 @@
|
||||||
|
/*
|
||||||
|
* © 2021. Triad National Security, LLC. All rights reserved.
|
||||||
|
*
|
||||||
|
* This program was produced under U.S. Government contract
|
||||||
|
* 89233218CNA000001 for Los Alamos National Laboratory (LANL), which
|
||||||
|
* is operated by Triad National Security, LLC for the U.S.
|
||||||
|
* Department of Energy/National Nuclear Security Administration. All
|
||||||
|
* rights in the program are reserved by Triad National Security, LLC,
|
||||||
|
* and the U.S. Department of Energy/National Nuclear Security
|
||||||
|
* Administration. The Government is granted for itself and others
|
||||||
|
* acting on its behalf a nonexclusive, paid-up, irrevocable worldwide
|
||||||
|
* license in this material to reproduce, prepare derivative works,
|
||||||
|
* distribute copies to the public, perform publicly and display
|
||||||
|
* publicly, and to permit others to do so.
|
||||||
|
*
|
||||||
|
* ----
|
||||||
|
*
|
||||||
|
* This program is open source under the BSD-3 License.
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* 1. Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer in the documentation
|
||||||
|
* and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* 3. Neither the name of the copyright holder nor the names of its
|
||||||
|
* contributors may be used to endorse or promote products derived from this
|
||||||
|
* software without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This provider communicates with the "kernel offloader", which is
|
||||||
|
* actually just software running on the local kernel.
|
||||||
|
*
|
||||||
|
* Providers and offloaders are usually separate entities. However, to
|
||||||
|
* keep things simple, the kernel offloader is compiled into this
|
||||||
|
* provider.
|
||||||
|
*
|
||||||
|
* Providers run at the same location as ZFS. They are intended to be
|
||||||
|
* small shims that translate between the DPUSM provider API and an
|
||||||
|
* offloader's API (probably a header file analogous to
|
||||||
|
* kernel_offloader.h).
|
||||||
|
*
|
||||||
|
* The method used to communicate between the provider and offloader
|
||||||
|
* is not prescribed by the DPUSM. This allows for vendors to place
|
||||||
|
* their offloaders locally or remotely, and use whatever method they
|
||||||
|
* wish to use to communicate with their offloaders e.g. NVMeOF. The
|
||||||
|
* kernel offloader is local and the communication method to access
|
||||||
|
* the kernel offloader is calling local functions.
|
||||||
|
*
|
||||||
|
* Offloaders are normally expected to be hardware with its own memory
|
||||||
|
* space. In order to simulate copying data to an offloader's memory
|
||||||
|
* space, the kernel offloader allocates new buffers and copies ZFS
|
||||||
|
* data into them, rather than using ZFS data directly. In order to
|
||||||
|
* simulate handles that the provider does not know how to manipulate
|
||||||
|
* or have access to, pointers returned from the kernel offloader are
|
||||||
|
* masked with a random value.
|
||||||
|
*
|
||||||
|
* Note that this provider has to be loaded after ZFS because it
|
||||||
|
* depends on ZFS for its "offload" functionality.
|
||||||
|
*
|
||||||
|
* Usage:
|
||||||
|
* 1. Reconfigure ZFS with --with-zia=<DPUSM root>
|
||||||
|
*
|
||||||
|
* 2. Create a zpool
|
||||||
|
*
|
||||||
|
* 3. Select this provider with
|
||||||
|
* zpool set zia_provider=zia-software-provider <zpool>
|
||||||
|
*
|
||||||
|
* 4. Enable "offloading" of operations with
|
||||||
|
* zpool set zia_compress=on <zpool>
|
||||||
|
* zpool set zia_decompress=on <zpool>
|
||||||
|
* zpool set zia_checksum=on <zpool>
|
||||||
|
* zpool set zia_raidz1_gen=on <zpool>
|
||||||
|
* zpool set zia_raidz2_gen=on <zpool>
|
||||||
|
* zpool set zia_raidz3_gen=on <zpool>
|
||||||
|
* zpool set zia_raidz1_rec=on <zpool>
|
||||||
|
* zpool set zia_raidz2_rec=on <zpool>
|
||||||
|
* zpool set zia_raidz3_rec=on <zpool>
|
||||||
|
* zpool set zia_file_write=on <zpool>
|
||||||
|
* zpool set zia_disk_write=on <zpool>
|
||||||
|
*
|
||||||
|
* 5. Use the zpool as you would normally
|
||||||
|
*
|
||||||
|
* Notes:
|
||||||
|
* If a ZFS IO stage is not run, enabling a Z.I.A. offload
|
||||||
|
* will have no effect.
|
||||||
|
*
|
||||||
|
* Resilvering requires both zia_checksum and zia_raidz*_rec
|
||||||
|
* to be enabled. Not enabling checksums would cause offloaded
|
||||||
|
* resilvering to fail, and perform the remaining operations
|
||||||
|
* in memory. To avoid the cost of offloading data only to
|
||||||
|
* fail, a check has been inserted to prevent offloading
|
||||||
|
* altogether if zia_checksum is not enabled.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <linux/init.h>
|
||||||
|
#include <linux/module.h>
|
||||||
|
#include <linux/kernel.h>
|
||||||
|
|
||||||
|
#include <dpusm/provider_api.h> /* the DPUSM provider API */
|
||||||
|
#include <kernel_offloader.h> /* provides access to the offloader */
|
||||||
|
|
||||||
|
/* translate from offloader values to DPUSM values */
|
||||||
|
static int
|
||||||
|
translate_rc(const int offloader_rc)
|
||||||
|
{
|
||||||
|
int dpusm_rc = DPUSM_NOT_IMPLEMENTED;
|
||||||
|
switch (offloader_rc) {
|
||||||
|
case KERNEL_OFFLOADER_OK:
|
||||||
|
dpusm_rc = DPUSM_OK;
|
||||||
|
break;
|
||||||
|
case KERNEL_OFFLOADER_ERROR:
|
||||||
|
dpusm_rc = DPUSM_ERROR;
|
||||||
|
break;
|
||||||
|
case KERNEL_OFFLOADER_UNAVAILABLE:
|
||||||
|
dpusm_rc = DPUSM_NOT_IMPLEMENTED;
|
||||||
|
break;
|
||||||
|
case KERNEL_OFFLOADER_BAD_RESULT:
|
||||||
|
dpusm_rc = DPUSM_BAD_RESULT;
|
||||||
|
break;
|
||||||
|
case KERNEL_OFFLOADER_DOWN:
|
||||||
|
dpusm_rc = DPUSM_PROVIDER_INVALIDATED;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
/* only translate recognized values */
|
||||||
|
dpusm_rc = offloader_rc;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
return (dpusm_rc);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
sw_provider_algorithms(int *compress, int *decompress,
|
||||||
|
int *checksum, int *checksum_byteorder, int *raid)
|
||||||
|
{
|
||||||
|
*compress =
|
||||||
|
DPUSM_COMPRESS_GZIP_1 |
|
||||||
|
DPUSM_COMPRESS_GZIP_2 |
|
||||||
|
DPUSM_COMPRESS_GZIP_3 |
|
||||||
|
DPUSM_COMPRESS_GZIP_4 |
|
||||||
|
DPUSM_COMPRESS_GZIP_5 |
|
||||||
|
DPUSM_COMPRESS_GZIP_6 |
|
||||||
|
DPUSM_COMPRESS_GZIP_7 |
|
||||||
|
DPUSM_COMPRESS_GZIP_8 |
|
||||||
|
DPUSM_COMPRESS_GZIP_9 |
|
||||||
|
DPUSM_COMPRESS_LZ4;
|
||||||
|
|
||||||
|
*decompress = *compress;
|
||||||
|
|
||||||
|
*checksum = DPUSM_CHECKSUM_FLETCHER_2 | DPUSM_CHECKSUM_FLETCHER_4;
|
||||||
|
|
||||||
|
*checksum_byteorder = DPUSM_BYTEORDER_NATIVE | DPUSM_BYTEORDER_BYTESWAP;
|
||||||
|
|
||||||
|
*raid =
|
||||||
|
DPUSM_RAID_1_GEN |
|
||||||
|
DPUSM_RAID_2_GEN |
|
||||||
|
DPUSM_RAID_3_GEN |
|
||||||
|
DPUSM_RAID_1_REC |
|
||||||
|
DPUSM_RAID_2_REC |
|
||||||
|
DPUSM_RAID_3_REC;
|
||||||
|
|
||||||
|
return (DPUSM_OK);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Forward a size query for an offloader handle to the kernel offloader
 * and translate its return code. The size and actual pointers are
 * passed through untouched.
 */
static int
sw_provider_get_size(void *handle, size_t *size, size_t *actual)
{
	const int offloader_rc = kernel_offloader_get_size(handle, size, actual);

	return (translate_rc(offloader_rc));
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
sw_provider_copy_from_generic(dpusm_mv_t *mv, const void *buf, size_t size)
|
||||||
|
{
|
||||||
|
return (translate_rc(kernel_offloader_copy_from_generic(mv->handle,
|
||||||
|
mv->offset, buf, size)));
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
sw_provider_copy_to_generic(dpusm_mv_t *mv, void *buf, size_t size)
|
||||||
|
{
|
||||||
|
return (translate_rc(kernel_offloader_copy_to_generic(mv->handle,
|
||||||
|
mv->offset, buf, size)));
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
sw_provider_mem_stats(size_t *t_count, size_t *t_size, size_t *t_actual,
|
||||||
|
size_t *a_count, size_t *a_size, size_t *a_actual)
|
||||||
|
{
|
||||||
|
void *t_count_handle = NULL;
|
||||||
|
void *t_size_handle = NULL;
|
||||||
|
void *t_actual_handle = NULL;
|
||||||
|
void *a_size_handle = NULL;
|
||||||
|
void *a_count_handle = NULL;
|
||||||
|
void *a_actual_handle = NULL;
|
||||||
|
|
||||||
|
if (t_count) {
|
||||||
|
t_count_handle = kernel_offloader_alloc(sizeof (size_t));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (t_size) {
|
||||||
|
t_size_handle = kernel_offloader_alloc(sizeof (size_t));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (t_actual) {
|
||||||
|
t_actual_handle = kernel_offloader_alloc(sizeof (size_t));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (a_count) {
|
||||||
|
a_count_handle = kernel_offloader_alloc(sizeof (size_t));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (a_size) {
|
||||||
|
a_size_handle = kernel_offloader_alloc(sizeof (size_t));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (a_actual) {
|
||||||
|
a_actual_handle = kernel_offloader_alloc(sizeof (size_t));
|
||||||
|
}
|
||||||
|
|
||||||
|
const int rc = kernel_offloader_mem_stats(t_count, t_size, t_actual,
|
||||||
|
a_count, a_size, a_actual);
|
||||||
|
if (rc == KERNEL_OFFLOADER_OK) {
|
||||||
|
/* should probably check for errors */
|
||||||
|
kernel_offloader_copy_to_generic(t_count_handle, 0,
|
||||||
|
t_count, sizeof (*t_count));
|
||||||
|
kernel_offloader_copy_to_generic(t_size_handle, 0,
|
||||||
|
t_size, sizeof (*t_size));
|
||||||
|
kernel_offloader_copy_to_generic(t_actual_handle, 0,
|
||||||
|
t_actual, sizeof (*t_actual));
|
||||||
|
kernel_offloader_copy_to_generic(a_count_handle, 0,
|
||||||
|
a_count, sizeof (*a_count));
|
||||||
|
kernel_offloader_copy_to_generic(a_size_handle, 0,
|
||||||
|
a_size, sizeof (*a_size));
|
||||||
|
kernel_offloader_copy_to_generic(a_actual_handle, 0,
|
||||||
|
a_actual, sizeof (*a_actual));
|
||||||
|
}
|
||||||
|
|
||||||
|
kernel_offloader_free(t_size_handle);
|
||||||
|
kernel_offloader_free(t_count_handle);
|
||||||
|
kernel_offloader_free(t_actual_handle);
|
||||||
|
kernel_offloader_free(a_size_handle);
|
||||||
|
kernel_offloader_free(a_count_handle);
|
||||||
|
kernel_offloader_free(a_actual_handle);
|
||||||
|
|
||||||
|
return (translate_rc(rc));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Ask the kernel offloader to zero size bytes of the buffer behind
 * handle, starting at offset. Returns the translated result.
 */
static int
sw_provider_zero_fill(void *handle, size_t offset, size_t size)
{
	const int offloader_rc = kernel_offloader_zero_fill(handle, offset, size);

	return (translate_rc(offloader_rc));
}
|
||||||
|
|
||||||
|
/*
 * Run the kernel offloader's all-zeros check over size bytes of the
 * buffer behind handle, starting at offset. Returns the translated
 * result.
 */
static int
sw_provider_all_zeros(void *handle, size_t offset, size_t size)
{
	const int offloader_rc = kernel_offloader_all_zeros(handle, offset, size);

	return (translate_rc(offloader_rc));
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
sw_provider_compress(dpusm_compress_t alg, int level,
|
||||||
|
void *src, size_t s_len, void *dst, size_t *d_len)
|
||||||
|
{
|
||||||
|
/* buffer that offloader fills out */
|
||||||
|
void *d_len_handle = kernel_offloader_alloc(sizeof (size_t));
|
||||||
|
|
||||||
|
/* send original d_len to offloader */
|
||||||
|
kernel_offloader_copy_from_generic(d_len_handle, 0,
|
||||||
|
d_len, sizeof (*d_len));
|
||||||
|
|
||||||
|
const int kz_rc = kernel_offloader_compress(alg, level,
|
||||||
|
src, s_len, dst, d_len_handle);
|
||||||
|
if (kz_rc == KERNEL_OFFLOADER_OK) {
|
||||||
|
/* get updated d_len back from offloader */
|
||||||
|
kernel_offloader_copy_to_generic(d_len_handle, 0,
|
||||||
|
d_len, sizeof (*d_len));
|
||||||
|
}
|
||||||
|
|
||||||
|
kernel_offloader_free(d_len_handle);
|
||||||
|
|
||||||
|
return (translate_rc(kz_rc));
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
sw_provider_decompress(dpusm_compress_t alg, int *level,
|
||||||
|
void *src, size_t s_len, void *dst, size_t *d_len)
|
||||||
|
{
|
||||||
|
/* buffers that offloader fills out */
|
||||||
|
void *level_handle = kernel_offloader_alloc(sizeof (*level));
|
||||||
|
void *d_len_handle = kernel_offloader_alloc(sizeof (*d_len));
|
||||||
|
|
||||||
|
/* send original d_len to offloader */
|
||||||
|
kernel_offloader_copy_from_generic(d_len_handle, 0,
|
||||||
|
d_len, sizeof (*d_len));
|
||||||
|
|
||||||
|
const int kz_rc = kernel_offloader_decompress(alg, level_handle,
|
||||||
|
src, s_len, dst, d_len_handle);
|
||||||
|
if (kz_rc == KERNEL_OFFLOADER_OK) {
|
||||||
|
/* get updated d_len back from offloader */
|
||||||
|
kernel_offloader_copy_to_generic(d_len_handle, 0,
|
||||||
|
d_len, sizeof (*d_len));
|
||||||
|
kernel_offloader_copy_to_generic(level_handle, 0,
|
||||||
|
level, sizeof (*level));
|
||||||
|
}
|
||||||
|
|
||||||
|
kernel_offloader_free(d_len_handle);
|
||||||
|
kernel_offloader_free(level_handle);
|
||||||
|
|
||||||
|
return (translate_rc(kz_rc));
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
sw_provider_checksum(dpusm_checksum_t alg,
|
||||||
|
dpusm_checksum_byteorder_t order, void *data, size_t size,
|
||||||
|
void *cksum, size_t cksum_size)
|
||||||
|
{
|
||||||
|
/* maybe translate alg and order */
|
||||||
|
|
||||||
|
/* trigger offloader to do actual calculation */
|
||||||
|
return (translate_rc(kernel_offloader_checksum(alg,
|
||||||
|
order, data, size, cksum, cksum_size)));
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
sw_provider_raid_can_compute(size_t nparity, size_t ndata,
|
||||||
|
size_t *col_sizes, int rec)
|
||||||
|
{
|
||||||
|
if ((nparity < 1) || (nparity > 3)) {
|
||||||
|
return (DPUSM_NOT_SUPPORTED);
|
||||||
|
}
|
||||||
|
|
||||||
|
return (DPUSM_OK);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Trigger raidz parity generation on the offloader for the given raid
 * context and translate the result.
 */
static int
sw_provider_raid_gen(void *raid)
{
	const int offloader_rc = kernel_offloader_raidz_gen(raid);

	return (translate_rc(offloader_rc));
}
|
||||||
|
|
||||||
|
/*
 * Compare two offloader buffers via the kernel offloader. The
 * comparison outcome is written to diff by the offloader; the
 * translated return code is handed back.
 */
static int
sw_provider_raid_cmp(void *lhs_handle, void *rhs_handle, int *diff)
{
	const int offloader_rc = kernel_offloader_cmp(lhs_handle,
	    rhs_handle, diff);

	return (translate_rc(offloader_rc));
}
|
||||||
|
|
||||||
|
/*
 * Trigger raidz reconstruction on the offloader for the ntgts targets
 * listed in tgts and translate the result.
 */
static int
sw_provider_raid_rec(void *raid, int *tgts, int ntgts)
{
	const int offloader_rc = kernel_offloader_raidz_rec(raid, tgts, ntgts);

	return (translate_rc(offloader_rc));
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
sw_provider_file_write(void *fp_handle, void *handle, size_t count,
|
||||||
|
size_t trailing_zeros, loff_t offset, ssize_t *resid, int *err)
|
||||||
|
{
|
||||||
|
return (translate_rc(kernel_offloader_file_write(fp_handle,
|
||||||
|
handle, count, trailing_zeros, offset, resid, err)));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* BEGIN CSTYLED */
|
||||||
|
static const dpusm_pf_t sw_provider_functions = {
|
||||||
|
.algorithms = sw_provider_algorithms,
|
||||||
|
.alloc = kernel_offloader_alloc,
|
||||||
|
.alloc_ref = kernel_offloader_alloc_ref,
|
||||||
|
.get_size = sw_provider_get_size,
|
||||||
|
.free = kernel_offloader_free,
|
||||||
|
.copy = {
|
||||||
|
.from = {
|
||||||
|
.generic = sw_provider_copy_from_generic,
|
||||||
|
.ptr = NULL,
|
||||||
|
.scatterlist = NULL,
|
||||||
|
},
|
||||||
|
.to = {
|
||||||
|
.generic = sw_provider_copy_to_generic,
|
||||||
|
.ptr = NULL,
|
||||||
|
.scatterlist = NULL,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
.mem_stats = sw_provider_mem_stats,
|
||||||
|
.zero_fill = sw_provider_zero_fill,
|
||||||
|
.all_zeros = sw_provider_all_zeros,
|
||||||
|
.compress = sw_provider_compress,
|
||||||
|
.decompress = sw_provider_decompress,
|
||||||
|
.checksum = sw_provider_checksum,
|
||||||
|
.raid = {
|
||||||
|
.can_compute = sw_provider_raid_can_compute,
|
||||||
|
.alloc = kernel_offloader_raidz_alloc,
|
||||||
|
.set_column = kernel_offloader_raidz_set_column,
|
||||||
|
.free = kernel_offloader_raidz_free,
|
||||||
|
.gen = sw_provider_raid_gen,
|
||||||
|
.cmp = sw_provider_raid_cmp,
|
||||||
|
.rec = sw_provider_raid_rec,
|
||||||
|
},
|
||||||
|
.file = {
|
||||||
|
.open = kernel_offloader_file_open,
|
||||||
|
.write = sw_provider_file_write,
|
||||||
|
.close = kernel_offloader_file_close,
|
||||||
|
},
|
||||||
|
.disk = {
|
||||||
|
.open = kernel_offloader_disk_open,
|
||||||
|
.invalidate = kernel_offloader_disk_invalidate,
|
||||||
|
.write = kernel_offloader_disk_write,
|
||||||
|
.flush = kernel_offloader_disk_flush,
|
||||||
|
.close = kernel_offloader_disk_close,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
/* END CSTYLED */
|
||||||
|
|
||||||
|
static int __init
|
||||||
|
sw_provider_init(void)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* this should be a separate kernel module,
|
||||||
|
* but is here for simplicity
|
||||||
|
*/
|
||||||
|
kernel_offloader_init();
|
||||||
|
|
||||||
|
return (dpusm_register_bsd(THIS_MODULE, &sw_provider_functions));
|
||||||
|
}
|
||||||
|
|
||||||
|
static void __exit
|
||||||
|
sw_provider_exit(void)
|
||||||
|
{
|
||||||
|
dpusm_unregister_bsd(THIS_MODULE);
|
||||||
|
|
||||||
|
kernel_offloader_fini();
|
||||||
|
}
|
||||||
|
|
||||||
|
/* run sw_provider_init() when the module is loaded */
module_init(sw_provider_init);
|
||||||
|
/* run sw_provider_exit() when the module is unloaded */
module_exit(sw_provider_exit);
|
||||||
|
|
||||||
|
/*
 * NOTE(review): the header of this file describes BSD-3 terms and the
 * provider registers through dpusm_register_bsd(), while the module
 * declares itself as CDDL -- confirm this combination is intended.
 */
MODULE_LICENSE("CDDL");
|
|
@ -38,6 +38,7 @@
|
||||||
|
|
||||||
%bcond_with debug
|
%bcond_with debug
|
||||||
%bcond_with debuginfo
|
%bcond_with debuginfo
|
||||||
|
%bcond_with zia
|
||||||
|
|
||||||
|
|
||||||
Name: %{module}-kmod
|
Name: %{module}-kmod
|
||||||
|
@ -124,6 +125,12 @@ bash %{SOURCE10} --target %{_target_cpu} %{?repo:--repo %{?repo}} --kmodname %{
|
||||||
%define debuginfo --disable-debuginfo
|
%define debuginfo --disable-debuginfo
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
|
%if %{with zia}
|
||||||
|
%define zia --with-zia="%{?DPUSM_ROOT}"
|
||||||
|
%else
|
||||||
|
%define zia --without-zia
|
||||||
|
%endif
|
||||||
|
|
||||||
# Leverage VPATH from configure to avoid making multiple copies.
|
# Leverage VPATH from configure to avoid making multiple copies.
|
||||||
%define _configure ../%{module}-%{version}/configure
|
%define _configure ../%{module}-%{version}/configure
|
||||||
|
|
||||||
|
@ -144,7 +151,9 @@ for kernel_version in %{?kernel_versions}; do
|
||||||
%{debuginfo} \
|
%{debuginfo} \
|
||||||
%{?kernel_cc} \
|
%{?kernel_cc} \
|
||||||
%{?kernel_ld} \
|
%{?kernel_ld} \
|
||||||
%{?kernel_llvm}
|
%{?kernel_llvm} \
|
||||||
|
%{zia}
|
||||||
|
|
||||||
|
|
||||||
# Pre-6.10 kernel builds didn't need to copy over the source files to the
|
# Pre-6.10 kernel builds didn't need to copy over the source files to the
|
||||||
# build directory. However we do need to do it though post-6.10 due to
|
# build directory. However we do need to do it though post-6.10 due to
|
||||||
|
|
|
@ -68,6 +68,7 @@
|
||||||
%bcond_with systemd
|
%bcond_with systemd
|
||||||
%bcond_with pam
|
%bcond_with pam
|
||||||
%bcond_without pyzfs
|
%bcond_without pyzfs
|
||||||
|
%bcond_with zia
|
||||||
|
|
||||||
# Generic enable switch for systemd
|
# Generic enable switch for systemd
|
||||||
%if %{with systemd}
|
%if %{with systemd}
|
||||||
|
@ -390,6 +391,12 @@ support for unlocking datasets on user login.
|
||||||
%define pam --disable-pam
|
%define pam --disable-pam
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
|
# Select the configure switch for Z.I.A.: the DPUSM root is handed over
# when the zia build conditional is on, and Z.I.A. is disabled otherwise.
# DPUSM_ROOT is expanded in its optional form so that an undefined value
# becomes an empty string instead of unexpanded macro text.
%if %{with zia}
%define zia --with-zia="%{?DPUSM_ROOT}"
%else
%define zia --without-zia
%endif
|
||||||
|
|
||||||
%setup -q
|
%setup -q
|
||||||
|
|
||||||
%build
|
%build
|
||||||
|
@ -409,7 +416,8 @@ support for unlocking datasets on user login.
|
||||||
%{ubsan} \
|
%{ubsan} \
|
||||||
%{systemd} \
|
%{systemd} \
|
||||||
%{pam} \
|
%{pam} \
|
||||||
%{pyzfs}
|
%{pyzfs} \
|
||||||
|
%{zia}
|
||||||
make %{?_smp_mflags}
|
make %{?_smp_mflags}
|
||||||
|
|
||||||
%install
|
%install
|
||||||
|
|
|
@ -59,6 +59,12 @@ fi
|
||||||
%define debuginfo --disable-debuginfo
|
%define debuginfo --disable-debuginfo
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
|
%if %{with zia}
|
||||||
|
%define zia --with-zia="%{?DPUSM_ROOT}"
|
||||||
|
%else
|
||||||
|
%define zia --without-zia
|
||||||
|
%endif
|
||||||
|
|
||||||
%setup -n %{kmod_name}-%{version}
|
%setup -n %{kmod_name}-%{version}
|
||||||
%build
|
%build
|
||||||
%configure \
|
%configure \
|
||||||
|
@ -69,7 +75,8 @@ fi
|
||||||
%{debuginfo} \
|
%{debuginfo} \
|
||||||
%{?kernel_cc} \
|
%{?kernel_cc} \
|
||||||
%{?kernel_ld} \
|
%{?kernel_ld} \
|
||||||
%{?kernel_llvm}
|
%{?kernel_llvm} \
|
||||||
|
%{zia}
|
||||||
make %{?_smp_mflags}
|
make %{?_smp_mflags}
|
||||||
|
|
||||||
# Module signing (modsign)
|
# Module signing (modsign)
|
||||||
|
|
|
@ -222,3 +222,7 @@ tags = ['functional', 'zvol', 'zvol_misc']
|
||||||
tests = ['idmap_mount_001', 'idmap_mount_002', 'idmap_mount_003',
|
tests = ['idmap_mount_001', 'idmap_mount_002', 'idmap_mount_003',
|
||||||
'idmap_mount_004', 'idmap_mount_005']
|
'idmap_mount_004', 'idmap_mount_005']
|
||||||
tags = ['functional', 'idmap_mount']
|
tags = ['functional', 'idmap_mount']
|
||||||
|
|
||||||
|
[tests/functional/zia:Linux]
|
||||||
|
tests = ['zia_props', 'zia_write_pipeline', 'zia_raidz_resilver']
|
||||||
|
tags = ['functional', 'zia']
|
||||||
|
|
|
@ -139,6 +139,7 @@ export SYSTEM_FILES_LINUX='attr
|
||||||
groupdel
|
groupdel
|
||||||
groupmod
|
groupmod
|
||||||
hostid
|
hostid
|
||||||
|
insmod
|
||||||
logger
|
logger
|
||||||
losetup
|
losetup
|
||||||
lsattr
|
lsattr
|
||||||
|
@ -154,6 +155,7 @@ export SYSTEM_FILES_LINUX='attr
|
||||||
nsenter
|
nsenter
|
||||||
parted
|
parted
|
||||||
perf
|
perf
|
||||||
|
rmmod
|
||||||
setfattr
|
setfattr
|
||||||
setpriv
|
setpriv
|
||||||
sha256sum
|
sha256sum
|
||||||
|
|
|
@ -387,7 +387,9 @@ nobase_dist_datadir_zfs_tests_tests_DATA += \
|
||||||
functional/zvol/zvol_misc/zvol_misc_common.kshlib \
|
functional/zvol/zvol_misc/zvol_misc_common.kshlib \
|
||||||
functional/zvol/zvol_swap/zvol_swap.cfg \
|
functional/zvol/zvol_swap/zvol_swap.cfg \
|
||||||
functional/idmap_mount/idmap_mount.cfg \
|
functional/idmap_mount/idmap_mount.cfg \
|
||||||
functional/idmap_mount/idmap_mount_common.kshlib
|
functional/idmap_mount/idmap_mount_common.kshlib \
|
||||||
|
functional/zia/zia.cfg \
|
||||||
|
functional/zia/zia.kshlib
|
||||||
|
|
||||||
nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
|
nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
|
||||||
functional/acl/off/cleanup.ksh \
|
functional/acl/off/cleanup.ksh \
|
||||||
|
@ -2141,4 +2143,9 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
|
||||||
functional/idmap_mount/idmap_mount_002.ksh \
|
functional/idmap_mount/idmap_mount_002.ksh \
|
||||||
functional/idmap_mount/idmap_mount_003.ksh \
|
functional/idmap_mount/idmap_mount_003.ksh \
|
||||||
functional/idmap_mount/idmap_mount_004.ksh \
|
functional/idmap_mount/idmap_mount_004.ksh \
|
||||||
functional/idmap_mount/idmap_mount_005.ksh
|
functional/idmap_mount/idmap_mount_005.ksh \
|
||||||
|
functional/zia/cleanup.ksh \
|
||||||
|
functional/zia/setup.ksh \
|
||||||
|
functional/zia/zia_props.ksh \
|
||||||
|
functional/zia/zia_raidz_resilver.ksh \
|
||||||
|
functional/zia/zia_write_pipeline.ksh
|
||||||
|
|
|
@ -0,0 +1,34 @@
|
||||||
|
#!/bin/ksh -p
|
||||||
|
#
|
||||||
|
# CDDL HEADER START
|
||||||
|
#
|
||||||
|
# The contents of this file are subject to the terms of the
|
||||||
|
# Common Development and Distribution License (the "License").
|
||||||
|
# You may not use this file except in compliance with the License.
|
||||||
|
#
|
||||||
|
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||||
|
# or https://opensource.org/licenses/CDDL-1.0.
|
||||||
|
# See the License for the specific language governing permissions
|
||||||
|
# and limitations under the License.
|
||||||
|
#
|
||||||
|
# When distributing Covered Code, include this CDDL HEADER in each
|
||||||
|
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||||
|
# If applicable, add the following below this CDDL HEADER, with the
|
||||||
|
# fields enclosed by brackets "[]" replaced with your own identifying
|
||||||
|
# information: Portions Copyright [yyyy] [name of copyright owner]
|
||||||
|
#
|
||||||
|
# CDDL HEADER END
|
||||||
|
#
|
||||||
|
|
||||||
|
#
|
||||||
|
# Copyright (c) 2021 by Lawrence Livermore National Security, LLC.
|
||||||
|
#
|
||||||
|
|
||||||
|
. $STF_SUITE/include/libtest.shlib
|
||||||
|
. $STF_SUITE/tests/functional/zia/zia.kshlib
|
||||||
|
|
||||||
|
# Cleanup for the zia test group: detach and unload the provider, then
# run the default cleanup.
verify_runnable "global"
|
||||||
|
|
||||||
|
# dpusm_loaded marks the run unsupported when Z.I.A. is available but
# the dpusm module is not loaded
log_must dpusm_loaded
|
||||||
|
# clears the pool's zia_provider property and removes the provider module
log_must unload_provider
|
||||||
|
default_cleanup
|
|
@ -0,0 +1,40 @@
|
||||||
|
#!/bin/ksh -p
|
||||||
|
#
|
||||||
|
# CDDL HEADER START
|
||||||
|
#
|
||||||
|
# The contents of this file are subject to the terms of the
|
||||||
|
# Common Development and Distribution License (the "License").
|
||||||
|
# You may not use this file except in compliance with the License.
|
||||||
|
#
|
||||||
|
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||||
|
# or https://opensource.org/licenses/CDDL-1.0.
|
||||||
|
# See the License for the specific language governing permissions
|
||||||
|
# and limitations under the License.
|
||||||
|
#
|
||||||
|
# When distributing Covered Code, include this CDDL HEADER in each
|
||||||
|
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||||
|
# If applicable, add the following below this CDDL HEADER, with the
|
||||||
|
# fields enclosed by brackets "[]" replaced with your own identifying
|
||||||
|
# information: Portions Copyright [yyyy] [name of copyright owner]
|
||||||
|
#
|
||||||
|
# CDDL HEADER END
|
||||||
|
#
|
||||||
|
|
||||||
|
#
|
||||||
|
# Copyright (c) 2021 by Lawrence Livermore National Security, LLC.
|
||||||
|
#
|
||||||
|
|
||||||
|
. $STF_SUITE/include/libtest.shlib
|
||||||
|
. $STF_SUITE/tests/functional/zia/zia.kshlib
|
||||||
|
# Setup for the zia test group: create the test pool, then load and
# select the software provider.
verify_runnable "global"
|
||||||
|
|
||||||
|
# raidz pool with compression and checksum turned on (see default_zpool)
log_must default_zpool
|
||||||
|
|
||||||
|
# dpusm must be loaded before ZFS, but have to check
|
||||||
|
# after creating the zpool because this function uses
|
||||||
|
# the result of 'zpool get zia_available'
|
||||||
|
log_must dpusm_loaded
|
||||||
|
|
||||||
|
# insmod the provider and set it as the pool's zia_provider
log_must load_provider
|
||||||
|
|
||||||
|
log_pass
|
|
@ -0,0 +1,37 @@
|
||||||
|
#
|
||||||
|
# CDDL HEADER START
|
||||||
|
#
|
||||||
|
# The contents of this file are subject to the terms of the
|
||||||
|
# Common Development and Distribution License (the "License").
|
||||||
|
# You may not use this file except in compliance with the License.
|
||||||
|
#
|
||||||
|
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||||
|
# or https://opensource.org/licenses/CDDL-1.0.
|
||||||
|
# See the License for the specific language governing permissions
|
||||||
|
# and limitations under the License.
|
||||||
|
#
|
||||||
|
# When distributing Covered Code, include this CDDL HEADER in each
|
||||||
|
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||||
|
# If applicable, add the following below this CDDL HEADER, with the
|
||||||
|
# fields enclosed by brackets "[]" replaced with your own identifying
|
||||||
|
# information: Portions Copyright [yyyy] [name of copyright owner]
|
||||||
|
#
|
||||||
|
# CDDL HEADER END
|
||||||
|
#
|
||||||
|
|
||||||
|
#
|
||||||
|
# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||||
|
# Use is subject to license terms.
|
||||||
|
#
|
||||||
|
|
||||||
|
#
|
||||||
|
# Copyright (c) 2013 by Delphix. All rights reserved.
|
||||||
|
#
|
||||||
|
|
||||||
|
# Shared settings for the zia tests.

# module name as passed to rmmod when unloading the provider
export PROVIDER_MODULE="zia_software_provider"
|
||||||
|
# provider name: used for the .ko file name given to insmod and as the
# value of the zia_provider pool property
export PROVIDER="zia-software-provider"
|
||||||
|
# passed to file_write as the -b argument
export BLOCKSZ=8192
|
||||||
|
# passed to file_write as the -c argument
export NUM_WRITES=65536
|
||||||
|
# passed to file_write as the -d argument
export DATA="R"
|
||||||
|
# file created by file_write inside the test directory
export FILENAME="${TESTDIR}/file"
|
||||||
|
# presumably the replacement vdev used by the resilver test -- TODO confirm
export RESILVER_REPLACEMENT="${TEST_BASE_DIR}/replacement"
|
|
@ -0,0 +1,136 @@
|
||||||
|
#!/bin/ksh -p
|
||||||
|
#
|
||||||
|
# CDDL HEADER START
|
||||||
|
#
|
||||||
|
# The contents of this file are subject to the terms of the
|
||||||
|
# Common Development and Distribution License (the "License").
|
||||||
|
# You may not use this file except in compliance with the License.
|
||||||
|
#
|
||||||
|
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||||
|
# or https://opensource.org/licenses/CDDL-1.0.
|
||||||
|
# See the License for the specific language governing permissions
|
||||||
|
# and limitations under the License.
|
||||||
|
#
|
||||||
|
# When distributing Covered Code, include this CDDL HEADER in each
|
||||||
|
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||||
|
# If applicable, add the following below this CDDL HEADER, with the
|
||||||
|
# fields enclosed by brackets "[]" replaced with your own identifying
|
||||||
|
# information: Portions Copyright [yyyy] [name of copyright owner]
|
||||||
|
#
|
||||||
|
# CDDL HEADER END
|
||||||
|
#
|
||||||
|
|
||||||
|
#
|
||||||
|
# Copyright (c) 2021 by Lawrence Livermore National Security, LLC.
|
||||||
|
#
|
||||||
|
|
||||||
|
. $STF_SUITE/include/libtest.shlib
|
||||||
|
. $STF_SUITE/tests/functional/redundancy/redundancy.kshlib
|
||||||
|
. $STF_SUITE/tests/functional/zia/zia.cfg
|
||||||
|
|
||||||
|
#
# Create the default raidz test pool on ${DISKS} and switch on both
# compression and checksumming for it.
#
function default_zpool
{
	typeset prop

	default_raidz_setup_noexit "${DISKS}"

	for prop in compression checksum
	do
		log_must zfs set "${prop}=on" "${TESTPOOL}"
	done
}
|
||||||
|
|
||||||
|
#
# Print the value of the zia_available property of the test pool.
#
function zia_available
{
	typeset pool="${TESTPOOL}"

	zpool get -H -o value zia_available "${pool}"
}
|
||||||
|
|
||||||
|
#
# Verify that the dpusm module is loaded whenever Z.I.A. is available.
#
# Returns 0 when Z.I.A. is not available or when dpusm shows up in
# lsmod, so the function can be wrapped in log_must. When Z.I.A. is
# available but dpusm is missing, the test is reported as unsupported.
#
function dpusm_loaded
{
	# nothing to verify when Z.I.A. is not available
	[[ "$(zia_available)" == "yes" ]] || return 0

	if ! lsmod | grep dpusm > /dev/null
	then
		log_unsupported "dpusm not loaded"
	fi

	# explicit success: do not let the status of the check above leak
	# out as this function's return value
	return 0
}
|
||||||
|
|
||||||
|
#
# Load the software provider module and select it for the test pool.
#
# Providers can be loaded at any time after the dpusm. The software
# provider must be loaded after ZFS since it uses ZFS symbols.
#
# When Z.I.A. is not available nothing is done, so the tests should
# still pass.
#
function load_provider
{
	[[ "$(zia_available)" == "yes" ]] || return 0

	log_must insmod "${SBIN_DIR}/module/${PROVIDER}.ko"
	log_must zpool set zia_provider="${PROVIDER}" "${TESTPOOL}"
}
|
||||||
|
|
||||||
|
#
# Counterpart of load_provider: clear the zia_provider property of
# ${TESTPOOL} and then rmmod ${PROVIDER_MODULE}.
#
# Does nothing when zia_available is not "yes".
#
function unload_provider
{
	[[ "$(zia_available)" == "yes" ]] || return 0

	# Detach the pool from the provider before removing the module
	log_must zpool set zia_provider="" "${TESTPOOL}"
	log_must rmmod "${PROVIDER_MODULE}"
}
|
||||||
|
|
||||||
|
#
# Set every Z.I.A. offload property used by these tests to "on"
# for ${TESTPOOL}.
#
function offload_all
{
	typeset prop

	for prop in zia_compress zia_checksum \
	    zia_raidz1_gen zia_raidz2_gen zia_raidz3_gen \
	    zia_raidz1_rec zia_raidz2_rec zia_raidz3_rec \
	    zia_disk_write zia_file_write
	do
		log_must zpool set "${prop}=on" "${TESTPOOL}"
	done
}
|
||||||
|
|
||||||
|
#
# loop through each combination of Z.I.A. offloads
# and make sure writing works
#
# Four on/off switches are iterated (16 combinations):
#   compression, checksum, raidz{1,2,3} generation (toggled together)
#   and disk/file write (toggled together).
# For each combination a file is written to ${FILENAME}, the pool is
# verified and checked for "No known data errors", and the file is
# removed again.
#
function loop_offloads_and_write
{
	# keep the loop variables out of the caller's global scope
	typeset comp cksum raidz diskfile

	for comp in on off
	do
		log_must zpool set zia_compress="${comp}" "${TESTPOOL}"

		for cksum in on off
		do
			log_must zpool set zia_checksum="${cksum}" "${TESTPOOL}"

			for raidz in on off
			do
				log_must zpool set zia_raidz1_gen="${raidz}" "${TESTPOOL}"
				log_must zpool set zia_raidz2_gen="${raidz}" "${TESTPOOL}"
				log_must zpool set zia_raidz3_gen="${raidz}" "${TESTPOOL}"

				for diskfile in on off
				do
					log_must zpool set zia_disk_write="${diskfile}" "${TESTPOOL}"
					log_must zpool set zia_file_write="${diskfile}" "${TESTPOOL}"

					log_must file_write -o create -f "${FILENAME}" -b "${BLOCKSZ}" -c "${NUM_WRITES}" -d "${DATA}"
					log_must ls -l "${FILENAME}"
					log_must verify_pool "${TESTPOOL}"
					log_must check_pool_status "${TESTPOOL}" "errors" "No known data errors"
					log_must rm "${FILENAME}"
				done
			done
		done
	done
}
|
||||||
|
|
||||||
|
#
# Print one randomly selected entry of the whitespace-separated disk
# list passed as arguments.
# (selection logic copied from default_raidz_setup_noexit)
#
# Returns 1 and prints nothing when the list is empty.
#
function random_disk
{
	typeset disklist="$*"
	# locals, so callers' variables of the same name are not clobbered
	typeset -a disks
	typeset count idx

	disks=(${disklist})
	count="${#disks[*]}"

	# an empty list would otherwise ask for "random 1 0" and index -1
	(( count > 0 )) || return 1

	# random yields 1..count; array indices are 0-based
	idx="$(($(random 1 ${count}) - 1))"
	echo "${disks[${idx}]}"
}
|
|
@ -0,0 +1,54 @@
|
||||||
|
#!/bin/ksh -p
|
||||||
|
#
|
||||||
|
# CDDL HEADER START
|
||||||
|
#
|
||||||
|
# The contents of this file are subject to the terms of the
|
||||||
|
# Common Development and Distribution License (the "License").
|
||||||
|
# You may not use this file except in compliance with the License.
|
||||||
|
#
|
||||||
|
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||||
|
# or https://opensource.org/licenses/CDDL-1.0.
|
||||||
|
# See the License for the specific language governing permissions
|
||||||
|
# and limitations under the License.
|
||||||
|
#
|
||||||
|
# When distributing Covered Code, include this CDDL HEADER in each
|
||||||
|
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||||
|
# If applicable, add the following below this CDDL HEADER, with the
|
||||||
|
# fields enclosed by brackets "[]" replaced with your own identifying
|
||||||
|
# information: Portions Copyright [yyyy] [name of copyright owner]
|
||||||
|
#
|
||||||
|
# CDDL HEADER END
|
||||||
|
#
|
||||||
|
|
||||||
|
#
|
||||||
|
# Copyright (c) 2021 by Lawrence Livermore National Security, LLC.
|
||||||
|
#
|
||||||
|
|
||||||
|
. $STF_SUITE/include/libtest.shlib
|
||||||
|
. $STF_SUITE/tests/functional/zia/zia.kshlib
|
||||||
|
|
||||||
|
#
|
||||||
|
# DESCRIPTION:
|
||||||
|
# Z.I.A. zpool settings work
|
||||||
|
#
|
||||||
|
# STRATEGY:
|
||||||
|
# 1. Turn on all offloads
|
||||||
|
# 2. Run zpool get on each property
|
||||||
|
#
|
||||||
|
|
||||||
|
log_must offload_all

# Every Z.I.A. pool property must be readable through zpool get
for prop in zia_available zia_provider \
    zia_compress zia_checksum \
    zia_raidz1_gen zia_raidz2_gen zia_raidz3_gen \
    zia_raidz1_rec zia_raidz2_rec zia_raidz3_rec \
    zia_disk_write zia_file_write
do
	log_must zpool get "${prop}" "${TESTPOOL}"
done

log_pass
|
|
@ -0,0 +1,65 @@
|
||||||
|
#!/bin/ksh -p
|
||||||
|
#
|
||||||
|
# CDDL HEADER START
|
||||||
|
#
|
||||||
|
# The contents of this file are subject to the terms of the
|
||||||
|
# Common Development and Distribution License (the "License").
|
||||||
|
# You may not use this file except in compliance with the License.
|
||||||
|
#
|
||||||
|
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||||
|
# or https://opensource.org/licenses/CDDL-1.0.
|
||||||
|
# See the License for the specific language governing permissions
|
||||||
|
# and limitations under the License.
|
||||||
|
#
|
||||||
|
# When distributing Covered Code, include this CDDL HEADER in each
|
||||||
|
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||||
|
# If applicable, add the following below this CDDL HEADER, with the
|
||||||
|
# fields enclosed by brackets "[]" replaced with your own identifying
|
||||||
|
# information: Portions Copyright [yyyy] [name of copyright owner]
|
||||||
|
#
|
||||||
|
# CDDL HEADER END
|
||||||
|
#
|
||||||
|
|
||||||
|
#
|
||||||
|
# Copyright (c) 2021 by Lawrence Livermore National Security, LLC.
|
||||||
|
#
|
||||||
|
|
||||||
|
. $STF_SUITE/include/libtest.shlib
|
||||||
|
. $STF_SUITE/tests/functional/zia/zia.kshlib
|
||||||
|
|
||||||
|
#
|
||||||
|
# DESCRIPTION:
|
||||||
|
# Z.I.A. RAIDZ Resilver works
|
||||||
|
#
|
||||||
|
# STRATEGY:
|
||||||
|
# 1. Turn on all offloads
|
||||||
|
# 2. Write data to the zpool
|
||||||
|
# 3. Replace a drive
|
||||||
|
# 4. Resilver the zpool with Z.I.A.
|
||||||
|
# 5. Check for errors
|
||||||
|
#
|
||||||
|
|
||||||
|
# Remove the sparse replacement file created below.
function cleanup
{
	# -f: the handler is registered before the file exists, so it
	# must not fail when there is nothing to remove
	log_must rm -f "${RESILVER_REPLACEMENT}"
}
log_onexit cleanup

# sparse file that will take the place of the offlined device
log_must truncate -s 4G "${RESILVER_REPLACEMENT}"

log_must offload_all

# write a file
log_must file_write -o create -f "${FILENAME}" -b "${BLOCKSZ}" -c "${NUM_WRITES}" -d "${DATA}"
log_must ls -l "${FILENAME}"

# pick a random backing device to offline and replace it
bad="$(random_disk ${DISKS})"
log_must zpool offline "${TESTPOOL}" "${bad}"
log_must zpool replace "${TESTPOOL}" "${bad}" "${RESILVER_REPLACEMENT}"
log_must wait_replacing "${TESTPOOL}"

log_must verify_pool "${TESTPOOL}"
log_must check_pool_status "${TESTPOOL}" "errors" "No known data errors"

log_pass
|
|
@ -0,0 +1,47 @@
|
||||||
|
#!/bin/ksh -p
|
||||||
|
#
|
||||||
|
# CDDL HEADER START
|
||||||
|
#
|
||||||
|
# The contents of this file are subject to the terms of the
|
||||||
|
# Common Development and Distribution License (the "License").
|
||||||
|
# You may not use this file except in compliance with the License.
|
||||||
|
#
|
||||||
|
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||||
|
# or https://opensource.org/licenses/CDDL-1.0.
|
||||||
|
# See the License for the specific language governing permissions
|
||||||
|
# and limitations under the License.
|
||||||
|
#
|
||||||
|
# When distributing Covered Code, include this CDDL HEADER in each
|
||||||
|
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||||
|
# If applicable, add the following below this CDDL HEADER, with the
|
||||||
|
# fields enclosed by brackets "[]" replaced with your own identifying
|
||||||
|
# information: Portions Copyright [yyyy] [name of copyright owner]
|
||||||
|
#
|
||||||
|
# CDDL HEADER END
|
||||||
|
#
|
||||||
|
|
||||||
|
#
|
||||||
|
# Copyright (c) 2021 by Lawrence Livermore National Security, LLC.
|
||||||
|
#
|
||||||
|
|
||||||
|
. $STF_SUITE/include/libtest.shlib
|
||||||
|
. $STF_SUITE/tests/functional/zia/zia.kshlib
|
||||||
|
|
||||||
|
#
|
||||||
|
# DESCRIPTION:
|
||||||
|
# Z.I.A. Write Pipeline works
|
||||||
|
#
|
||||||
|
# STRATEGY:
|
||||||
|
# 1. Turn each of the offloaded stages on and off
|
||||||
|
# 1.1. Write data to the zpool
|
||||||
|
# 1.2. Delete the file
|
||||||
|
# 2. Disable the provider for the pool and unload the provider
|
||||||
|
# 3. Do 1. again, but without a provider to make sure Z.I.A. falls back to ZFS properly
|
||||||
|
#
|
||||||
|
|
||||||
|
# Tracks whether this test currently has the provider unloaded, so the
# exit handler reloads it only when needed (load_provider runs insmod,
# which would fail on a module that is still loaded).
typeset provider_unloaded="no"

# Put the provider back if the test stops between unload and reload.
function cleanup
{
	if [[ "${provider_unloaded}" == "yes" ]]
	then
		provider_unloaded="no"
		load_provider
	fi
}
log_onexit cleanup

# 1. every offload combination with the provider in place
log_must loop_offloads_and_write

# 2. drop the provider
log_must unload_provider
provider_unloaded="yes"

# 3. same combinations again; Z.I.A. has to fall back to ZFS
log_must loop_offloads_and_write

log_must load_provider
provider_unloaded="no"

log_pass
|
Loading…
Reference in New Issue