2013-08-28 11:45:09 +00:00
|
|
|
/*
|
|
|
|
* CDDL HEADER START
|
|
|
|
*
|
|
|
|
* The contents of this file are subject to the terms of the
|
|
|
|
* Common Development and Distribution License (the "License").
|
|
|
|
* You may not use this file except in compliance with the License.
|
|
|
|
*
|
|
|
|
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
|
|
|
* or http://www.opensolaris.org/os/licensing.
|
|
|
|
* See the License for the specific language governing permissions
|
|
|
|
* and limitations under the License.
|
|
|
|
*
|
|
|
|
* When distributing Covered Code, include this CDDL HEADER in each
|
|
|
|
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
|
|
|
* If applicable, add the following below this CDDL HEADER, with the
|
|
|
|
* fields enclosed by brackets "[]" replaced with your own identifying
|
|
|
|
* information: Portions Copyright [yyyy] [name of copyright owner]
|
|
|
|
*
|
|
|
|
* CDDL HEADER END
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*
|
2020-04-23 17:06:57 +00:00
|
|
|
* Copyright (c) 2012, 2020 by Delphix. All rights reserved.
|
2013-05-25 02:06:23 +00:00
|
|
|
* Copyright (c) 2013 Steven Hartland. All rights reserved.
|
2017-05-19 19:33:11 +00:00
|
|
|
* Copyright (c) 2017 Datto Inc.
|
2017-06-26 23:56:09 +00:00
|
|
|
* Copyright 2017 RackTop Systems.
|
2017-10-26 19:26:09 +00:00
|
|
|
* Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
|
2019-11-11 07:24:14 +00:00
|
|
|
* Copyright (c) 2019, 2020 by Christian Schwarz. All rights reserved.
|
2013-08-28 11:45:09 +00:00
|
|
|
*/
|
|
|
|
|
|
|
|
/*
|
|
|
|
* LibZFS_Core (lzc) is intended to replace most functionality in libzfs.
|
|
|
|
* It has the following characteristics:
|
|
|
|
*
|
|
|
|
* - Thread Safe. libzfs_core is accessible concurrently from multiple
|
|
|
|
* threads. This is accomplished primarily by avoiding global data
|
|
|
|
* (e.g. caching). Since it's thread-safe, there is no reason for a
|
|
|
|
* process to have multiple libzfs "instances". Therefore, we store
|
|
|
|
* our few pieces of data (e.g. the file descriptor) in global
|
|
|
|
* variables. The fd is reference-counted so that the libzfs_core
|
|
|
|
* library can be "initialized" multiple times (e.g. by different
|
|
|
|
* consumers within the same process).
|
|
|
|
*
|
|
|
|
* - Committed Interface. The libzfs_core interface will be committed,
|
|
|
|
* therefore consumers can compile against it and be confident that
|
|
|
|
* their code will continue to work on future releases of this code.
|
|
|
|
* Currently, the interface is Evolving (not Committed), but we intend
|
|
|
|
* to commit to it once it is more complete and we determine that it
|
|
|
|
* meets the needs of all consumers.
|
|
|
|
*
|
2014-06-15 17:17:45 +00:00
|
|
|
* - Programmatic Error Handling. libzfs_core communicates errors with
|
2013-08-28 11:45:09 +00:00
|
|
|
* defined error numbers, and doesn't print anything to stdout/stderr.
|
|
|
|
*
|
|
|
|
* - Thin Layer. libzfs_core is a thin layer, marshaling arguments
|
|
|
|
* to/from the kernel ioctls. There is generally a 1:1 correspondence
|
2019-06-19 19:27:31 +00:00
|
|
|
* between libzfs_core functions and ioctls to ZFS_DEV.
|
2013-08-28 11:45:09 +00:00
|
|
|
*
|
|
|
|
* - Clear Atomicity. Because libzfs_core functions are generally 1:1
|
|
|
|
* with kernel ioctls, and kernel ioctls are generally atomic, each
|
|
|
|
* libzfs_core function is atomic. For example, creating multiple
|
|
|
|
* snapshots with a single call to lzc_snapshot() is atomic -- it
|
|
|
|
* can't fail with only some of the requested snapshots created, even
|
|
|
|
* in the event of power loss or system crash.
|
|
|
|
*
|
|
|
|
* - Continued libzfs Support. Some higher-level operations (e.g.
|
|
|
|
* support for "zfs send -R") are too complicated to fit the scope of
|
|
|
|
* libzfs_core. This functionality will continue to live in libzfs.
|
|
|
|
* Where appropriate, libzfs will use the underlying atomic operations
|
|
|
|
* of libzfs_core. For example, libzfs may implement "zfs send -R |
|
|
|
|
* zfs receive" by using individual "send one snapshot", rename,
|
|
|
|
* destroy, and "receive one snapshot" operations in libzfs_core.
|
2019-04-19 19:04:21 +00:00
|
|
|
* /sbin/zfs and /sbin/zpool will link with both libzfs and
|
2013-08-28 11:45:09 +00:00
|
|
|
* libzfs_core. Other consumers should aim to use only libzfs_core,
|
|
|
|
* since that will be the supported, stable interface going forwards.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <libzfs_core.h>
|
|
|
|
#include <ctype.h>
|
|
|
|
#include <unistd.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
Add basic zfs ioc input nvpair validation
We want newer versions of libzfs_core to run against an existing
zfs kernel module (i.e. a deferred reboot or module reload after
an update).
Programmatically document, via a zfs_ioc_key_t, the valid arguments
for the ioc commands that rely on nvpair input arguments (i.e. non
legacy commands from libzfs_core). Automatically verify the expected
pairs before dispatching a command.
This initial phase focuses on the non-legacy ioctls. A follow-on
change can address the legacy ioctl input from the zfs_cmd_t.
The zfs_ioc_key_t for zfs_keys_channel_program looks like:
static const zfs_ioc_key_t zfs_keys_channel_program[] = {
{"program", DATA_TYPE_STRING, 0},
{"arg", DATA_TYPE_UNKNOWN, 0},
{"sync", DATA_TYPE_BOOLEAN_VALUE, ZK_OPTIONAL},
{"instrlimit", DATA_TYPE_UINT64, ZK_OPTIONAL},
{"memlimit", DATA_TYPE_UINT64, ZK_OPTIONAL},
};
Introduce four input errors to identify specific input failures
(in addition to generic argument value errors like EINVAL, ERANGE,
EBADF, and E2BIG).
ZFS_ERR_IOC_CMD_UNAVAIL the ioctl number is not supported by kernel
ZFS_ERR_IOC_ARG_UNAVAIL an input argument is not supported by kernel
ZFS_ERR_IOC_ARG_REQUIRED a required input argument is missing
ZFS_ERR_IOC_ARG_BADTYPE an input argument has an invalid type
Reviewed-by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Don Brady <don.brady@delphix.com>
Closes #7780
2018-09-02 19:14:01 +00:00
|
|
|
#ifdef ZFS_DEBUG
|
|
|
|
#include <stdio.h>
|
|
|
|
#endif
|
2013-08-28 11:45:09 +00:00
|
|
|
#include <errno.h>
|
|
|
|
#include <fcntl.h>
|
|
|
|
#include <pthread.h>
|
2019-11-12 18:40:39 +00:00
|
|
|
#include <libzutil.h>
|
2013-08-28 11:45:09 +00:00
|
|
|
#include <sys/nvpair.h>
|
|
|
|
#include <sys/param.h>
|
|
|
|
#include <sys/types.h>
|
|
|
|
#include <sys/stat.h>
|
|
|
|
#include <sys/zfs_ioctl.h>
|
|
|
|
|
OpenZFS 7745 - print error if lzc_* is called before libzfs_core_init
The problem is that consumers of `libZFS_Core` that forget to call
`libzfs_core_init()` before calling any other function of the library
are having a hard time realizing their mistake. The library's internal
file descriptor is declared as global static, which is ok, but it is not
initialized explicitly; therefore, it defaults to 0, which is a valid
file descriptor. If `libzfs_core_init()`, which explicitly initializes
the correct fd, is skipped, the ioctl functions return errors that do
not have anything to do with `libZFS_Core`, where the problem is
actually located.
Even though assertions for that existed within `libZFS_Core` for debug
builds, they were never enabled because the `-DDEBUG` flag was missing
from the compiler flags.
This patch applies the following changes:
1. It adds `-DDEBUG` for debug builds of `libZFS_Core` and `libzfs`,
to enable their assertions on debug builds.
2. It corrects an assertion within `libzfs`, where a function had
been spelled incorrectly (`zpool_prop_unsupported()`) and nobody
knew because the `-DDEBUG` flag was missing, and the preprocessor
was taking that part of the code away.
3. The library's internal fd is initialized to `-1` and `VERIFY`
assertions have been placed to check that the fd is not equal to
`-1` before issuing any ioctl. It is important here to note, that
the `VERIFY` assertions exist in both debug and non-debug builds.
4. In `libzfs_core_fini` we make sure to never decrement the
refcount of our fd below 0, and also reset the fd to `-1` when no
one refers to it. The reason for this, is for the rare case that
the consumer closes all references but then calls one of the
library's functions without using `libzfs_core_init()` first, and
in the mean time, a previous call to `open()` decided to reuse
our previous fd. This scenario would have passed our assertion in
non-debug builds.
5. Once the `ASSERTION` macros were enabled again, two tests from
the test suite were failing in `libzfs_sendrecv.c` at a
`ZIO_CHECKSUM_IS_ZERO` check within `dump_record()`. We now zero
the kernel filled checksums in all `dmu_replay_record`s that we
read in `cksummer()`, except the ones that are of type
`DRR_BEGIN`.
I considered making all assertions available for both debug and
non-debug builds, but I figured that it would not be appropriate if, for
example, an outside consumer of `libZFS_Core` suddenly triggers an
assertion failure because they happened to call `libzfs_core_fini()`,
even if previously the reference counter was `0`. Therefore, all the
reference counter related assertions are only enabled for debug builds,
and fd related assertions are enabled for debug and non-debug builds.
Porting notes:
- `ASSERT3S(g_refcount, >, 0);` added to `recv_impl` in
lib/libzfs_core/libzfs_core.c .
Authored by: Serapheim Dimitropoulos <serapheim@delphix.com>
Reviewed by: Pavel Zakharov <pavel.zakharov@delphix.com>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Ported-by: George Melikov <mail@gmelikov.ru>
OpenZFS-issue: https://www.illumos.org/issues/7745
OpenZFS-commit: https://github.com/openzfs/openzfs/commit/7e3139a
Closes #5698
2017-01-31 18:48:45 +00:00
|
|
|
static int g_fd = -1;
|
2013-08-28 11:45:09 +00:00
|
|
|
static pthread_mutex_t g_lock = PTHREAD_MUTEX_INITIALIZER;
|
|
|
|
static int g_refcount;
|
|
|
|
|
Add basic zfs ioc input nvpair validation
We want newer versions of libzfs_core to run against an existing
zfs kernel module (i.e. a deferred reboot or module reload after
an update).
Programmatically document, via a zfs_ioc_key_t, the valid arguments
for the ioc commands that rely on nvpair input arguments (i.e. non
legacy commands from libzfs_core). Automatically verify the expected
pairs before dispatching a command.
This initial phase focuses on the non-legacy ioctls. A follow-on
change can address the legacy ioctl input from the zfs_cmd_t.
The zfs_ioc_key_t for zfs_keys_channel_program looks like:
static const zfs_ioc_key_t zfs_keys_channel_program[] = {
{"program", DATA_TYPE_STRING, 0},
{"arg", DATA_TYPE_UNKNOWN, 0},
{"sync", DATA_TYPE_BOOLEAN_VALUE, ZK_OPTIONAL},
{"instrlimit", DATA_TYPE_UINT64, ZK_OPTIONAL},
{"memlimit", DATA_TYPE_UINT64, ZK_OPTIONAL},
};
Introduce four input errors to identify specific input failures
(in addition to generic argument value errors like EINVAL, ERANGE,
EBADF, and E2BIG).
ZFS_ERR_IOC_CMD_UNAVAIL the ioctl number is not supported by kernel
ZFS_ERR_IOC_ARG_UNAVAIL an input argument is not supported by kernel
ZFS_ERR_IOC_ARG_REQUIRED a required input argument is missing
ZFS_ERR_IOC_ARG_BADTYPE an input argument has an invalid type
Reviewed-by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Don Brady <don.brady@delphix.com>
Closes #7780
2018-09-02 19:14:01 +00:00
|
|
|
#ifdef ZFS_DEBUG
|
2020-08-19 01:07:43 +00:00
|
|
|
static zfs_ioc_t fail_ioc_cmd = ZFS_IOC_LAST;
|
Add basic zfs ioc input nvpair validation
We want newer versions of libzfs_core to run against an existing
zfs kernel module (i.e. a deferred reboot or module reload after
an update).
Programmatically document, via a zfs_ioc_key_t, the valid arguments
for the ioc commands that rely on nvpair input arguments (i.e. non
legacy commands from libzfs_core). Automatically verify the expected
pairs before dispatching a command.
This initial phase focuses on the non-legacy ioctls. A follow-on
change can address the legacy ioctl input from the zfs_cmd_t.
The zfs_ioc_key_t for zfs_keys_channel_program looks like:
static const zfs_ioc_key_t zfs_keys_channel_program[] = {
{"program", DATA_TYPE_STRING, 0},
{"arg", DATA_TYPE_UNKNOWN, 0},
{"sync", DATA_TYPE_BOOLEAN_VALUE, ZK_OPTIONAL},
{"instrlimit", DATA_TYPE_UINT64, ZK_OPTIONAL},
{"memlimit", DATA_TYPE_UINT64, ZK_OPTIONAL},
};
Introduce four input errors to identify specific input failures
(in addition to generic argument value errors like EINVAL, ERANGE,
EBADF, and E2BIG).
ZFS_ERR_IOC_CMD_UNAVAIL the ioctl number is not supported by kernel
ZFS_ERR_IOC_ARG_UNAVAIL an input argument is not supported by kernel
ZFS_ERR_IOC_ARG_REQUIRED a required input argument is missing
ZFS_ERR_IOC_ARG_BADTYPE an input argument has an invalid type
Reviewed-by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Don Brady <don.brady@delphix.com>
Closes #7780
2018-09-02 19:14:01 +00:00
|
|
|
static zfs_errno_t fail_ioc_err;
|
|
|
|
|
|
|
|
static void
|
|
|
|
libzfs_core_debug_ioc(void)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* To test running newer user space binaries with kernel's
|
|
|
|
* that don't yet support an ioctl or a new ioctl arg we
|
|
|
|
* provide an override to intentionally fail an ioctl.
|
|
|
|
*
|
|
|
|
* USAGE:
|
|
|
|
* The override variable, ZFS_IOC_TEST, is of the form "cmd:err"
|
|
|
|
*
|
|
|
|
* For example, to fail a ZFS_IOC_POOL_CHECKPOINT with a
|
|
|
|
* ZFS_ERR_IOC_CMD_UNAVAIL, the string would be "0x5a4d:1029"
|
|
|
|
*
|
|
|
|
* $ sudo sh -c "ZFS_IOC_TEST=0x5a4d:1029 zpool checkpoint tank"
|
|
|
|
* cannot checkpoint 'tank': the loaded zfs module does not support
|
|
|
|
* this operation. A reboot may be required to enable this operation.
|
|
|
|
*/
|
2020-08-19 01:07:43 +00:00
|
|
|
if (fail_ioc_cmd == ZFS_IOC_LAST) {
|
Add basic zfs ioc input nvpair validation
We want newer versions of libzfs_core to run against an existing
zfs kernel module (i.e. a deferred reboot or module reload after
an update).
Programmatically document, via a zfs_ioc_key_t, the valid arguments
for the ioc commands that rely on nvpair input arguments (i.e. non
legacy commands from libzfs_core). Automatically verify the expected
pairs before dispatching a command.
This initial phase focuses on the non-legacy ioctls. A follow-on
change can address the legacy ioctl input from the zfs_cmd_t.
The zfs_ioc_key_t for zfs_keys_channel_program looks like:
static const zfs_ioc_key_t zfs_keys_channel_program[] = {
{"program", DATA_TYPE_STRING, 0},
{"arg", DATA_TYPE_UNKNOWN, 0},
{"sync", DATA_TYPE_BOOLEAN_VALUE, ZK_OPTIONAL},
{"instrlimit", DATA_TYPE_UINT64, ZK_OPTIONAL},
{"memlimit", DATA_TYPE_UINT64, ZK_OPTIONAL},
};
Introduce four input errors to identify specific input failures
(in addition to generic argument value errors like EINVAL, ERANGE,
EBADF, and E2BIG).
ZFS_ERR_IOC_CMD_UNAVAIL the ioctl number is not supported by kernel
ZFS_ERR_IOC_ARG_UNAVAIL an input argument is not supported by kernel
ZFS_ERR_IOC_ARG_REQUIRED a required input argument is missing
ZFS_ERR_IOC_ARG_BADTYPE an input argument has an invalid type
Reviewed-by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Don Brady <don.brady@delphix.com>
Closes #7780
2018-09-02 19:14:01 +00:00
|
|
|
char *ioc_test = getenv("ZFS_IOC_TEST");
|
|
|
|
unsigned int ioc_num = 0, ioc_err = 0;
|
|
|
|
|
|
|
|
if (ioc_test != NULL &&
|
|
|
|
sscanf(ioc_test, "%i:%i", &ioc_num, &ioc_err) == 2 &&
|
|
|
|
ioc_num < ZFS_IOC_LAST) {
|
|
|
|
fail_ioc_cmd = ioc_num;
|
|
|
|
fail_ioc_err = ioc_err;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2013-08-28 11:45:09 +00:00
|
|
|
int
|
|
|
|
libzfs_core_init(void)
|
|
|
|
{
|
|
|
|
(void) pthread_mutex_lock(&g_lock);
|
|
|
|
if (g_refcount == 0) {
|
2021-03-09 23:00:43 +00:00
|
|
|
g_fd = open(ZFS_DEV, O_RDWR|O_CLOEXEC);
|
2013-08-28 11:45:09 +00:00
|
|
|
if (g_fd < 0) {
|
|
|
|
(void) pthread_mutex_unlock(&g_lock);
|
|
|
|
return (errno);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
g_refcount++;
|
Add basic zfs ioc input nvpair validation
We want newer versions of libzfs_core to run against an existing
zfs kernel module (i.e. a deferred reboot or module reload after
an update).
Programmatically document, via a zfs_ioc_key_t, the valid arguments
for the ioc commands that rely on nvpair input arguments (i.e. non
legacy commands from libzfs_core). Automatically verify the expected
pairs before dispatching a command.
This initial phase focuses on the non-legacy ioctls. A follow-on
change can address the legacy ioctl input from the zfs_cmd_t.
The zfs_ioc_key_t for zfs_keys_channel_program looks like:
static const zfs_ioc_key_t zfs_keys_channel_program[] = {
{"program", DATA_TYPE_STRING, 0},
{"arg", DATA_TYPE_UNKNOWN, 0},
{"sync", DATA_TYPE_BOOLEAN_VALUE, ZK_OPTIONAL},
{"instrlimit", DATA_TYPE_UINT64, ZK_OPTIONAL},
{"memlimit", DATA_TYPE_UINT64, ZK_OPTIONAL},
};
Introduce four input errors to identify specific input failures
(in addition to generic argument value errors like EINVAL, ERANGE,
EBADF, and E2BIG).
ZFS_ERR_IOC_CMD_UNAVAIL the ioctl number is not supported by kernel
ZFS_ERR_IOC_ARG_UNAVAIL an input argument is not supported by kernel
ZFS_ERR_IOC_ARG_REQUIRED a required input argument is missing
ZFS_ERR_IOC_ARG_BADTYPE an input argument has an invalid type
Reviewed-by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Don Brady <don.brady@delphix.com>
Closes #7780
2018-09-02 19:14:01 +00:00
|
|
|
|
|
|
|
#ifdef ZFS_DEBUG
|
|
|
|
libzfs_core_debug_ioc();
|
|
|
|
#endif
|
2013-08-28 11:45:09 +00:00
|
|
|
(void) pthread_mutex_unlock(&g_lock);
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
libzfs_core_fini(void)
|
|
|
|
{
|
|
|
|
(void) pthread_mutex_lock(&g_lock);
|
|
|
|
ASSERT3S(g_refcount, >, 0);
|
OpenZFS 7745 - print error if lzc_* is called before libzfs_core_init
The problem is that consumers of `libZFS_Core` that forget to call
`libzfs_core_init()` before calling any other function of the library
are having a hard time realizing their mistake. The library's internal
file descriptor is declared as global static, which is ok, but it is not
initialized explicitly; therefore, it defaults to 0, which is a valid
file descriptor. If `libzfs_core_init()`, which explicitly initializes
the correct fd, is skipped, the ioctl functions return errors that do
not have anything to do with `libZFS_Core`, where the problem is
actually located.
Even though assertions for that existed within `libZFS_Core` for debug
builds, they were never enabled because the `-DDEBUG` flag was missing
from the compiler flags.
This patch applies the following changes:
1. It adds `-DDEBUG` for debug builds of `libZFS_Core` and `libzfs`,
to enable their assertions on debug builds.
2. It corrects an assertion within `libzfs`, where a function had
been spelled incorrectly (`zpool_prop_unsupported()`) and nobody
knew because the `-DDEBUG` flag was missing, and the preprocessor
was taking that part of the code away.
3. The library's internal fd is initialized to `-1` and `VERIFY`
assertions have been placed to check that the fd is not equal to
`-1` before issuing any ioctl. It is important here to note, that
the `VERIFY` assertions exist in both debug and non-debug builds.
4. In `libzfs_core_fini` we make sure to never increment the
refcount of our fd below 0, and also reset the fd to `-1` when no
one refers to it. The reason for this, is for the rare case that
the consumer closes all references but then calls one of the
library's functions without using `libzfs_core_init()` first, and
in the mean time, a previous call to `open()` decided to reuse
our previous fd. This scenario would have passed our assertion in
non-debug builds.
5. Once the `ASSERTION` macros were enabled again, two tests from
the test suite were failing in `libzfs_sendrecv.c` at a
`ZIO_CHECKSUM_IS_ZERO` check within `dump_record()`. We now zero
the kernel filled checksums in all `dmu_replay_record`s that we
read in `cksummer()`, except the ones that are of type
`DRR_BEGIN`.
I considered making all assertions available for both debug and
non-debug builds, but I figured that it would not be appropriate if, for
example, an outside consumer of `libZFS_Core` suddenly triggers an
assertion failure because they happened to call `libzfs_core_fini()`,
even if previously the reference counter was `0`. Therefore, all the
reference counter related assertions are only enabled for debug builds,
and fd related assertions are enabled for debug and non-debug builds.
Porting notes:
- `ASSERT3S(g_refcount, >, 0);` added to `recv_impl` in
lib/libzfs_core/libzfs_core.c .
Authored by: Serapheim Dimitropoulos <serapheim@delphix.com>
Reviewed by: Pavel Zakharov <pavel.zakharov@delphix.com>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Ported-by: George Melikov <mail@gmelikov.ru>
OpenZFS-issue: https://www.illumos.org/issues/7745
OpenZFS-commit: https://github.com/openzfs/openzfs/commit/7e3139a
Closes #5698
2017-01-31 18:48:45 +00:00
|
|
|
|
|
|
|
if (g_refcount > 0)
|
|
|
|
g_refcount--;
|
|
|
|
|
|
|
|
if (g_refcount == 0 && g_fd != -1) {
|
2013-08-28 11:45:09 +00:00
|
|
|
(void) close(g_fd);
|
OpenZFS 7745 - print error if lzc_* is called before libzfs_core_init
The problem is that consumers of `libZFS_Core` that forget to call
`libzfs_core_init()` before calling any other function of the library
are having a hard time realizing their mistake. The library's internal
file descriptor is declared as global static, which is ok, but it is not
initialized explicitly; therefore, it defaults to 0, which is a valid
file descriptor. If `libzfs_core_init()`, which explicitly initializes
the correct fd, is skipped, the ioctl functions return errors that do
not have anything to do with `libZFS_Core`, where the problem is
actually located.
Even though assertions for that existed within `libZFS_Core` for debug
builds, they were never enabled because the `-DDEBUG` flag was missing
from the compiler flags.
This patch applies the following changes:
1. It adds `-DDEBUG` for debug builds of `libZFS_Core` and `libzfs`,
to enable their assertions on debug builds.
2. It corrects an assertion within `libzfs`, where a function had
been spelled incorrectly (`zpool_prop_unsupported()`) and nobody
knew because the `-DDEBUG` flag was missing, and the preprocessor
was taking that part of the code away.
3. The library's internal fd is initialized to `-1` and `VERIFY`
assertions have been placed to check that the fd is not equal to
`-1` before issuing any ioctl. It is important here to note, that
the `VERIFY` assertions exist in both debug and non-debug builds.
4. In `libzfs_core_fini` we make sure to never increment the
refcount of our fd below 0, and also reset the fd to `-1` when no
one refers to it. The reason for this, is for the rare case that
the consumer closes all references but then calls one of the
library's functions without using `libzfs_core_init()` first, and
in the mean time, a previous call to `open()` decided to reuse
our previous fd. This scenario would have passed our assertion in
non-debug builds.
5. Once the `ASSERTION` macros were enabled again, two tests from
the test suite were failing in `libzfs_sendrecv.c` at a
`ZIO_CHECKSUM_IS_ZERO` check within `dump_record()`. We now zero
the kernel filled checksums in all `dmu_replay_record`s that we
read in `cksummer()`, except the ones that are of type
`DRR_BEGIN`.
I considered making all assertions available for both debug and
non-debug builds, but I figured that it would not be appropriate if, for
example, an outside consumer of `libZFS_Core` suddenly triggers an
assertion failure because they happened to call `libzfs_core_fini()`,
even if previously the reference counter was `0`. Therefore, all the
reference counter related assertions are only enabled for debug builds,
and fd related assertions are enabled for debug and non-debug builds.
Porting notes:
- `ASSERT3S(g_refcount, >, 0);` added to `recv_impl` in
lib/libzfs_core/libzfs_core.c .
Authored by: Serapheim Dimitropoulos <serapheim@delphix.com>
Reviewed by: Pavel Zakharov <pavel.zakharov@delphix.com>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Ported-by: George Melikov <mail@gmelikov.ru>
OpenZFS-issue: https://www.illumos.org/issues/7745
OpenZFS-commit: https://github.com/openzfs/openzfs/commit/7e3139a
Closes #5698
2017-01-31 18:48:45 +00:00
|
|
|
g_fd = -1;
|
|
|
|
}
|
2013-08-28 11:45:09 +00:00
|
|
|
(void) pthread_mutex_unlock(&g_lock);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
lzc_ioctl(zfs_ioc_t ioc, const char *name,
|
|
|
|
nvlist_t *source, nvlist_t **resultp)
|
|
|
|
{
|
2013-09-04 12:00:57 +00:00
|
|
|
zfs_cmd_t zc = {"\0"};
|
2013-08-28 11:45:09 +00:00
|
|
|
int error = 0;
|
2017-05-19 19:33:11 +00:00
|
|
|
char *packed = NULL;
|
|
|
|
size_t size = 0;
|
2013-08-28 11:45:09 +00:00
|
|
|
|
|
|
|
ASSERT3S(g_refcount, >, 0);
|
OpenZFS 7745 - print error if lzc_* is called before libzfs_core_init
The problem is that consumers of `libZFS_Core` that forget to call
`libzfs_core_init()` before calling any other function of the library
are having a hard time realizing their mistake. The library's internal
file descriptor is declared as global static, which is ok, but it is not
initialized explicitly; therefore, it defaults to 0, which is a valid
file descriptor. If `libzfs_core_init()`, which explicitly initializes
the correct fd, is skipped, the ioctl functions return errors that do
not have anything to do with `libZFS_Core`, where the problem is
actually located.
Even though assertions for that existed within `libZFS_Core` for debug
builds, they were never enabled because the `-DDEBUG` flag was missing
from the compiler flags.
This patch applies the following changes:
1. It adds `-DDEBUG` for debug builds of `libZFS_Core` and `libzfs`,
to enable their assertions on debug builds.
2. It corrects an assertion within `libzfs`, where a function had
been spelled incorrectly (`zpool_prop_unsupported()`) and nobody
knew because the `-DDEBUG` flag was missing, and the preprocessor
was taking that part of the code away.
3. The library's internal fd is initialized to `-1` and `VERIFY`
assertions have been placed to check that the fd is not equal to
`-1` before issuing any ioctl. It is important here to note, that
the `VERIFY` assertions exist in both debug and non-debug builds.
4. In `libzfs_core_fini` we make sure to never increment the
refcount of our fd below 0, and also reset the fd to `-1` when no
one refers to it. The reason for this, is for the rare case that
the consumer closes all references but then calls one of the
library's functions without using `libzfs_core_init()` first, and
in the mean time, a previous call to `open()` decided to reuse
our previous fd. This scenario would have passed our assertion in
non-debug builds.
5. Once the `ASSERTION` macros were enabled again, two tests from
the test suite were failing in `libzfs_sendrecv.c` at a
`ZIO_CHECKSUM_IS_ZERO` check within `dump_record()`. We now zero
the kernel filled checksums in all `dmu_replay_record`s that we
read in `cksummer()`, except the ones that are of type
`DRR_BEGIN`.
I considered making all assertions available for both debug and
non-debug builds, but I figured that it would not be appropriate if, for
example, an outside consumer of `libZFS_Core` suddenly triggers an
assertion failure because they happened to call `libzfs_core_fini()`,
even if previously the reference counter was `0`. Therefore, all the
reference counter related assertions are only enabled for debug builds,
and fd related assertions are enabled for debug and non-debug builds.
Porting notes:
- `ASSERT3S(g_refcount, >, 0);` added to `recv_impl` in
lib/libzfs_core/libzfs_core.c .
Authored by: Serapheim Dimitropoulos <serapheim@delphix.com>
Reviewed by: Pavel Zakharov <pavel.zakharov@delphix.com>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Ported-by: George Melikov <mail@gmelikov.ru>
OpenZFS-issue: https://www.illumos.org/issues/7745
OpenZFS-commit: https://github.com/openzfs/openzfs/commit/7e3139a
Closes #5698
2017-01-31 18:48:45 +00:00
|
|
|
VERIFY3S(g_fd, !=, -1);
|
2013-08-28 11:45:09 +00:00
|
|
|
|
Add basic zfs ioc input nvpair validation
We want newer versions of libzfs_core to run against an existing
zfs kernel module (i.e. a deferred reboot or module reload after
an update).
Programmatically document, via a zfs_ioc_key_t, the valid arguments
for the ioc commands that rely on nvpair input arguments (i.e. non
legacy commands from libzfs_core). Automatically verify the expected
pairs before dispatching a command.
This initial phase focuses on the non-legacy ioctls. A follow-on
change can address the legacy ioctl input from the zfs_cmd_t.
The zfs_ioc_key_t for zfs_keys_channel_program looks like:
static const zfs_ioc_key_t zfs_keys_channel_program[] = {
{"program", DATA_TYPE_STRING, 0},
{"arg", DATA_TYPE_UNKNOWN, 0},
{"sync", DATA_TYPE_BOOLEAN_VALUE, ZK_OPTIONAL},
{"instrlimit", DATA_TYPE_UINT64, ZK_OPTIONAL},
{"memlimit", DATA_TYPE_UINT64, ZK_OPTIONAL},
};
Introduce four input errors to identify specific input failures
(in addition to generic argument value errors like EINVAL, ERANGE,
EBADF, and E2BIG).
ZFS_ERR_IOC_CMD_UNAVAIL the ioctl number is not supported by kernel
ZFS_ERR_IOC_ARG_UNAVAIL an input argument is not supported by kernel
ZFS_ERR_IOC_ARG_REQUIRED a required input argument is missing
ZFS_ERR_IOC_ARG_BADTYPE an input argument has an invalid type
Reviewed-by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Don Brady <don.brady@delphix.com>
Closes #7780
2018-09-02 19:14:01 +00:00
|
|
|
#ifdef ZFS_DEBUG
|
|
|
|
if (ioc == fail_ioc_cmd)
|
|
|
|
return (fail_ioc_err);
|
|
|
|
#endif
|
|
|
|
|
2017-05-19 19:33:11 +00:00
|
|
|
if (name != NULL)
|
|
|
|
(void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
|
2013-08-28 11:45:09 +00:00
|
|
|
|
2017-05-19 19:33:11 +00:00
|
|
|
if (source != NULL) {
|
|
|
|
packed = fnvlist_pack(source, &size);
|
|
|
|
zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed;
|
|
|
|
zc.zc_nvlist_src_size = size;
|
|
|
|
}
|
2013-08-28 11:45:09 +00:00
|
|
|
|
|
|
|
if (resultp != NULL) {
|
2013-09-04 12:00:57 +00:00
|
|
|
*resultp = NULL;
|
2018-02-08 16:24:39 +00:00
|
|
|
if (ioc == ZFS_IOC_CHANNEL_PROGRAM) {
|
|
|
|
zc.zc_nvlist_dst_size = fnvlist_lookup_uint64(source,
|
|
|
|
ZCP_ARG_MEMLIMIT);
|
|
|
|
} else {
|
|
|
|
zc.zc_nvlist_dst_size = MAX(size * 2, 128 * 1024);
|
|
|
|
}
|
2013-08-28 11:45:09 +00:00
|
|
|
zc.zc_nvlist_dst = (uint64_t)(uintptr_t)
|
|
|
|
malloc(zc.zc_nvlist_dst_size);
|
|
|
|
if (zc.zc_nvlist_dst == (uint64_t)0) {
|
|
|
|
error = ENOMEM;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-11-12 18:40:39 +00:00
|
|
|
while (zfs_ioctl_fd(g_fd, ioc, &zc) != 0) {
|
2018-02-08 16:16:23 +00:00
|
|
|
/*
|
|
|
|
* If ioctl exited with ENOMEM, we retry the ioctl after
|
|
|
|
* increasing the size of the destination nvlist.
|
|
|
|
*
|
2018-02-08 16:24:39 +00:00
|
|
|
* Channel programs that exit with ENOMEM ran over the
|
2018-02-08 16:16:23 +00:00
|
|
|
* lua memory sandbox; they should not be retried.
|
|
|
|
*/
|
|
|
|
if (errno == ENOMEM && resultp != NULL &&
|
|
|
|
ioc != ZFS_IOC_CHANNEL_PROGRAM) {
|
2013-08-28 11:45:09 +00:00
|
|
|
free((void *)(uintptr_t)zc.zc_nvlist_dst);
|
|
|
|
zc.zc_nvlist_dst_size *= 2;
|
|
|
|
zc.zc_nvlist_dst = (uint64_t)(uintptr_t)
|
|
|
|
malloc(zc.zc_nvlist_dst_size);
|
|
|
|
if (zc.zc_nvlist_dst == (uint64_t)0) {
|
|
|
|
error = ENOMEM;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
error = errno;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2022-10-14 20:33:22 +00:00
|
|
|
if (zc.zc_nvlist_dst_filled && resultp != NULL) {
|
2013-08-28 11:45:09 +00:00
|
|
|
*resultp = fnvlist_unpack((void *)(uintptr_t)zc.zc_nvlist_dst,
|
|
|
|
zc.zc_nvlist_dst_size);
|
|
|
|
}
|
|
|
|
|
|
|
|
out:
|
Native Encryption for ZFS on Linux
This change incorporates three major pieces:
The first change is a keystore that manages wrapping
and encryption keys for encrypted datasets. These
commands mostly involve manipulating the new
DSL Crypto Key ZAP Objects that live in the MOS. Each
encrypted dataset has its own DSL Crypto Key that is
protected with a user's key. This level of indirection
allows users to change their keys without re-encrypting
their entire datasets. The change implements the new
subcommands "zfs load-key", "zfs unload-key" and
"zfs change-key" which allow the user to manage their
encryption keys and settings. In addition, several new
flags and properties have been added to allow dataset
creation and to make mounting and unmounting more
convenient.
The second piece of this patch provides the ability to
encrypt, decrypt, and authenticate protected datasets.
Each object set maintains a Merkle tree of Message
Authentication Codes that protect the lower layers,
similarly to how checksums are maintained. This part
impacts the zio layer, which handles the actual
encryption and generation of MACs, as well as the ARC
and DMU, which need to be able to handle encrypted
buffers and protected data.
The last addition is the ability to do raw, encrypted
sends and receives. The idea here is to send raw
encrypted and compressed data and receive it exactly
as is on a backup system. This means that the dataset
on the receiving system is protected using the same
user key that is in use on the sending side. By doing
so, datasets can be efficiently backed up to an
untrusted system without fear of data being
compromised.
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Jorgen Lundman <lundman@lundman.net>
Signed-off-by: Tom Caputi <tcaputi@datto.com>
Closes #494
Closes #5769
2017-08-14 17:36:48 +00:00
|
|
|
if (packed != NULL)
|
|
|
|
fnvlist_pack_free(packed, size);
|
2013-08-28 11:45:09 +00:00
|
|
|
free((void *)(uintptr_t)zc.zc_nvlist_dst);
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
Native Encryption for ZFS on Linux
This change incorporates three major pieces:
The first change is a keystore that manages wrapping
and encryption keys for encrypted datasets. These
commands mostly involve manipulating the new
DSL Crypto Key ZAP Objects that live in the MOS. Each
encrypted dataset has its own DSL Crypto Key that is
protected with a user's key. This level of indirection
allows users to change their keys without re-encrypting
their entire datasets. The change implements the new
subcommands "zfs load-key", "zfs unload-key" and
"zfs change-key" which allow the user to manage their
encryption keys and settings. In addition, several new
flags and properties have been added to allow dataset
creation and to make mounting and unmounting more
convenient.
The second piece of this patch provides the ability to
encrypt, decyrpt, and authenticate protected datasets.
Each object set maintains a Merkel tree of Message
Authentication Codes that protect the lower layers,
similarly to how checksums are maintained. This part
impacts the zio layer, which handles the actual
encryption and generation of MACs, as well as the ARC
and DMU, which need to be able to handle encrypted
buffers and protected data.
The last addition is the ability to do raw, encrypted
sends and receives. The idea here is to send raw
encrypted and compressed data and receive it exactly
as is on a backup system. This means that the dataset
on the receiving system is protected using the same
user key that is in use on the sending side. By doing
so, datasets can be efficiently backed up to an
untrusted system without fear of data being
compromised.
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Jorgen Lundman <lundman@lundman.net>
Signed-off-by: Tom Caputi <tcaputi@datto.com>
Closes #494
Closes #5769
2017-08-14 17:36:48 +00:00
|
|
|
lzc_create(const char *fsname, enum lzc_dataset_type type, nvlist_t *props,
|
|
|
|
uint8_t *wkeydata, uint_t wkeylen)
|
2013-08-28 11:45:09 +00:00
|
|
|
{
|
|
|
|
int error;
|
Native Encryption for ZFS on Linux
This change incorporates three major pieces:
The first change is a keystore that manages wrapping
and encryption keys for encrypted datasets. These
commands mostly involve manipulating the new
DSL Crypto Key ZAP Objects that live in the MOS. Each
encrypted dataset has its own DSL Crypto Key that is
protected with a user's key. This level of indirection
allows users to change their keys without re-encrypting
their entire datasets. The change implements the new
subcommands "zfs load-key", "zfs unload-key" and
"zfs change-key" which allow the user to manage their
encryption keys and settings. In addition, several new
flags and properties have been added to allow dataset
creation and to make mounting and unmounting more
convenient.
The second piece of this patch provides the ability to
encrypt, decyrpt, and authenticate protected datasets.
Each object set maintains a Merkel tree of Message
Authentication Codes that protect the lower layers,
similarly to how checksums are maintained. This part
impacts the zio layer, which handles the actual
encryption and generation of MACs, as well as the ARC
and DMU, which need to be able to handle encrypted
buffers and protected data.
The last addition is the ability to do raw, encrypted
sends and receives. The idea here is to send raw
encrypted and compressed data and receive it exactly
as is on a backup system. This means that the dataset
on the receiving system is protected using the same
user key that is in use on the sending side. By doing
so, datasets can be efficiently backed up to an
untrusted system without fear of data being
compromised.
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Jorgen Lundman <lundman@lundman.net>
Signed-off-by: Tom Caputi <tcaputi@datto.com>
Closes #494
Closes #5769
2017-08-14 17:36:48 +00:00
|
|
|
nvlist_t *hidden_args = NULL;
|
2013-08-28 11:45:09 +00:00
|
|
|
nvlist_t *args = fnvlist_alloc();
|
Native Encryption for ZFS on Linux
This change incorporates three major pieces:
The first change is a keystore that manages wrapping
and encryption keys for encrypted datasets. These
commands mostly involve manipulating the new
DSL Crypto Key ZAP Objects that live in the MOS. Each
encrypted dataset has its own DSL Crypto Key that is
protected with a user's key. This level of indirection
allows users to change their keys without re-encrypting
their entire datasets. The change implements the new
subcommands "zfs load-key", "zfs unload-key" and
"zfs change-key" which allow the user to manage their
encryption keys and settings. In addition, several new
flags and properties have been added to allow dataset
creation and to make mounting and unmounting more
convenient.
The second piece of this patch provides the ability to
encrypt, decyrpt, and authenticate protected datasets.
Each object set maintains a Merkel tree of Message
Authentication Codes that protect the lower layers,
similarly to how checksums are maintained. This part
impacts the zio layer, which handles the actual
encryption and generation of MACs, as well as the ARC
and DMU, which need to be able to handle encrypted
buffers and protected data.
The last addition is the ability to do raw, encrypted
sends and receives. The idea here is to send raw
encrypted and compressed data and receive it exactly
as is on a backup system. This means that the dataset
on the receiving system is protected using the same
user key that is in use on the sending side. By doing
so, datasets can be efficiently backed up to an
untrusted system without fear of data being
compromised.
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Jorgen Lundman <lundman@lundman.net>
Signed-off-by: Tom Caputi <tcaputi@datto.com>
Closes #494
Closes #5769
2017-08-14 17:36:48 +00:00
|
|
|
|
2017-01-23 17:49:57 +00:00
|
|
|
fnvlist_add_int32(args, "type", (dmu_objset_type_t)type);
|
2013-08-28 11:45:09 +00:00
|
|
|
if (props != NULL)
|
|
|
|
fnvlist_add_nvlist(args, "props", props);
|
Native Encryption for ZFS on Linux
This change incorporates three major pieces:
The first change is a keystore that manages wrapping
and encryption keys for encrypted datasets. These
commands mostly involve manipulating the new
DSL Crypto Key ZAP Objects that live in the MOS. Each
encrypted dataset has its own DSL Crypto Key that is
protected with a user's key. This level of indirection
allows users to change their keys without re-encrypting
their entire datasets. The change implements the new
subcommands "zfs load-key", "zfs unload-key" and
"zfs change-key" which allow the user to manage their
encryption keys and settings. In addition, several new
flags and properties have been added to allow dataset
creation and to make mounting and unmounting more
convenient.
The second piece of this patch provides the ability to
encrypt, decyrpt, and authenticate protected datasets.
Each object set maintains a Merkel tree of Message
Authentication Codes that protect the lower layers,
similarly to how checksums are maintained. This part
impacts the zio layer, which handles the actual
encryption and generation of MACs, as well as the ARC
and DMU, which need to be able to handle encrypted
buffers and protected data.
The last addition is the ability to do raw, encrypted
sends and receives. The idea here is to send raw
encrypted and compressed data and receive it exactly
as is on a backup system. This means that the dataset
on the receiving system is protected using the same
user key that is in use on the sending side. By doing
so, datasets can be efficiently backed up to an
untrusted system without fear of data being
compromised.
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Jorgen Lundman <lundman@lundman.net>
Signed-off-by: Tom Caputi <tcaputi@datto.com>
Closes #494
Closes #5769
2017-08-14 17:36:48 +00:00
|
|
|
|
|
|
|
if (wkeydata != NULL) {
|
|
|
|
hidden_args = fnvlist_alloc();
|
|
|
|
fnvlist_add_uint8_array(hidden_args, "wkeydata", wkeydata,
|
|
|
|
wkeylen);
|
|
|
|
fnvlist_add_nvlist(args, ZPOOL_HIDDEN_ARGS, hidden_args);
|
|
|
|
}
|
|
|
|
|
2013-08-28 11:45:09 +00:00
|
|
|
error = lzc_ioctl(ZFS_IOC_CREATE, fsname, args, NULL);
|
Native Encryption for ZFS on Linux
This change incorporates three major pieces:
The first change is a keystore that manages wrapping
and encryption keys for encrypted datasets. These
commands mostly involve manipulating the new
DSL Crypto Key ZAP Objects that live in the MOS. Each
encrypted dataset has its own DSL Crypto Key that is
protected with a user's key. This level of indirection
allows users to change their keys without re-encrypting
their entire datasets. The change implements the new
subcommands "zfs load-key", "zfs unload-key" and
"zfs change-key" which allow the user to manage their
encryption keys and settings. In addition, several new
flags and properties have been added to allow dataset
creation and to make mounting and unmounting more
convenient.
The second piece of this patch provides the ability to
encrypt, decyrpt, and authenticate protected datasets.
Each object set maintains a Merkel tree of Message
Authentication Codes that protect the lower layers,
similarly to how checksums are maintained. This part
impacts the zio layer, which handles the actual
encryption and generation of MACs, as well as the ARC
and DMU, which need to be able to handle encrypted
buffers and protected data.
The last addition is the ability to do raw, encrypted
sends and receives. The idea here is to send raw
encrypted and compressed data and receive it exactly
as is on a backup system. This means that the dataset
on the receiving system is protected using the same
user key that is in use on the sending side. By doing
so, datasets can be efficiently backed up to an
untrusted system without fear of data being
compromised.
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Jorgen Lundman <lundman@lundman.net>
Signed-off-by: Tom Caputi <tcaputi@datto.com>
Closes #494
Closes #5769
2017-08-14 17:36:48 +00:00
|
|
|
nvlist_free(hidden_args);
|
2013-08-28 11:45:09 +00:00
|
|
|
nvlist_free(args);
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
Native Encryption for ZFS on Linux
This change incorporates three major pieces:
The first change is a keystore that manages wrapping
and encryption keys for encrypted datasets. These
commands mostly involve manipulating the new
DSL Crypto Key ZAP Objects that live in the MOS. Each
encrypted dataset has its own DSL Crypto Key that is
protected with a user's key. This level of indirection
allows users to change their keys without re-encrypting
their entire datasets. The change implements the new
subcommands "zfs load-key", "zfs unload-key" and
"zfs change-key" which allow the user to manage their
encryption keys and settings. In addition, several new
flags and properties have been added to allow dataset
creation and to make mounting and unmounting more
convenient.
The second piece of this patch provides the ability to
encrypt, decyrpt, and authenticate protected datasets.
Each object set maintains a Merkel tree of Message
Authentication Codes that protect the lower layers,
similarly to how checksums are maintained. This part
impacts the zio layer, which handles the actual
encryption and generation of MACs, as well as the ARC
and DMU, which need to be able to handle encrypted
buffers and protected data.
The last addition is the ability to do raw, encrypted
sends and receives. The idea here is to send raw
encrypted and compressed data and receive it exactly
as is on a backup system. This means that the dataset
on the receiving system is protected using the same
user key that is in use on the sending side. By doing
so, datasets can be efficiently backed up to an
untrusted system without fear of data being
compromised.
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Jorgen Lundman <lundman@lundman.net>
Signed-off-by: Tom Caputi <tcaputi@datto.com>
Closes #494
Closes #5769
2017-08-14 17:36:48 +00:00
|
|
|
lzc_clone(const char *fsname, const char *origin, nvlist_t *props)
|
2013-08-28 11:45:09 +00:00
|
|
|
{
|
|
|
|
int error;
|
Native Encryption for ZFS on Linux
This change incorporates three major pieces:
The first change is a keystore that manages wrapping
and encryption keys for encrypted datasets. These
commands mostly involve manipulating the new
DSL Crypto Key ZAP Objects that live in the MOS. Each
encrypted dataset has its own DSL Crypto Key that is
protected with a user's key. This level of indirection
allows users to change their keys without re-encrypting
their entire datasets. The change implements the new
subcommands "zfs load-key", "zfs unload-key" and
"zfs change-key" which allow the user to manage their
encryption keys and settings. In addition, several new
flags and properties have been added to allow dataset
creation and to make mounting and unmounting more
convenient.
The second piece of this patch provides the ability to
encrypt, decyrpt, and authenticate protected datasets.
Each object set maintains a Merkel tree of Message
Authentication Codes that protect the lower layers,
similarly to how checksums are maintained. This part
impacts the zio layer, which handles the actual
encryption and generation of MACs, as well as the ARC
and DMU, which need to be able to handle encrypted
buffers and protected data.
The last addition is the ability to do raw, encrypted
sends and receives. The idea here is to send raw
encrypted and compressed data and receive it exactly
as is on a backup system. This means that the dataset
on the receiving system is protected using the same
user key that is in use on the sending side. By doing
so, datasets can be efficiently backed up to an
untrusted system without fear of data being
compromised.
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Jorgen Lundman <lundman@lundman.net>
Signed-off-by: Tom Caputi <tcaputi@datto.com>
Closes #494
Closes #5769
2017-08-14 17:36:48 +00:00
|
|
|
nvlist_t *hidden_args = NULL;
|
2013-08-28 11:45:09 +00:00
|
|
|
nvlist_t *args = fnvlist_alloc();
|
Native Encryption for ZFS on Linux
This change incorporates three major pieces:
The first change is a keystore that manages wrapping
and encryption keys for encrypted datasets. These
commands mostly involve manipulating the new
DSL Crypto Key ZAP Objects that live in the MOS. Each
encrypted dataset has its own DSL Crypto Key that is
protected with a user's key. This level of indirection
allows users to change their keys without re-encrypting
their entire datasets. The change implements the new
subcommands "zfs load-key", "zfs unload-key" and
"zfs change-key" which allow the user to manage their
encryption keys and settings. In addition, several new
flags and properties have been added to allow dataset
creation and to make mounting and unmounting more
convenient.
The second piece of this patch provides the ability to
encrypt, decyrpt, and authenticate protected datasets.
Each object set maintains a Merkel tree of Message
Authentication Codes that protect the lower layers,
similarly to how checksums are maintained. This part
impacts the zio layer, which handles the actual
encryption and generation of MACs, as well as the ARC
and DMU, which need to be able to handle encrypted
buffers and protected data.
The last addition is the ability to do raw, encrypted
sends and receives. The idea here is to send raw
encrypted and compressed data and receive it exactly
as is on a backup system. This means that the dataset
on the receiving system is protected using the same
user key that is in use on the sending side. By doing
so, datasets can be efficiently backed up to an
untrusted system without fear of data being
compromised.
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Jorgen Lundman <lundman@lundman.net>
Signed-off-by: Tom Caputi <tcaputi@datto.com>
Closes #494
Closes #5769
2017-08-14 17:36:48 +00:00
|
|
|
|
2013-08-28 11:45:09 +00:00
|
|
|
fnvlist_add_string(args, "origin", origin);
|
|
|
|
if (props != NULL)
|
|
|
|
fnvlist_add_nvlist(args, "props", props);
|
|
|
|
error = lzc_ioctl(ZFS_IOC_CLONE, fsname, args, NULL);
|
Native Encryption for ZFS on Linux
This change incorporates three major pieces:
The first change is a keystore that manages wrapping
and encryption keys for encrypted datasets. These
commands mostly involve manipulating the new
DSL Crypto Key ZAP Objects that live in the MOS. Each
encrypted dataset has its own DSL Crypto Key that is
protected with a user's key. This level of indirection
allows users to change their keys without re-encrypting
their entire datasets. The change implements the new
subcommands "zfs load-key", "zfs unload-key" and
"zfs change-key" which allow the user to manage their
encryption keys and settings. In addition, several new
flags and properties have been added to allow dataset
creation and to make mounting and unmounting more
convenient.
The second piece of this patch provides the ability to
encrypt, decyrpt, and authenticate protected datasets.
Each object set maintains a Merkel tree of Message
Authentication Codes that protect the lower layers,
similarly to how checksums are maintained. This part
impacts the zio layer, which handles the actual
encryption and generation of MACs, as well as the ARC
and DMU, which need to be able to handle encrypted
buffers and protected data.
The last addition is the ability to do raw, encrypted
sends and receives. The idea here is to send raw
encrypted and compressed data and receive it exactly
as is on a backup system. This means that the dataset
on the receiving system is protected using the same
user key that is in use on the sending side. By doing
so, datasets can be efficiently backed up to an
untrusted system without fear of data being
compromised.
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Jorgen Lundman <lundman@lundman.net>
Signed-off-by: Tom Caputi <tcaputi@datto.com>
Closes #494
Closes #5769
2017-08-14 17:36:48 +00:00
|
|
|
nvlist_free(hidden_args);
|
2013-08-28 11:45:09 +00:00
|
|
|
nvlist_free(args);
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
2017-06-26 23:56:09 +00:00
|
|
|
int
|
|
|
|
lzc_promote(const char *fsname, char *snapnamebuf, int snapnamelen)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* The promote ioctl is still legacy, so we need to construct our
|
|
|
|
* own zfs_cmd_t rather than using lzc_ioctl().
|
|
|
|
*/
|
2020-07-10 00:47:12 +00:00
|
|
|
zfs_cmd_t zc = {"\0"};
|
2017-06-26 23:56:09 +00:00
|
|
|
|
|
|
|
ASSERT3S(g_refcount, >, 0);
|
|
|
|
VERIFY3S(g_fd, !=, -1);
|
|
|
|
|
|
|
|
(void) strlcpy(zc.zc_name, fsname, sizeof (zc.zc_name));
|
2019-11-12 18:40:39 +00:00
|
|
|
if (zfs_ioctl_fd(g_fd, ZFS_IOC_PROMOTE, &zc) != 0) {
|
2017-06-26 23:56:09 +00:00
|
|
|
int error = errno;
|
|
|
|
if (error == EEXIST && snapnamebuf != NULL)
|
|
|
|
(void) strlcpy(snapnamebuf, zc.zc_string, snapnamelen);
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
2018-06-27 21:37:54 +00:00
|
|
|
int
|
|
|
|
lzc_rename(const char *source, const char *target)
|
|
|
|
{
|
2020-07-10 00:47:12 +00:00
|
|
|
zfs_cmd_t zc = {"\0"};
|
2018-06-27 21:37:54 +00:00
|
|
|
int error;
|
2020-07-10 00:47:12 +00:00
|
|
|
|
2018-06-27 21:37:54 +00:00
|
|
|
ASSERT3S(g_refcount, >, 0);
|
|
|
|
VERIFY3S(g_fd, !=, -1);
|
|
|
|
(void) strlcpy(zc.zc_name, source, sizeof (zc.zc_name));
|
|
|
|
(void) strlcpy(zc.zc_value, target, sizeof (zc.zc_value));
|
2019-11-12 18:40:39 +00:00
|
|
|
error = zfs_ioctl_fd(g_fd, ZFS_IOC_RENAME, &zc);
|
2018-06-27 21:37:54 +00:00
|
|
|
if (error != 0)
|
|
|
|
error = errno;
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
int
|
|
|
|
lzc_destroy(const char *fsname)
|
|
|
|
{
|
|
|
|
int error;
|
|
|
|
nvlist_t *args = fnvlist_alloc();
|
|
|
|
error = lzc_ioctl(ZFS_IOC_DESTROY, fsname, args, NULL);
|
|
|
|
nvlist_free(args);
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
2013-08-28 11:45:09 +00:00
|
|
|
/*
|
|
|
|
* Creates snapshots.
|
|
|
|
*
|
|
|
|
* The keys in the snaps nvlist are the snapshots to be created.
|
|
|
|
* They must all be in the same pool.
|
|
|
|
*
|
|
|
|
* The props nvlist is properties to set. Currently only user properties
|
|
|
|
* are supported. { user:prop_name -> string value }
|
|
|
|
*
|
|
|
|
* The returned results nvlist will have an entry for each snapshot that failed.
|
|
|
|
* The value will be the (int32) error code.
|
|
|
|
*
|
|
|
|
* The return value will be 0 if all snapshots were created, otherwise it will
|
2013-09-04 12:00:57 +00:00
|
|
|
* be the errno of a (unspecified) snapshot that failed.
|
2013-08-28 11:45:09 +00:00
|
|
|
*/
|
|
|
|
int
|
|
|
|
lzc_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t **errlist)
|
|
|
|
{
|
|
|
|
nvpair_t *elem;
|
|
|
|
nvlist_t *args;
|
|
|
|
int error;
|
2016-06-15 21:28:36 +00:00
|
|
|
char pool[ZFS_MAX_DATASET_NAME_LEN];
|
2013-08-28 11:45:09 +00:00
|
|
|
|
|
|
|
*errlist = NULL;
|
|
|
|
|
|
|
|
/* determine the pool name */
|
|
|
|
elem = nvlist_next_nvpair(snaps, NULL);
|
|
|
|
if (elem == NULL)
|
|
|
|
return (0);
|
|
|
|
(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
|
|
|
|
pool[strcspn(pool, "/@")] = '\0';
|
|
|
|
|
|
|
|
args = fnvlist_alloc();
|
|
|
|
fnvlist_add_nvlist(args, "snaps", snaps);
|
|
|
|
if (props != NULL)
|
|
|
|
fnvlist_add_nvlist(args, "props", props);
|
|
|
|
|
|
|
|
error = lzc_ioctl(ZFS_IOC_SNAPSHOT, pool, args, errlist);
|
|
|
|
nvlist_free(args);
|
|
|
|
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Destroys snapshots.
|
|
|
|
*
|
|
|
|
* The keys in the snaps nvlist are the snapshots to be destroyed.
|
|
|
|
* They must all be in the same pool.
|
|
|
|
*
|
|
|
|
* Snapshots that do not exist will be silently ignored.
|
|
|
|
*
|
|
|
|
* If 'defer' is not set, and a snapshot has user holds or clones, the
|
|
|
|
* destroy operation will fail and none of the snapshots will be
|
|
|
|
* destroyed.
|
|
|
|
*
|
|
|
|
* If 'defer' is set, and a snapshot has user holds or clones, it will be
|
|
|
|
* marked for deferred destruction, and will be destroyed when the last hold
|
|
|
|
* or clone is removed/destroyed.
|
|
|
|
*
|
|
|
|
* The return value will be 0 if all snapshots were destroyed (or marked for
|
2013-06-20 22:43:17 +00:00
|
|
|
* later destruction if 'defer' is set) or didn't exist to begin with.
|
2013-08-28 11:45:09 +00:00
|
|
|
*
|
2013-09-04 12:00:57 +00:00
|
|
|
* Otherwise the return value will be the errno of a (unspecified) snapshot
|
2013-08-28 11:45:09 +00:00
|
|
|
* that failed, no snapshots will be destroyed, and the errlist will have an
|
|
|
|
* entry for each snapshot that failed. The value in the errlist will be
|
|
|
|
* the (int32) error code.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
lzc_destroy_snaps(nvlist_t *snaps, boolean_t defer, nvlist_t **errlist)
|
|
|
|
{
|
|
|
|
nvpair_t *elem;
|
|
|
|
nvlist_t *args;
|
|
|
|
int error;
|
2016-06-15 21:28:36 +00:00
|
|
|
char pool[ZFS_MAX_DATASET_NAME_LEN];
|
2013-08-28 11:45:09 +00:00
|
|
|
|
|
|
|
/* determine the pool name */
|
|
|
|
elem = nvlist_next_nvpair(snaps, NULL);
|
|
|
|
if (elem == NULL)
|
|
|
|
return (0);
|
|
|
|
(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
|
|
|
|
pool[strcspn(pool, "/@")] = '\0';
|
|
|
|
|
|
|
|
args = fnvlist_alloc();
|
|
|
|
fnvlist_add_nvlist(args, "snaps", snaps);
|
|
|
|
if (defer)
|
|
|
|
fnvlist_add_boolean(args, "defer");
|
|
|
|
|
|
|
|
error = lzc_ioctl(ZFS_IOC_DESTROY_SNAPS, pool, args, errlist);
|
|
|
|
nvlist_free(args);
|
|
|
|
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
lzc_snaprange_space(const char *firstsnap, const char *lastsnap,
|
|
|
|
uint64_t *usedp)
|
|
|
|
{
|
|
|
|
nvlist_t *args;
|
|
|
|
nvlist_t *result;
|
|
|
|
int err;
|
2016-06-15 21:28:36 +00:00
|
|
|
char fs[ZFS_MAX_DATASET_NAME_LEN];
|
2013-08-28 11:45:09 +00:00
|
|
|
char *atp;
|
|
|
|
|
|
|
|
/* determine the fs name */
|
|
|
|
(void) strlcpy(fs, firstsnap, sizeof (fs));
|
|
|
|
atp = strchr(fs, '@');
|
|
|
|
if (atp == NULL)
|
|
|
|
return (EINVAL);
|
|
|
|
*atp = '\0';
|
|
|
|
|
|
|
|
args = fnvlist_alloc();
|
|
|
|
fnvlist_add_string(args, "firstsnap", firstsnap);
|
|
|
|
|
|
|
|
err = lzc_ioctl(ZFS_IOC_SPACE_SNAPS, lastsnap, args, &result);
|
|
|
|
nvlist_free(args);
|
|
|
|
if (err == 0)
|
|
|
|
*usedp = fnvlist_lookup_uint64(result, "used");
|
|
|
|
fnvlist_free(result);
|
|
|
|
|
|
|
|
return (err);
|
|
|
|
}
|
|
|
|
|
|
|
|
boolean_t
|
|
|
|
lzc_exists(const char *dataset)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* The objset_stats ioctl is still legacy, so we need to construct our
|
2017-06-26 23:56:09 +00:00
|
|
|
* own zfs_cmd_t rather than using lzc_ioctl().
|
2013-08-28 11:45:09 +00:00
|
|
|
*/
|
2013-09-04 12:00:57 +00:00
|
|
|
zfs_cmd_t zc = {"\0"};
|
2013-08-28 11:45:09 +00:00
|
|
|
|
OpenZFS 7745 - print error if lzc_* is called before libzfs_core_init
The problem is that consumers of `libZFS_Core` that forget to call
`libzfs_core_init()` before calling any other function of the library
are having a hard time realizing their mistake. The library's internal
file descriptor is declared as global static, which is ok, but it is not
initialized explicitly; therefore, it defaults to 0, which is a valid
file descriptor. If `libzfs_core_init()`, which explicitly initializes
the correct fd, is skipped, the ioctl functions return errors that do
not have anything to do with `libZFS_Core`, where the problem is
actually located.
Even though assertions for that existed within `libZFS_Core` for debug
builds, they were never enabled because the `-DDEBUG` flag was missing
from the compiler flags.
This patch applies the following changes:
1. It adds `-DDEBUG` for debug builds of `libZFS_Core` and `libzfs`,
to enable their assertions on debug builds.
2. It corrects an assertion within `libzfs`, where a function had
been spelled incorrectly (`zpool_prop_unsupported()`) and nobody
knew because the `-DDEBUG` flag was missing, and the preprocessor
was taking that part of the code away.
3. The library's internal fd is initialized to `-1` and `VERIFY`
assertions have been placed to check that the fd is not equal to
`-1` before issuing any ioctl. It is important here to note, that
the `VERIFY` assertions exist in both debug and non-debug builds.
4. In `libzfs_core_fini` we make sure to never increment the
refcount of our fd below 0, and also reset the fd to `-1` when no
one refers to it. The reason for this, is for the rare case that
the consumer closes all references but then calls one of the
library's functions without using `libzfs_core_init()` first, and
in the mean time, a previous call to `open()` decided to reuse
our previous fd. This scenario would have passed our assertion in
non-debug builds.
5. Once the `ASSERTION` macros were enabled again, two tests from
the test suite were failing in `libzfs_sendrecv.c` at a
`ZIO_CHECKSUM_IS_ZERO` check within `dump_record()`. We now zero
the kernel filled checksums in all `dmu_replay_record`s that we
read in `cksummer()`, except the ones that are of type
`DRR_BEGIN`.
I considered making all assertions available for both debug and
non-debug builds, but I figured that it would not be appropriate if, for
example, an outside consumer of `libZFS_Core` suddenly triggers an
assertion failure because they happened to call `libzfs_core_fini()`,
even if previously the reference counter was `0`. Therefore, all the
reference counter related assertions are only enabled for debug builds,
and fd related assertions are enabled for debug and non-debug builds.
Porting notes:
- `ASSERT3S(g_refcount, >, 0);` added to `recv_impl` in
lib/libzfs_core/libzfs_core.c .
Authored by: Serapheim Dimitropoulos <serapheim@delphix.com>
Reviewed by: Pavel Zakharov <pavel.zakharov@delphix.com>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Ported-by: George Melikov <mail@gmelikov.ru>
OpenZFS-issue: https://www.illumos.org/issues/7745
OpenZFS-commit: https://github.com/openzfs/openzfs/commit/7e3139a
Closes #5698
2017-01-31 18:48:45 +00:00
|
|
|
ASSERT3S(g_refcount, >, 0);
|
|
|
|
VERIFY3S(g_fd, !=, -1);
|
|
|
|
|
2013-08-28 11:45:09 +00:00
|
|
|
(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
|
2019-11-12 18:40:39 +00:00
|
|
|
return (zfs_ioctl_fd(g_fd, ZFS_IOC_OBJSET_STATS, &zc) == 0);
|
2013-08-28 11:45:09 +00:00
|
|
|
}
|
|
|
|
|
2017-05-19 19:33:11 +00:00
|
|
|
/*
|
|
|
|
* outnvl is unused.
|
|
|
|
* It was added to preserve the function signature in case it is
|
|
|
|
* needed in the future.
|
|
|
|
*/
|
|
|
|
/*ARGSUSED*/
|
|
|
|
int
|
|
|
|
lzc_sync(const char *pool_name, nvlist_t *innvl, nvlist_t **outnvl)
|
|
|
|
{
|
|
|
|
return (lzc_ioctl(ZFS_IOC_POOL_SYNC, pool_name, innvl, NULL));
|
|
|
|
}
|
|
|
|
|
2013-09-04 12:00:57 +00:00
|
|
|
/*
|
|
|
|
* Create "user holds" on snapshots. If there is a hold on a snapshot,
|
|
|
|
* the snapshot can not be destroyed. (However, it can be marked for deletion
|
|
|
|
* by lzc_destroy_snaps(defer=B_TRUE).)
|
|
|
|
*
|
|
|
|
* The keys in the nvlist are snapshot names.
|
|
|
|
* The snapshots must all be in the same pool.
|
|
|
|
* The value is the name of the hold (string type).
|
|
|
|
*
|
2019-06-19 19:27:31 +00:00
|
|
|
* If cleanup_fd is not -1, it must be the result of open(ZFS_DEV, O_EXCL).
|
2013-09-04 12:00:57 +00:00
|
|
|
* In this case, when the cleanup_fd is closed (including on process
|
|
|
|
* termination), the holds will be released. If the system is shut down
|
|
|
|
* uncleanly, the holds will be released when the pool is next opened
|
|
|
|
* or imported.
|
|
|
|
*
|
2013-05-25 02:06:23 +00:00
|
|
|
* Holds for snapshots which don't exist will be skipped and have an entry
|
2013-06-20 22:43:17 +00:00
|
|
|
* added to errlist, but will not cause an overall failure.
|
2013-05-25 02:06:23 +00:00
|
|
|
*
|
2013-06-20 22:43:17 +00:00
|
|
|
* The return value will be 0 if all holds, for snapshots that existed,
|
2014-06-15 17:17:45 +00:00
|
|
|
* were successfully created.
|
2013-05-25 02:06:23 +00:00
|
|
|
*
|
|
|
|
* Otherwise the return value will be the errno of a (unspecified) hold that
|
|
|
|
* failed and no holds will be created.
|
|
|
|
*
|
|
|
|
* In all cases the errlist will have an entry for each hold that failed
|
|
|
|
* (name = snapshot), with its value being the error code (int32).
|
2013-09-04 12:00:57 +00:00
|
|
|
*/
|
|
|
|
int
|
|
|
|
lzc_hold(nvlist_t *holds, int cleanup_fd, nvlist_t **errlist)
|
|
|
|
{
|
2016-06-15 21:28:36 +00:00
|
|
|
char pool[ZFS_MAX_DATASET_NAME_LEN];
|
2013-09-04 12:00:57 +00:00
|
|
|
nvlist_t *args;
|
|
|
|
nvpair_t *elem;
|
|
|
|
int error;
|
|
|
|
|
|
|
|
/* determine the pool name */
|
|
|
|
elem = nvlist_next_nvpair(holds, NULL);
|
|
|
|
if (elem == NULL)
|
|
|
|
return (0);
|
|
|
|
(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
|
|
|
|
pool[strcspn(pool, "/@")] = '\0';
|
|
|
|
|
|
|
|
args = fnvlist_alloc();
|
|
|
|
fnvlist_add_nvlist(args, "holds", holds);
|
|
|
|
if (cleanup_fd != -1)
|
|
|
|
fnvlist_add_int32(args, "cleanup_fd", cleanup_fd);
|
|
|
|
|
|
|
|
error = lzc_ioctl(ZFS_IOC_HOLD, pool, args, errlist);
|
|
|
|
nvlist_free(args);
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Release "user holds" on snapshots. If the snapshot has been marked for
|
|
|
|
* deferred destroy (by lzc_destroy_snaps(defer=B_TRUE)), it does not have
|
|
|
|
* any clones, and all the user holds are removed, then the snapshot will be
|
|
|
|
* destroyed.
|
|
|
|
*
|
|
|
|
* The keys in the nvlist are snapshot names.
|
|
|
|
* The snapshots must all be in the same pool.
|
2016-08-08 08:00:08 +00:00
|
|
|
* The value is an nvlist whose keys are the holds to remove.
|
2013-09-04 12:00:57 +00:00
|
|
|
*
|
2013-05-25 02:06:23 +00:00
|
|
|
* Holds which failed to release because they didn't exist will have an entry
|
2013-06-20 22:43:17 +00:00
|
|
|
* added to errlist, but will not cause an overall failure.
|
2013-05-25 02:06:23 +00:00
|
|
|
*
|
|
|
|
* The return value will be 0 if the nvl holds was empty or all holds that
|
2013-06-20 22:43:17 +00:00
|
|
|
* existed, were successfully removed.
|
2013-05-25 02:06:23 +00:00
|
|
|
*
|
|
|
|
* Otherwise the return value will be the errno of a (unspecified) hold that
|
|
|
|
* failed to release and no holds will be released.
|
|
|
|
*
|
|
|
|
* In all cases the errlist will have an entry for each hold that failed to
|
|
|
|
* release.
|
2013-09-04 12:00:57 +00:00
|
|
|
*/
|
|
|
|
int
|
|
|
|
lzc_release(nvlist_t *holds, nvlist_t **errlist)
|
|
|
|
{
|
2016-06-15 21:28:36 +00:00
|
|
|
char pool[ZFS_MAX_DATASET_NAME_LEN];
|
2013-09-04 12:00:57 +00:00
|
|
|
nvpair_t *elem;
|
|
|
|
|
|
|
|
/* determine the pool name */
|
|
|
|
elem = nvlist_next_nvpair(holds, NULL);
|
|
|
|
if (elem == NULL)
|
|
|
|
return (0);
|
|
|
|
(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
|
|
|
|
pool[strcspn(pool, "/@")] = '\0';
|
|
|
|
|
|
|
|
return (lzc_ioctl(ZFS_IOC_RELEASE, pool, holds, errlist));
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Retrieve list of user holds on the specified snapshot.
|
|
|
|
*
|
2016-08-08 08:00:08 +00:00
|
|
|
* On success, *holdsp will be set to an nvlist which the caller must free.
|
2013-09-04 12:00:57 +00:00
|
|
|
* The keys are the names of the holds, and the value is the creation time
|
|
|
|
* of the hold (uint64) in seconds since the epoch.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
lzc_get_holds(const char *snapname, nvlist_t **holdsp)
|
|
|
|
{
|
2017-05-19 19:33:11 +00:00
|
|
|
return (lzc_ioctl(ZFS_IOC_GET_HOLDS, snapname, NULL, holdsp));
|
2013-09-04 12:00:57 +00:00
|
|
|
}
|
|
|
|
|
2013-08-28 11:45:09 +00:00
|
|
|
/*
|
2014-06-05 21:19:08 +00:00
|
|
|
* Generate a zfs send stream for the specified snapshot and write it to
|
|
|
|
* the specified file descriptor.
|
2013-12-11 22:33:41 +00:00
|
|
|
*
|
|
|
|
* "snapname" is the full name of the snapshot to send (e.g. "pool/fs@snap")
|
|
|
|
*
|
|
|
|
* If "from" is NULL, a full (non-incremental) stream will be sent.
|
|
|
|
* If "from" is non-NULL, it must be the full name of a snapshot or
|
|
|
|
* bookmark to send an incremental from (e.g. "pool/fs@earlier_snap" or
|
|
|
|
* "pool/fs#earlier_bmark"). If non-NULL, the specified snapshot or
|
|
|
|
* bookmark must represent an earlier point in the history of "snapname".
|
|
|
|
* It can be an earlier snapshot in the same filesystem or zvol as "snapname",
|
|
|
|
* or it can be the origin of "snapname"'s filesystem, or an earlier
|
|
|
|
* snapshot in the origin, etc.
|
|
|
|
*
|
|
|
|
* "fd" is the file descriptor to write the send stream to.
|
2014-06-05 21:19:08 +00:00
|
|
|
*
|
2014-11-03 20:15:08 +00:00
|
|
|
* If "flags" contains LZC_SEND_FLAG_LARGE_BLOCK, the stream is permitted
|
|
|
|
* to contain DRR_WRITE records with drr_length > 128K, and DRR_OBJECT
|
|
|
|
* records with drr_blksz > 128K.
|
|
|
|
*
|
2014-06-05 21:19:08 +00:00
|
|
|
* If "flags" contains LZC_SEND_FLAG_EMBED_DATA, the stream is permitted
|
|
|
|
* to contain DRR_WRITE_EMBEDDED records with drr_etype==BP_EMBEDDED_TYPE_DATA,
|
|
|
|
* which the receiving system must support (as indicated by support
|
|
|
|
* for the "embedded_data" feature).
|
Adopt pyzfs from ClusterHQ
This commit introduces several changes:
* Update LICENSE and project information
* Give a good PEP8 talk to existing Python source code
* Add RPM/DEB packaging for pyzfs
* Fix some outstanding issues with the existing pyzfs code caused by
changes in the ABI since the last time the code was updated
* Integrate pyzfs Python unittest with the ZFS Test Suite
* Add missing libzfs_core functions: lzc_change_key,
lzc_channel_program, lzc_channel_program_nosync, lzc_load_key,
lzc_receive_one, lzc_receive_resumable, lzc_receive_with_cmdprops,
lzc_receive_with_header, lzc_reopen, lzc_send_resume, lzc_sync,
lzc_unload_key, lzc_remap
Note: this commit slightly changes zfs_ioc_unload_key() ABI. This allow
to differentiate the case where we tried to unload a key on a
non-existing dataset (ENOENT) from the situation where a dataset has
no key loaded: this is consistent with the "change" case where trying
to zfs_ioc_change_key() from a dataset with no key results in EACCES.
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: loli10K <ezomori.nozomu@gmail.com>
Closes #7230
2018-03-18 08:34:45 +00:00
|
|
|
*
|
|
|
|
* If "flags" contains LZC_SEND_FLAG_COMPRESS, the stream is generated by using
|
|
|
|
* compressed WRITE records for blocks which are compressed on disk and in
|
|
|
|
* memory. If the lz4_compress feature is active on the sending system, then
|
|
|
|
* the receiving system must have that feature enabled as well.
|
|
|
|
*
|
|
|
|
* If "flags" contains LZC_SEND_FLAG_RAW, the stream is generated, for encrypted
|
|
|
|
* datasets, by sending data exactly as it exists on disk. This allows backups
|
|
|
|
* to be taken even if encryption keys are not currently loaded.
|
2013-08-28 11:45:09 +00:00
|
|
|
*/
|
|
|
|
int
|
2014-06-05 21:19:08 +00:00
|
|
|
lzc_send(const char *snapname, const char *from, int fd,
|
|
|
|
enum lzc_send_flags flags)
|
2016-01-06 21:22:48 +00:00
|
|
|
{
|
Implement Redacted Send/Receive
Redacted send/receive allows users to send subsets of their data to
a target system. One possible use case for this feature is to not
transmit sensitive information to a data warehousing, test/dev, or
analytics environment. Another is to save space by not replicating
unimportant data within a given dataset, for example in backup tools
like zrepl.
Redacted send/receive is a three-stage process. First, a clone (or
clones) is made of the snapshot to be sent to the target. In this
clone (or clones), all unnecessary or unwanted data is removed or
modified. This clone is then snapshotted to create the "redaction
snapshot" (or snapshots). Second, the new zfs redact command is used
to create a redaction bookmark. The redaction bookmark stores the
list of blocks in a snapshot that were modified by the redaction
snapshot(s). Finally, the redaction bookmark is passed as a parameter
to zfs send. When sending to the snapshot that was redacted, the
redaction bookmark is used to filter out blocks that contain sensitive
or unwanted information, and those blocks are not included in the send
stream. When sending from the redaction bookmark, the blocks it
contains are considered as candidate blocks in addition to those
blocks in the destination snapshot that were modified since the
creation_txg of the redaction bookmark. This step is necessary to
allow the target to rehydrate data in the case where some blocks are
accidentally or unnecessarily modified in the redaction snapshot.
The changes to bookmarks to enable fast space estimation involve
adding deadlists to bookmarks. There is also logic to manage the
life cycles of these deadlists.
The new size estimation process operates in cases where previously
an accurate estimate could not be provided. In those cases, a send
is performed where no data blocks are read, reducing the runtime
significantly and providing a byte-accurate size estimate.
Reviewed-by: Dan Kimmel <dan.kimmel@delphix.com>
Reviewed-by: Matt Ahrens <mahrens@delphix.com>
Reviewed-by: Prashanth Sreenivasa <pks@delphix.com>
Reviewed-by: John Kennedy <john.kennedy@delphix.com>
Reviewed-by: George Wilson <george.wilson@delphix.com>
Reviewed-by: Chris Williamson <chris.williamson@delphix.com>
Reviewed-by: Pavel Zhakarov <pavel.zakharov@delphix.com>
Reviewed-by: Sebastien Roy <sebastien.roy@delphix.com>
Reviewed-by: Prakash Surya <prakash.surya@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Paul Dagnelie <pcd@delphix.com>
Closes #7958
2019-06-19 16:48:13 +00:00
|
|
|
return (lzc_send_resume_redacted(snapname, from, fd, flags, 0, 0,
|
|
|
|
NULL));
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
lzc_send_redacted(const char *snapname, const char *from, int fd,
|
|
|
|
enum lzc_send_flags flags, const char *redactbook)
|
|
|
|
{
|
|
|
|
return (lzc_send_resume_redacted(snapname, from, fd, flags, 0, 0,
|
|
|
|
redactbook));
|
2016-01-06 21:22:48 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
lzc_send_resume(const char *snapname, const char *from, int fd,
|
|
|
|
enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff)
|
Implement Redacted Send/Receive
Redacted send/receive allows users to send subsets of their data to
a target system. One possible use case for this feature is to not
transmit sensitive information to a data warehousing, test/dev, or
analytics environment. Another is to save space by not replicating
unimportant data within a given dataset, for example in backup tools
like zrepl.
Redacted send/receive is a three-stage process. First, a clone (or
clones) is made of the snapshot to be sent to the target. In this
clone (or clones), all unnecessary or unwanted data is removed or
modified. This clone is then snapshotted to create the "redaction
snapshot" (or snapshots). Second, the new zfs redact command is used
to create a redaction bookmark. The redaction bookmark stores the
list of blocks in a snapshot that were modified by the redaction
snapshot(s). Finally, the redaction bookmark is passed as a parameter
to zfs send. When sending to the snapshot that was redacted, the
redaction bookmark is used to filter out blocks that contain sensitive
or unwanted information, and those blocks are not included in the send
stream. When sending from the redaction bookmark, the blocks it
contains are considered as candidate blocks in addition to those
blocks in the destination snapshot that were modified since the
creation_txg of the redaction bookmark. This step is necessary to
allow the target to rehydrate data in the case where some blocks are
accidentally or unnecessarily modified in the redaction snapshot.
The changes to bookmarks to enable fast space estimation involve
adding deadlists to bookmarks. There is also logic to manage the
life cycles of these deadlists.
The new size estimation process operates in cases where previously
an accurate estimate could not be provided. In those cases, a send
is performed where no data blocks are read, reducing the runtime
significantly and providing a byte-accurate size estimate.
Reviewed-by: Dan Kimmel <dan.kimmel@delphix.com>
Reviewed-by: Matt Ahrens <mahrens@delphix.com>
Reviewed-by: Prashanth Sreenivasa <pks@delphix.com>
Reviewed-by: John Kennedy <john.kennedy@delphix.com>
Reviewed-by: George Wilson <george.wilson@delphix.com>
Reviewed-by: Chris Williamson <chris.williamson@delphix.com>
Reviewed-by: Pavel Zhakarov <pavel.zakharov@delphix.com>
Reviewed-by: Sebastien Roy <sebastien.roy@delphix.com>
Reviewed-by: Prakash Surya <prakash.surya@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Paul Dagnelie <pcd@delphix.com>
Closes #7958
2019-06-19 16:48:13 +00:00
|
|
|
{
|
|
|
|
return (lzc_send_resume_redacted(snapname, from, fd, flags, resumeobj,
|
|
|
|
resumeoff, NULL));
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* snapname: The name of the "tosnap", or the snapshot whose contents we are
|
|
|
|
* sending.
|
|
|
|
* from: The name of the "fromsnap", or the incremental source.
|
|
|
|
* fd: File descriptor to write the stream to.
|
|
|
|
* flags: flags that determine features to be used by the stream.
|
|
|
|
* resumeobj: Object to resume from, for resuming send
|
|
|
|
* resumeoff: Offset to resume from, for resuming send.
|
|
|
|
* redactnv: nvlist of string -> boolean(ignored) containing the names of all
|
|
|
|
* the snapshots that we should redact with respect to.
|
|
|
|
* redactbook: Name of the redaction bookmark to create.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
lzc_send_resume_redacted(const char *snapname, const char *from, int fd,
|
|
|
|
enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff,
|
|
|
|
const char *redactbook)
|
2013-08-28 11:45:09 +00:00
|
|
|
{
|
|
|
|
nvlist_t *args;
|
|
|
|
int err;
|
|
|
|
|
|
|
|
args = fnvlist_alloc();
|
|
|
|
fnvlist_add_int32(args, "fd", fd);
|
2013-12-11 22:33:41 +00:00
|
|
|
if (from != NULL)
|
|
|
|
fnvlist_add_string(args, "fromsnap", from);
|
2014-11-03 20:15:08 +00:00
|
|
|
if (flags & LZC_SEND_FLAG_LARGE_BLOCK)
|
|
|
|
fnvlist_add_boolean(args, "largeblockok");
|
2014-06-05 21:19:08 +00:00
|
|
|
if (flags & LZC_SEND_FLAG_EMBED_DATA)
|
|
|
|
fnvlist_add_boolean(args, "embedok");
|
2017-04-11 21:56:54 +00:00
|
|
|
if (flags & LZC_SEND_FLAG_COMPRESS)
|
|
|
|
fnvlist_add_boolean(args, "compressok");
|
Native Encryption for ZFS on Linux
This change incorporates three major pieces:
The first change is a keystore that manages wrapping
and encryption keys for encrypted datasets. These
commands mostly involve manipulating the new
DSL Crypto Key ZAP Objects that live in the MOS. Each
encrypted dataset has its own DSL Crypto Key that is
protected with a user's key. This level of indirection
allows users to change their keys without re-encrypting
their entire datasets. The change implements the new
subcommands "zfs load-key", "zfs unload-key" and
"zfs change-key" which allow the user to manage their
encryption keys and settings. In addition, several new
flags and properties have been added to allow dataset
creation and to make mounting and unmounting more
convenient.
The second piece of this patch provides the ability to
encrypt, decyrpt, and authenticate protected datasets.
Each object set maintains a Merkel tree of Message
Authentication Codes that protect the lower layers,
similarly to how checksums are maintained. This part
impacts the zio layer, which handles the actual
encryption and generation of MACs, as well as the ARC
and DMU, which need to be able to handle encrypted
buffers and protected data.
The last addition is the ability to do raw, encrypted
sends and receives. The idea here is to send raw
encrypted and compressed data and receive it exactly
as is on a backup system. This means that the dataset
on the receiving system is protected using the same
user key that is in use on the sending side. By doing
so, datasets can be efficiently backed up to an
untrusted system without fear of data being
compromised.
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Jorgen Lundman <lundman@lundman.net>
Signed-off-by: Tom Caputi <tcaputi@datto.com>
Closes #494
Closes #5769
2017-08-14 17:36:48 +00:00
|
|
|
if (flags & LZC_SEND_FLAG_RAW)
|
|
|
|
fnvlist_add_boolean(args, "rawok");
|
2020-01-10 18:16:58 +00:00
|
|
|
if (flags & LZC_SEND_FLAG_SAVED)
|
|
|
|
fnvlist_add_boolean(args, "savedok");
|
2016-01-06 21:22:48 +00:00
|
|
|
if (resumeobj != 0 || resumeoff != 0) {
|
|
|
|
fnvlist_add_uint64(args, "resume_object", resumeobj);
|
|
|
|
fnvlist_add_uint64(args, "resume_offset", resumeoff);
|
|
|
|
}
|
Implement Redacted Send/Receive
Redacted send/receive allows users to send subsets of their data to
a target system. One possible use case for this feature is to not
transmit sensitive information to a data warehousing, test/dev, or
analytics environment. Another is to save space by not replicating
unimportant data within a given dataset, for example in backup tools
like zrepl.
Redacted send/receive is a three-stage process. First, a clone (or
clones) is made of the snapshot to be sent to the target. In this
clone (or clones), all unnecessary or unwanted data is removed or
modified. This clone is then snapshotted to create the "redaction
snapshot" (or snapshots). Second, the new zfs redact command is used
to create a redaction bookmark. The redaction bookmark stores the
list of blocks in a snapshot that were modified by the redaction
snapshot(s). Finally, the redaction bookmark is passed as a parameter
to zfs send. When sending to the snapshot that was redacted, the
redaction bookmark is used to filter out blocks that contain sensitive
or unwanted information, and those blocks are not included in the send
stream. When sending from the redaction bookmark, the blocks it
contains are considered as candidate blocks in addition to those
blocks in the destination snapshot that were modified since the
creation_txg of the redaction bookmark. This step is necessary to
allow the target to rehydrate data in the case where some blocks are
accidentally or unnecessarily modified in the redaction snapshot.
The changes to bookmarks to enable fast space estimation involve
adding deadlists to bookmarks. There is also logic to manage the
life cycles of these deadlists.
The new size estimation process operates in cases where previously
an accurate estimate could not be provided. In those cases, a send
is performed where no data blocks are read, reducing the runtime
significantly and providing a byte-accurate size estimate.
Reviewed-by: Dan Kimmel <dan.kimmel@delphix.com>
Reviewed-by: Matt Ahrens <mahrens@delphix.com>
Reviewed-by: Prashanth Sreenivasa <pks@delphix.com>
Reviewed-by: John Kennedy <john.kennedy@delphix.com>
Reviewed-by: George Wilson <george.wilson@delphix.com>
Reviewed-by: Chris Williamson <chris.williamson@delphix.com>
Reviewed-by: Pavel Zhakarov <pavel.zakharov@delphix.com>
Reviewed-by: Sebastien Roy <sebastien.roy@delphix.com>
Reviewed-by: Prakash Surya <prakash.surya@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Paul Dagnelie <pcd@delphix.com>
Closes #7958
2019-06-19 16:48:13 +00:00
|
|
|
if (redactbook != NULL)
|
|
|
|
fnvlist_add_string(args, "redactbook", redactbook);
|
|
|
|
|
2013-08-28 11:45:09 +00:00
|
|
|
err = lzc_ioctl(ZFS_IOC_SEND_NEW, snapname, args, NULL);
|
|
|
|
nvlist_free(args);
|
|
|
|
return (err);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2015-04-08 18:37:13 +00:00
|
|
|
* "from" can be NULL, a snapshot, or a bookmark.
|
|
|
|
*
|
|
|
|
* If from is NULL, a full (non-incremental) stream will be estimated. This
|
|
|
|
* is calculated very efficiently.
|
|
|
|
*
|
|
|
|
* If from is a snapshot, lzc_send_space uses the deadlists attached to
|
|
|
|
* each snapshot to efficiently estimate the stream size.
|
|
|
|
*
|
|
|
|
* If from is a bookmark, the indirect blocks in the destination snapshot
|
|
|
|
* are traversed, looking for blocks with a birth time since the creation TXG of
|
|
|
|
* the snapshot this bookmark was created from. This will result in
|
|
|
|
* significantly more I/O and be less efficient than a send space estimation on
|
Implement Redacted Send/Receive
Redacted send/receive allows users to send subsets of their data to
a target system. One possible use case for this feature is to not
transmit sensitive information to a data warehousing, test/dev, or
analytics environment. Another is to save space by not replicating
unimportant data within a given dataset, for example in backup tools
like zrepl.
Redacted send/receive is a three-stage process. First, a clone (or
clones) is made of the snapshot to be sent to the target. In this
clone (or clones), all unnecessary or unwanted data is removed or
modified. This clone is then snapshotted to create the "redaction
snapshot" (or snapshots). Second, the new zfs redact command is used
to create a redaction bookmark. The redaction bookmark stores the
list of blocks in a snapshot that were modified by the redaction
snapshot(s). Finally, the redaction bookmark is passed as a parameter
to zfs send. When sending to the snapshot that was redacted, the
redaction bookmark is used to filter out blocks that contain sensitive
or unwanted information, and those blocks are not included in the send
stream. When sending from the redaction bookmark, the blocks it
contains are considered as candidate blocks in addition to those
blocks in the destination snapshot that were modified since the
creation_txg of the redaction bookmark. This step is necessary to
allow the target to rehydrate data in the case where some blocks are
accidentally or unnecessarily modified in the redaction snapshot.
The changes to bookmarks to enable fast space estimation involve
adding deadlists to bookmarks. There is also logic to manage the
life cycles of these deadlists.
The new size estimation process operates in cases where previously
an accurate estimate could not be provided. In those cases, a send
is performed where no data blocks are read, reducing the runtime
significantly and providing a byte-accurate size estimate.
Reviewed-by: Dan Kimmel <dan.kimmel@delphix.com>
Reviewed-by: Matt Ahrens <mahrens@delphix.com>
Reviewed-by: Prashanth Sreenivasa <pks@delphix.com>
Reviewed-by: John Kennedy <john.kennedy@delphix.com>
Reviewed-by: George Wilson <george.wilson@delphix.com>
Reviewed-by: Chris Williamson <chris.williamson@delphix.com>
Reviewed-by: Pavel Zhakarov <pavel.zakharov@delphix.com>
Reviewed-by: Sebastien Roy <sebastien.roy@delphix.com>
Reviewed-by: Prakash Surya <prakash.surya@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Paul Dagnelie <pcd@delphix.com>
Closes #7958
2019-06-19 16:48:13 +00:00
|
|
|
* an equivalent snapshot. This process is also used if redact_snaps is
|
|
|
|
* non-null.
|
2013-08-28 11:45:09 +00:00
|
|
|
*/
|
|
|
|
int
|
Implement Redacted Send/Receive
Redacted send/receive allows users to send subsets of their data to
a target system. One possible use case for this feature is to not
transmit sensitive information to a data warehousing, test/dev, or
analytics environment. Another is to save space by not replicating
unimportant data within a given dataset, for example in backup tools
like zrepl.
Redacted send/receive is a three-stage process. First, a clone (or
clones) is made of the snapshot to be sent to the target. In this
clone (or clones), all unnecessary or unwanted data is removed or
modified. This clone is then snapshotted to create the "redaction
snapshot" (or snapshots). Second, the new zfs redact command is used
to create a redaction bookmark. The redaction bookmark stores the
list of blocks in a snapshot that were modified by the redaction
snapshot(s). Finally, the redaction bookmark is passed as a parameter
to zfs send. When sending to the snapshot that was redacted, the
redaction bookmark is used to filter out blocks that contain sensitive
or unwanted information, and those blocks are not included in the send
stream. When sending from the redaction bookmark, the blocks it
contains are considered as candidate blocks in addition to those
blocks in the destination snapshot that were modified since the
creation_txg of the redaction bookmark. This step is necessary to
allow the target to rehydrate data in the case where some blocks are
accidentally or unnecessarily modified in the redaction snapshot.
The changes to bookmarks to enable fast space estimation involve
adding deadlists to bookmarks. There is also logic to manage the
life cycles of these deadlists.
The new size estimation process operates in cases where previously
an accurate estimate could not be provided. In those cases, a send
is performed where no data blocks are read, reducing the runtime
significantly and providing a byte-accurate size estimate.
Reviewed-by: Dan Kimmel <dan.kimmel@delphix.com>
Reviewed-by: Matt Ahrens <mahrens@delphix.com>
Reviewed-by: Prashanth Sreenivasa <pks@delphix.com>
Reviewed-by: John Kennedy <john.kennedy@delphix.com>
Reviewed-by: George Wilson <george.wilson@delphix.com>
Reviewed-by: Chris Williamson <chris.williamson@delphix.com>
Reviewed-by: Pavel Zhakarov <pavel.zakharov@delphix.com>
Reviewed-by: Sebastien Roy <sebastien.roy@delphix.com>
Reviewed-by: Prakash Surya <prakash.surya@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Paul Dagnelie <pcd@delphix.com>
Closes #7958
2019-06-19 16:48:13 +00:00
|
|
|
lzc_send_space_resume_redacted(const char *snapname, const char *from,
|
|
|
|
enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff,
|
|
|
|
uint64_t resume_bytes, const char *redactbook, int fd, uint64_t *spacep)
|
2013-08-28 11:45:09 +00:00
|
|
|
{
|
|
|
|
nvlist_t *args;
|
|
|
|
nvlist_t *result;
|
|
|
|
int err;
|
|
|
|
|
|
|
|
args = fnvlist_alloc();
|
2015-04-08 18:37:13 +00:00
|
|
|
if (from != NULL)
|
|
|
|
fnvlist_add_string(args, "from", from);
|
2016-07-11 17:45:52 +00:00
|
|
|
if (flags & LZC_SEND_FLAG_LARGE_BLOCK)
|
|
|
|
fnvlist_add_boolean(args, "largeblockok");
|
|
|
|
if (flags & LZC_SEND_FLAG_EMBED_DATA)
|
|
|
|
fnvlist_add_boolean(args, "embedok");
|
|
|
|
if (flags & LZC_SEND_FLAG_COMPRESS)
|
|
|
|
fnvlist_add_boolean(args, "compressok");
|
2017-08-31 16:00:35 +00:00
|
|
|
if (flags & LZC_SEND_FLAG_RAW)
|
|
|
|
fnvlist_add_boolean(args, "rawok");
|
Implement Redacted Send/Receive
Redacted send/receive allows users to send subsets of their data to
a target system. One possible use case for this feature is to not
transmit sensitive information to a data warehousing, test/dev, or
analytics environment. Another is to save space by not replicating
unimportant data within a given dataset, for example in backup tools
like zrepl.
Redacted send/receive is a three-stage process. First, a clone (or
clones) is made of the snapshot to be sent to the target. In this
clone (or clones), all unnecessary or unwanted data is removed or
modified. This clone is then snapshotted to create the "redaction
snapshot" (or snapshots). Second, the new zfs redact command is used
to create a redaction bookmark. The redaction bookmark stores the
list of blocks in a snapshot that were modified by the redaction
snapshot(s). Finally, the redaction bookmark is passed as a parameter
to zfs send. When sending to the snapshot that was redacted, the
redaction bookmark is used to filter out blocks that contain sensitive
or unwanted information, and those blocks are not included in the send
stream. When sending from the redaction bookmark, the blocks it
contains are considered as candidate blocks in addition to those
blocks in the destination snapshot that were modified since the
creation_txg of the redaction bookmark. This step is necessary to
allow the target to rehydrate data in the case where some blocks are
accidentally or unnecessarily modified in the redaction snapshot.
The changes to bookmarks to enable fast space estimation involve
adding deadlists to bookmarks. There is also logic to manage the
life cycles of these deadlists.
The new size estimation process operates in cases where previously
an accurate estimate could not be provided. In those cases, a send
is performed where no data blocks are read, reducing the runtime
significantly and providing a byte-accurate size estimate.
Reviewed-by: Dan Kimmel <dan.kimmel@delphix.com>
Reviewed-by: Matt Ahrens <mahrens@delphix.com>
Reviewed-by: Prashanth Sreenivasa <pks@delphix.com>
Reviewed-by: John Kennedy <john.kennedy@delphix.com>
Reviewed-by: George Wilson <george.wilson@delphix.com>
Reviewed-by: Chris Williamson <chris.williamson@delphix.com>
Reviewed-by: Pavel Zhakarov <pavel.zakharov@delphix.com>
Reviewed-by: Sebastien Roy <sebastien.roy@delphix.com>
Reviewed-by: Prakash Surya <prakash.surya@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Paul Dagnelie <pcd@delphix.com>
Closes #7958
2019-06-19 16:48:13 +00:00
|
|
|
if (resumeobj != 0 || resumeoff != 0) {
|
|
|
|
fnvlist_add_uint64(args, "resume_object", resumeobj);
|
|
|
|
fnvlist_add_uint64(args, "resume_offset", resumeoff);
|
|
|
|
fnvlist_add_uint64(args, "bytes", resume_bytes);
|
|
|
|
}
|
|
|
|
if (redactbook != NULL)
|
|
|
|
fnvlist_add_string(args, "redactbook", redactbook);
|
|
|
|
if (fd != -1)
|
|
|
|
fnvlist_add_int32(args, "fd", fd);
|
|
|
|
|
2013-08-28 11:45:09 +00:00
|
|
|
err = lzc_ioctl(ZFS_IOC_SEND_SPACE, snapname, args, &result);
|
|
|
|
nvlist_free(args);
|
|
|
|
if (err == 0)
|
|
|
|
*spacep = fnvlist_lookup_uint64(result, "space");
|
|
|
|
nvlist_free(result);
|
|
|
|
return (err);
|
|
|
|
}
|
|
|
|
|
Implement Redacted Send/Receive
Redacted send/receive allows users to send subsets of their data to
a target system. One possible use case for this feature is to not
transmit sensitive information to a data warehousing, test/dev, or
analytics environment. Another is to save space by not replicating
unimportant data within a given dataset, for example in backup tools
like zrepl.
Redacted send/receive is a three-stage process. First, a clone (or
clones) is made of the snapshot to be sent to the target. In this
clone (or clones), all unnecessary or unwanted data is removed or
modified. This clone is then snapshotted to create the "redaction
snapshot" (or snapshots). Second, the new zfs redact command is used
to create a redaction bookmark. The redaction bookmark stores the
list of blocks in a snapshot that were modified by the redaction
snapshot(s). Finally, the redaction bookmark is passed as a parameter
to zfs send. When sending to the snapshot that was redacted, the
redaction bookmark is used to filter out blocks that contain sensitive
or unwanted information, and those blocks are not included in the send
stream. When sending from the redaction bookmark, the blocks it
contains are considered as candidate blocks in addition to those
blocks in the destination snapshot that were modified since the
creation_txg of the redaction bookmark. This step is necessary to
allow the target to rehydrate data in the case where some blocks are
accidentally or unnecessarily modified in the redaction snapshot.
The changes to bookmarks to enable fast space estimation involve
adding deadlists to bookmarks. There is also logic to manage the
life cycles of these deadlists.
The new size estimation process operates in cases where previously
an accurate estimate could not be provided. In those cases, a send
is performed where no data blocks are read, reducing the runtime
significantly and providing a byte-accurate size estimate.
Reviewed-by: Dan Kimmel <dan.kimmel@delphix.com>
Reviewed-by: Matt Ahrens <mahrens@delphix.com>
Reviewed-by: Prashanth Sreenivasa <pks@delphix.com>
Reviewed-by: John Kennedy <john.kennedy@delphix.com>
Reviewed-by: George Wilson <george.wilson@delphix.com>
Reviewed-by: Chris Williamson <chris.williamson@delphix.com>
Reviewed-by: Pavel Zhakarov <pavel.zakharov@delphix.com>
Reviewed-by: Sebastien Roy <sebastien.roy@delphix.com>
Reviewed-by: Prakash Surya <prakash.surya@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Paul Dagnelie <pcd@delphix.com>
Closes #7958
2019-06-19 16:48:13 +00:00
|
|
|
int
|
|
|
|
lzc_send_space(const char *snapname, const char *from,
|
|
|
|
enum lzc_send_flags flags, uint64_t *spacep)
|
|
|
|
{
|
|
|
|
return (lzc_send_space_resume_redacted(snapname, from, flags, 0, 0, 0,
|
|
|
|
NULL, -1, spacep));
|
|
|
|
}
|
|
|
|
|
2013-08-28 11:45:09 +00:00
|
|
|
/*
 * Read exactly 'ilen' bytes from 'fd' into 'buf'.
 *
 * A single read() may return fewer bytes than requested (pipes, sockets),
 * so keep reading until the request is satisfied.
 *
 * Returns:
 *	0	all 'ilen' bytes were read (trivially true for ilen == 0)
 *	EINVAL	'ilen' is negative
 *	EIO	read() failed, or end-of-file was hit before 'ilen' bytes
 *		arrived
 */
static int
recv_read(int fd, void *buf, int ilen)
{
	char *cp = buf;
	size_t resid;

	/*
	 * A negative length would be converted to a huge size_t by read()
	 * and could overrun 'buf'; reject it up front.
	 */
	if (ilen < 0)
		return (EINVAL);
	resid = (size_t)ilen;

	while (resid > 0) {
		ssize_t rv = read(fd, cp, resid);

		if (rv < 0) {
			/* Interrupted before any data was moved: retry. */
			if (errno == EINTR)
				continue;
			return (EIO);
		}

		/* EOF with bytes still outstanding: the stream is short. */
		if (rv == 0)
			return (EIO);

		/* Only advance on success so 'cp' never leaves 'buf'. */
		cp += rv;
		resid -= (size_t)rv;
	}

	return (0);
}
|
|
|
|
|
2016-06-10 00:04:12 +00:00
|
|
|
/*
|
Native Encryption for ZFS on Linux
This change incorporates three major pieces:
The first change is a keystore that manages wrapping
and encryption keys for encrypted datasets. These
commands mostly involve manipulating the new
DSL Crypto Key ZAP Objects that live in the MOS. Each
encrypted dataset has its own DSL Crypto Key that is
protected with a user's key. This level of indirection
allows users to change their keys without re-encrypting
their entire datasets. The change implements the new
subcommands "zfs load-key", "zfs unload-key" and
"zfs change-key" which allow the user to manage their
encryption keys and settings. In addition, several new
flags and properties have been added to allow dataset
creation and to make mounting and unmounting more
convenient.
The second piece of this patch provides the ability to
encrypt, decyrpt, and authenticate protected datasets.
Each object set maintains a Merkel tree of Message
Authentication Codes that protect the lower layers,
similarly to how checksums are maintained. This part
impacts the zio layer, which handles the actual
encryption and generation of MACs, as well as the ARC
and DMU, which need to be able to handle encrypted
buffers and protected data.
The last addition is the ability to do raw, encrypted
sends and receives. The idea here is to send raw
encrypted and compressed data and receive it exactly
as is on a backup system. This means that the dataset
on the receiving system is protected using the same
user key that is in use on the sending side. By doing
so, datasets can be efficiently backed up to an
untrusted system without fear of data being
compromised.
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Jorgen Lundman <lundman@lundman.net>
Signed-off-by: Tom Caputi <tcaputi@datto.com>
Closes #494
Closes #5769
2017-08-14 17:36:48 +00:00
|
|
|
* Linux adds ZFS_IOC_RECV_NEW for resumable and raw streams and preserves the
|
|
|
|
* legacy ZFS_IOC_RECV user/kernel interface. The new interface supports all
|
|
|
|
* stream options but is currently only used for resumable streams. This way
|
|
|
|
* updated user space utilities will interoperate with older kernel modules.
|
2016-06-10 00:04:12 +00:00
|
|
|
*
|
|
|
|
* Non-Linux OpenZFS platforms have opted to modify the legacy interface.
|
|
|
|
*/
|
2016-01-06 21:22:48 +00:00
|
|
|
static int
|
2017-05-09 23:21:09 +00:00
|
|
|
recv_impl(const char *snapname, nvlist_t *recvdprops, nvlist_t *localprops,
|
2017-10-13 17:09:04 +00:00
|
|
|
uint8_t *wkeydata, uint_t wkeylen, const char *origin, boolean_t force,
|
|
|
|
boolean_t resumable, boolean_t raw, int input_fd,
|
2020-04-23 17:06:57 +00:00
|
|
|
const dmu_replay_record_t *begin_record, uint64_t *read_bytes,
|
|
|
|
uint64_t *errflags, nvlist_t **errors)
|
2013-08-28 11:45:09 +00:00
|
|
|
{
|
2016-06-10 00:04:12 +00:00
|
|
|
dmu_replay_record_t drr;
|
|
|
|
char fsname[MAXPATHLEN];
|
2013-08-28 11:45:09 +00:00
|
|
|
char *atp;
|
|
|
|
int error;
|
Implement Redacted Send/Receive
Redacted send/receive allows users to send subsets of their data to
a target system. One possible use case for this feature is to not
transmit sensitive information to a data warehousing, test/dev, or
analytics environment. Another is to save space by not replicating
unimportant data within a given dataset, for example in backup tools
like zrepl.
Redacted send/receive is a three-stage process. First, a clone (or
clones) is made of the snapshot to be sent to the target. In this
clone (or clones), all unnecessary or unwanted data is removed or
modified. This clone is then snapshotted to create the "redaction
snapshot" (or snapshots). Second, the new zfs redact command is used
to create a redaction bookmark. The redaction bookmark stores the
list of blocks in a snapshot that were modified by the redaction
snapshot(s). Finally, the redaction bookmark is passed as a parameter
to zfs send. When sending to the snapshot that was redacted, the
redaction bookmark is used to filter out blocks that contain sensitive
or unwanted information, and those blocks are not included in the send
stream. When sending from the redaction bookmark, the blocks it
contains are considered as candidate blocks in addition to those
blocks in the destination snapshot that were modified since the
creation_txg of the redaction bookmark. This step is necessary to
allow the target to rehydrate data in the case where some blocks are
accidentally or unnecessarily modified in the redaction snapshot.
The changes to bookmarks to enable fast space estimation involve
adding deadlists to bookmarks. There is also logic to manage the
life cycles of these deadlists.
The new size estimation process operates in cases where previously
an accurate estimate could not be provided. In those cases, a send
is performed where no data blocks are read, reducing the runtime
significantly and providing a byte-accurate size estimate.
Reviewed-by: Dan Kimmel <dan.kimmel@delphix.com>
Reviewed-by: Matt Ahrens <mahrens@delphix.com>
Reviewed-by: Prashanth Sreenivasa <pks@delphix.com>
Reviewed-by: John Kennedy <john.kennedy@delphix.com>
Reviewed-by: George Wilson <george.wilson@delphix.com>
Reviewed-by: Chris Williamson <chris.williamson@delphix.com>
Reviewed-by: Pavel Zhakarov <pavel.zakharov@delphix.com>
Reviewed-by: Sebastien Roy <sebastien.roy@delphix.com>
Reviewed-by: Prakash Surya <prakash.surya@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Paul Dagnelie <pcd@delphix.com>
Closes #7958
2019-06-19 16:48:13 +00:00
|
|
|
boolean_t payload = B_FALSE;
|
2013-08-28 11:45:09 +00:00
|
|
|
|
OpenZFS 7745 - print error if lzc_* is called before libzfs_core_init
The problem is that consumers of `libZFS_Core` that forget to call
`libzfs_core_init()` before calling any other function of the library
are having a hard time realizing their mistake. The library's internal
file descriptor is declared as global static, which is ok, but it is not
initialized explicitly; therefore, it defaults to 0, which is a valid
file descriptor. If `libzfs_core_init()`, which explicitly initializes
the correct fd, is skipped, the ioctl functions return errors that do
not have anything to do with `libZFS_Core`, where the problem is
actually located.
Even though assertions for that existed within `libZFS_Core` for debug
builds, they were never enabled because the `-DDEBUG` flag was missing
from the compiler flags.
This patch applies the following changes:
1. It adds `-DDEBUG` for debug builds of `libZFS_Core` and `libzfs`,
to enable their assertions on debug builds.
2. It corrects an assertion within `libzfs`, where a function had
been spelled incorrectly (`zpool_prop_unsupported()`) and nobody
knew because the `-DDEBUG` flag was missing, and the preprocessor
was taking that part of the code away.
3. The library's internal fd is initialized to `-1` and `VERIFY`
assertions have been placed to check that the fd is not equal to
`-1` before issuing any ioctl. It is important here to note, that
the `VERIFY` assertions exist in both debug and non-debug builds.
4. In `libzfs_core_fini` we make sure to never increment the
refcount of our fd below 0, and also reset the fd to `-1` when no
one refers to it. The reason for this, is for the rare case that
the consumer closes all references but then calls one of the
library's functions without using `libzfs_core_init()` first, and
in the mean time, a previous call to `open()` decided to reuse
our previous fd. This scenario would have passed our assertion in
non-debug builds.
5. Once the `ASSERTION` macros were enabled again, two tests from
the test suite were failing in `libzfs_sendrecv.c` at a
`ZIO_CHECKSUM_IS_ZERO` check within `dump_record()`. We now zero
the kernel filled checksums in all `dmu_replay_record`s that we
read in `cksummer()`, except the ones that are of type
`DRR_BEGIN`.
I considered making all assertions available for both debug and
non-debug builds, but I figured that it would not be appropriate if, for
example, an outside consumer of `libZFS_Core` suddenly triggers an
assertion failure because they happened to call `libzfs_core_fini()`,
even if previously the reference counter was `0`. Therefore, all the
reference counter related assertions are only enabled for debug builds,
and fd related assertions are enabled for debug and non-debug builds.
Porting notes:
- `ASSERT3S(g_refcount, >, 0);` added to `recv_impl` in
lib/libzfs_core/libzfs_core.c .
Authored by: Serapheim Dimitropoulos <serapheim@delphix.com>
Reviewed by: Pavel Zakharov <pavel.zakharov@delphix.com>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Ported-by: George Melikov <mail@gmelikov.ru>
OpenZFS-issue: https://www.illumos.org/issues/7745
OpenZFS-commit: https://github.com/openzfs/openzfs/commit/7e3139a
Closes #5698
2017-01-31 18:48:45 +00:00
|
|
|
ASSERT3S(g_refcount, >, 0);
|
|
|
|
VERIFY3S(g_fd, !=, -1);
|
|
|
|
|
2016-06-10 00:04:12 +00:00
|
|
|
/* Set 'fsname' to the name of containing filesystem */
|
|
|
|
(void) strlcpy(fsname, snapname, sizeof (fsname));
|
|
|
|
atp = strchr(fsname, '@');
|
2013-08-28 11:45:09 +00:00
|
|
|
if (atp == NULL)
|
|
|
|
return (EINVAL);
|
|
|
|
*atp = '\0';
|
|
|
|
|
2016-06-10 00:04:12 +00:00
|
|
|
/* If the fs does not exist, try its parent. */
|
|
|
|
if (!lzc_exists(fsname)) {
|
|
|
|
char *slashp = strrchr(fsname, '/');
|
2013-08-28 11:45:09 +00:00
|
|
|
if (slashp == NULL)
|
|
|
|
return (ENOENT);
|
|
|
|
*slashp = '\0';
|
2016-06-10 00:04:12 +00:00
|
|
|
}
|
2013-08-28 11:45:09 +00:00
|
|
|
|
2016-06-10 00:04:12 +00:00
|
|
|
/*
|
|
|
|
* The begin_record is normally a non-byteswapped BEGIN record.
|
|
|
|
* For resumable streams it may be set to any non-byteswapped
|
|
|
|
* dmu_replay_record_t.
|
|
|
|
*/
|
|
|
|
if (begin_record == NULL) {
|
|
|
|
error = recv_read(input_fd, &drr, sizeof (drr));
|
|
|
|
if (error != 0)
|
|
|
|
return (error);
|
|
|
|
} else {
|
|
|
|
drr = *begin_record;
|
Implement Redacted Send/Receive
Redacted send/receive allows users to send subsets of their data to
a target system. One possible use case for this feature is to not
transmit sensitive information to a data warehousing, test/dev, or
analytics environment. Another is to save space by not replicating
unimportant data within a given dataset, for example in backup tools
like zrepl.
Redacted send/receive is a three-stage process. First, a clone (or
clones) is made of the snapshot to be sent to the target. In this
clone (or clones), all unnecessary or unwanted data is removed or
modified. This clone is then snapshotted to create the "redaction
snapshot" (or snapshots). Second, the new zfs redact command is used
to create a redaction bookmark. The redaction bookmark stores the
list of blocks in a snapshot that were modified by the redaction
snapshot(s). Finally, the redaction bookmark is passed as a parameter
to zfs send. When sending to the snapshot that was redacted, the
redaction bookmark is used to filter out blocks that contain sensitive
or unwanted information, and those blocks are not included in the send
stream. When sending from the redaction bookmark, the blocks it
contains are considered as candidate blocks in addition to those
blocks in the destination snapshot that were modified since the
creation_txg of the redaction bookmark. This step is necessary to
allow the target to rehydrate data in the case where some blocks are
accidentally or unnecessarily modified in the redaction snapshot.
The changes to bookmarks to enable fast space estimation involve
adding deadlists to bookmarks. There is also logic to manage the
life cycles of these deadlists.
The new size estimation process operates in cases where previously
an accurate estimate could not be provided. In those cases, a send
is performed where no data blocks are read, reducing the runtime
significantly and providing a byte-accurate size estimate.
Reviewed-by: Dan Kimmel <dan.kimmel@delphix.com>
Reviewed-by: Matt Ahrens <mahrens@delphix.com>
Reviewed-by: Prashanth Sreenivasa <pks@delphix.com>
Reviewed-by: John Kennedy <john.kennedy@delphix.com>
Reviewed-by: George Wilson <george.wilson@delphix.com>
Reviewed-by: Chris Williamson <chris.williamson@delphix.com>
Reviewed-by: Pavel Zhakarov <pavel.zakharov@delphix.com>
Reviewed-by: Sebastien Roy <sebastien.roy@delphix.com>
Reviewed-by: Prakash Surya <prakash.surya@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Paul Dagnelie <pcd@delphix.com>
Closes #7958
2019-06-19 16:48:13 +00:00
|
|
|
payload = (begin_record->drr_payloadlen != 0);
|
2013-08-28 11:45:09 +00:00
|
|
|
}
|
|
|
|
|
2017-10-13 17:09:04 +00:00
|
|
|
/*
|
2019-09-03 00:53:27 +00:00
|
|
|
* All receives with a payload should use the new interface.
|
2017-10-13 17:09:04 +00:00
|
|
|
*/
|
Implement Redacted Send/Receive
Redacted send/receive allows users to send subsets of their data to
a target system. One possible use case for this feature is to not
transmit sensitive information to a data warehousing, test/dev, or
analytics environment. Another is to save space by not replicating
unimportant data within a given dataset, for example in backup tools
like zrepl.
Redacted send/receive is a three-stage process. First, a clone (or
clones) is made of the snapshot to be sent to the target. In this
clone (or clones), all unnecessary or unwanted data is removed or
modified. This clone is then snapshotted to create the "redaction
snapshot" (or snapshots). Second, the new zfs redact command is used
to create a redaction bookmark. The redaction bookmark stores the
list of blocks in a snapshot that were modified by the redaction
snapshot(s). Finally, the redaction bookmark is passed as a parameter
to zfs send. When sending to the snapshot that was redacted, the
redaction bookmark is used to filter out blocks that contain sensitive
or unwanted information, and those blocks are not included in the send
stream. When sending from the redaction bookmark, the blocks it
contains are considered as candidate blocks in addition to those
blocks in the destination snapshot that were modified since the
creation_txg of the redaction bookmark. This step is necessary to
allow the target to rehydrate data in the case where some blocks are
accidentally or unnecessarily modified in the redaction snapshot.
The changes to bookmarks to enable fast space estimation involve
adding deadlists to bookmarks. There is also logic to manage the
life cycles of these deadlists.
The new size estimation process operates in cases where previously
an accurate estimate could not be provided. In those cases, a send
is performed where no data blocks are read, reducing the runtime
significantly and providing a byte-accurate size estimate.
Reviewed-by: Dan Kimmel <dan.kimmel@delphix.com>
Reviewed-by: Matt Ahrens <mahrens@delphix.com>
Reviewed-by: Prashanth Sreenivasa <pks@delphix.com>
Reviewed-by: John Kennedy <john.kennedy@delphix.com>
Reviewed-by: George Wilson <george.wilson@delphix.com>
Reviewed-by: Chris Williamson <chris.williamson@delphix.com>
Reviewed-by: Pavel Zhakarov <pavel.zakharov@delphix.com>
Reviewed-by: Sebastien Roy <sebastien.roy@delphix.com>
Reviewed-by: Prakash Surya <prakash.surya@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Paul Dagnelie <pcd@delphix.com>
Closes #7958
2019-06-19 16:48:13 +00:00
|
|
|
if (resumable || raw || wkeydata != NULL || payload) {
|
2016-06-10 00:04:12 +00:00
|
|
|
nvlist_t *outnvl = NULL;
|
|
|
|
nvlist_t *innvl = fnvlist_alloc();
|
2013-08-28 11:45:09 +00:00
|
|
|
|
2016-06-10 00:04:12 +00:00
|
|
|
fnvlist_add_string(innvl, "snapname", snapname);
|
2013-08-28 11:45:09 +00:00
|
|
|
|
2017-05-09 23:21:09 +00:00
|
|
|
if (recvdprops != NULL)
|
|
|
|
fnvlist_add_nvlist(innvl, "props", recvdprops);
|
|
|
|
|
|
|
|
if (localprops != NULL)
|
|
|
|
fnvlist_add_nvlist(innvl, "localprops", localprops);
|
2013-08-28 11:45:09 +00:00
|
|
|
|
2017-10-13 17:09:04 +00:00
|
|
|
if (wkeydata != NULL) {
|
|
|
|
/*
|
|
|
|
* wkeydata must be placed in the special
|
|
|
|
* ZPOOL_HIDDEN_ARGS nvlist so that it
|
|
|
|
* will not be printed to the zpool history.
|
|
|
|
*/
|
|
|
|
nvlist_t *hidden_args = fnvlist_alloc();
|
|
|
|
fnvlist_add_uint8_array(hidden_args, "wkeydata",
|
|
|
|
wkeydata, wkeylen);
|
|
|
|
fnvlist_add_nvlist(innvl, ZPOOL_HIDDEN_ARGS,
|
|
|
|
hidden_args);
|
|
|
|
nvlist_free(hidden_args);
|
|
|
|
}
|
|
|
|
|
2016-06-10 00:04:12 +00:00
|
|
|
if (origin != NULL && strlen(origin))
|
|
|
|
fnvlist_add_string(innvl, "origin", origin);
|
|
|
|
|
|
|
|
fnvlist_add_byte_array(innvl, "begin_record",
|
2016-12-12 18:46:26 +00:00
|
|
|
(uchar_t *)&drr, sizeof (drr));
|
2016-06-10 00:04:12 +00:00
|
|
|
|
|
|
|
fnvlist_add_int32(innvl, "input_fd", input_fd);
|
|
|
|
|
|
|
|
if (force)
|
|
|
|
fnvlist_add_boolean(innvl, "force");
|
|
|
|
|
|
|
|
if (resumable)
|
|
|
|
fnvlist_add_boolean(innvl, "resumable");
|
|
|
|
|
|
|
|
|
|
|
|
error = lzc_ioctl(ZFS_IOC_RECV_NEW, fsname, innvl, &outnvl);
|
|
|
|
|
|
|
|
if (error == 0 && read_bytes != NULL)
|
|
|
|
error = nvlist_lookup_uint64(outnvl, "read_bytes",
|
|
|
|
read_bytes);
|
|
|
|
|
|
|
|
if (error == 0 && errflags != NULL)
|
|
|
|
error = nvlist_lookup_uint64(outnvl, "error_flags",
|
|
|
|
errflags);
|
|
|
|
|
|
|
|
if (error == 0 && errors != NULL) {
|
|
|
|
nvlist_t *nvl;
|
|
|
|
error = nvlist_lookup_nvlist(outnvl, "errors", &nvl);
|
|
|
|
if (error == 0)
|
|
|
|
*errors = fnvlist_dup(nvl);
|
|
|
|
}
|
|
|
|
|
|
|
|
fnvlist_free(innvl);
|
|
|
|
fnvlist_free(outnvl);
|
2016-06-16 17:01:33 +00:00
|
|
|
} else {
|
2016-06-10 00:04:12 +00:00
|
|
|
zfs_cmd_t zc = {"\0"};
|
|
|
|
char *packed = NULL;
|
|
|
|
size_t size;
|
2013-08-28 11:45:09 +00:00
|
|
|
|
2016-06-10 00:04:12 +00:00
|
|
|
ASSERT3S(g_refcount, >, 0);
|
2013-08-28 11:45:09 +00:00
|
|
|
|
2019-04-08 16:10:59 +00:00
|
|
|
(void) strlcpy(zc.zc_name, fsname, sizeof (zc.zc_name));
|
2016-06-10 00:04:12 +00:00
|
|
|
(void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value));
|
2013-08-28 11:45:09 +00:00
|
|
|
|
2017-05-09 23:21:09 +00:00
|
|
|
if (recvdprops != NULL) {
|
|
|
|
packed = fnvlist_pack(recvdprops, &size);
|
2016-06-10 00:04:12 +00:00
|
|
|
zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed;
|
|
|
|
zc.zc_nvlist_src_size = size;
|
|
|
|
}
|
2016-01-06 21:22:48 +00:00
|
|
|
|
2017-05-09 23:21:09 +00:00
|
|
|
if (localprops != NULL) {
|
|
|
|
packed = fnvlist_pack(localprops, &size);
|
|
|
|
zc.zc_nvlist_conf = (uint64_t)(uintptr_t)packed;
|
|
|
|
zc.zc_nvlist_conf_size = size;
|
|
|
|
}
|
|
|
|
|
2016-06-10 00:04:12 +00:00
|
|
|
if (origin != NULL)
|
|
|
|
(void) strlcpy(zc.zc_string, origin,
|
|
|
|
sizeof (zc.zc_string));
|
2013-08-28 11:45:09 +00:00
|
|
|
|
2016-06-10 00:04:12 +00:00
|
|
|
ASSERT3S(drr.drr_type, ==, DRR_BEGIN);
|
|
|
|
zc.zc_begin_record = drr.drr_u.drr_begin;
|
|
|
|
zc.zc_guid = force;
|
|
|
|
zc.zc_cookie = input_fd;
|
|
|
|
zc.zc_cleanup_fd = -1;
|
|
|
|
zc.zc_action_handle = 0;
|
|
|
|
|
|
|
|
zc.zc_nvlist_dst_size = 128 * 1024;
|
|
|
|
zc.zc_nvlist_dst = (uint64_t)(uintptr_t)
|
|
|
|
malloc(zc.zc_nvlist_dst_size);
|
|
|
|
|
2019-11-12 18:40:39 +00:00
|
|
|
error = zfs_ioctl_fd(g_fd, ZFS_IOC_RECV, &zc);
|
2016-06-10 00:04:12 +00:00
|
|
|
if (error != 0) {
|
|
|
|
error = errno;
|
|
|
|
} else {
|
|
|
|
if (read_bytes != NULL)
|
|
|
|
*read_bytes = zc.zc_cookie;
|
|
|
|
|
|
|
|
if (errflags != NULL)
|
|
|
|
*errflags = zc.zc_obj;
|
|
|
|
|
|
|
|
if (errors != NULL)
|
|
|
|
VERIFY0(nvlist_unpack(
|
|
|
|
(void *)(uintptr_t)zc.zc_nvlist_dst,
|
|
|
|
zc.zc_nvlist_dst_size, errors, KM_SLEEP));
|
|
|
|
}
|
|
|
|
|
|
|
|
if (packed != NULL)
|
|
|
|
fnvlist_pack_free(packed, size);
|
|
|
|
free((void *)(uintptr_t)zc.zc_nvlist_dst);
|
|
|
|
}
|
2013-08-28 11:45:09 +00:00
|
|
|
|
|
|
|
return (error);
|
|
|
|
}
|
2013-08-14 19:42:31 +00:00
|
|
|
|
2016-01-06 21:22:48 +00:00
|
|
|
/*
|
|
|
|
* The simplest receive case: receive from the specified fd, creating the
|
|
|
|
* specified snapshot. Apply the specified properties as "received" properties
|
|
|
|
* (which can be overridden by locally-set properties). If the stream is a
|
|
|
|
* clone, its origin snapshot must be specified by 'origin'. The 'force'
|
|
|
|
* flag will cause the target filesystem to be rolled back or destroyed if
|
|
|
|
* necessary to receive.
|
|
|
|
*
|
|
|
|
* Return 0 on success or an errno on failure.
|
|
|
|
*
|
|
|
|
* Note: this interface does not work on dedup'd streams
|
|
|
|
* (those with DMU_BACKUP_FEATURE_DEDUP).
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
lzc_receive(const char *snapname, nvlist_t *props, const char *origin,
|
Native Encryption for ZFS on Linux
This change incorporates three major pieces:
The first change is a keystore that manages wrapping
and encryption keys for encrypted datasets. These
commands mostly involve manipulating the new
DSL Crypto Key ZAP Objects that live in the MOS. Each
encrypted dataset has its own DSL Crypto Key that is
protected with a user's key. This level of indirection
allows users to change their keys without re-encrypting
their entire datasets. The change implements the new
subcommands "zfs load-key", "zfs unload-key" and
"zfs change-key" which allow the user to manage their
encryption keys and settings. In addition, several new
flags and properties have been added to allow dataset
creation and to make mounting and unmounting more
convenient.
The second piece of this patch provides the ability to
encrypt, decyrpt, and authenticate protected datasets.
Each object set maintains a Merkel tree of Message
Authentication Codes that protect the lower layers,
similarly to how checksums are maintained. This part
impacts the zio layer, which handles the actual
encryption and generation of MACs, as well as the ARC
and DMU, which need to be able to handle encrypted
buffers and protected data.
The last addition is the ability to do raw, encrypted
sends and receives. The idea here is to send raw
encrypted and compressed data and receive it exactly
as is on a backup system. This means that the dataset
on the receiving system is protected using the same
user key that is in use on the sending side. By doing
so, datasets can be efficiently backed up to an
untrusted system without fear of data being
compromised.
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Jorgen Lundman <lundman@lundman.net>
Signed-off-by: Tom Caputi <tcaputi@datto.com>
Closes #494
Closes #5769
2017-08-14 17:36:48 +00:00
|
|
|
boolean_t force, boolean_t raw, int fd)
|
2016-01-06 21:22:48 +00:00
|
|
|
{
|
2017-10-13 17:09:04 +00:00
|
|
|
return (recv_impl(snapname, props, NULL, NULL, 0, origin, force,
|
2020-04-23 17:06:57 +00:00
|
|
|
B_FALSE, raw, fd, NULL, NULL, NULL, NULL));
|
2016-01-06 21:22:48 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Like lzc_receive, but if the receive fails due to premature stream
|
|
|
|
* termination, the intermediate state will be preserved on disk. In this
|
|
|
|
* case, ECKSUM will be returned. The receive may subsequently be resumed
|
|
|
|
* with a resuming send stream generated by lzc_send_resume().
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
lzc_receive_resumable(const char *snapname, nvlist_t *props, const char *origin,
|
Native Encryption for ZFS on Linux
This change incorporates three major pieces:
The first change is a keystore that manages wrapping
and encryption keys for encrypted datasets. These
commands mostly involve manipulating the new
DSL Crypto Key ZAP Objects that live in the MOS. Each
encrypted dataset has its own DSL Crypto Key that is
protected with a user's key. This level of indirection
allows users to change their keys without re-encrypting
their entire datasets. The change implements the new
subcommands "zfs load-key", "zfs unload-key" and
"zfs change-key" which allow the user to manage their
encryption keys and settings. In addition, several new
flags and properties have been added to allow dataset
creation and to make mounting and unmounting more
convenient.
The second piece of this patch provides the ability to
encrypt, decyrpt, and authenticate protected datasets.
Each object set maintains a Merkel tree of Message
Authentication Codes that protect the lower layers,
similarly to how checksums are maintained. This part
impacts the zio layer, which handles the actual
encryption and generation of MACs, as well as the ARC
and DMU, which need to be able to handle encrypted
buffers and protected data.
The last addition is the ability to do raw, encrypted
sends and receives. The idea here is to send raw
encrypted and compressed data and receive it exactly
as is on a backup system. This means that the dataset
on the receiving system is protected using the same
user key that is in use on the sending side. By doing
so, datasets can be efficiently backed up to an
untrusted system without fear of data being
compromised.
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Jorgen Lundman <lundman@lundman.net>
Signed-off-by: Tom Caputi <tcaputi@datto.com>
Closes #494
Closes #5769
2017-08-14 17:36:48 +00:00
|
|
|
boolean_t force, boolean_t raw, int fd)
|
2016-01-06 21:22:48 +00:00
|
|
|
{
|
2017-10-13 17:09:04 +00:00
|
|
|
return (recv_impl(snapname, props, NULL, NULL, 0, origin, force,
|
2020-04-23 17:06:57 +00:00
|
|
|
B_TRUE, raw, fd, NULL, NULL, NULL, NULL));
|
2016-06-16 17:01:33 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Like lzc_receive, but allows the caller to read the begin record and then to
|
|
|
|
* pass it in. That could be useful if the caller wants to derive, for example,
|
|
|
|
* the snapname or the origin parameters based on the information contained in
|
|
|
|
* the begin record.
|
|
|
|
* The begin record must be in its original form as read from the stream,
|
|
|
|
* in other words, it should not be byteswapped.
|
|
|
|
*
|
|
|
|
* The 'resumable' parameter allows to obtain the same behavior as with
|
|
|
|
* lzc_receive_resumable.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
lzc_receive_with_header(const char *snapname, nvlist_t *props,
|
Native Encryption for ZFS on Linux
This change incorporates three major pieces:
The first change is a keystore that manages wrapping
and encryption keys for encrypted datasets. These
commands mostly involve manipulating the new
DSL Crypto Key ZAP Objects that live in the MOS. Each
encrypted dataset has its own DSL Crypto Key that is
protected with a user's key. This level of indirection
allows users to change their keys without re-encrypting
their entire datasets. The change implements the new
subcommands "zfs load-key", "zfs unload-key" and
"zfs change-key" which allow the user to manage their
encryption keys and settings. In addition, several new
flags and properties have been added to allow dataset
creation and to make mounting and unmounting more
convenient.
The second piece of this patch provides the ability to
encrypt, decyrpt, and authenticate protected datasets.
Each object set maintains a Merkel tree of Message
Authentication Codes that protect the lower layers,
similarly to how checksums are maintained. This part
impacts the zio layer, which handles the actual
encryption and generation of MACs, as well as the ARC
and DMU, which need to be able to handle encrypted
buffers and protected data.
The last addition is the ability to do raw, encrypted
sends and receives. The idea here is to send raw
encrypted and compressed data and receive it exactly
as is on a backup system. This means that the dataset
on the receiving system is protected using the same
user key that is in use on the sending side. By doing
so, datasets can be efficiently backed up to an
untrusted system without fear of data being
compromised.
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Jorgen Lundman <lundman@lundman.net>
Signed-off-by: Tom Caputi <tcaputi@datto.com>
Closes #494
Closes #5769
2017-08-14 17:36:48 +00:00
|
|
|
const char *origin, boolean_t force, boolean_t resumable, boolean_t raw,
|
|
|
|
int fd, const dmu_replay_record_t *begin_record)
|
2016-06-16 17:01:33 +00:00
|
|
|
{
|
|
|
|
if (begin_record == NULL)
|
|
|
|
return (EINVAL);
|
Native Encryption for ZFS on Linux
This change incorporates three major pieces:
The first change is a keystore that manages wrapping
and encryption keys for encrypted datasets. These
commands mostly involve manipulating the new
DSL Crypto Key ZAP Objects that live in the MOS. Each
encrypted dataset has its own DSL Crypto Key that is
protected with a user's key. This level of indirection
allows users to change their keys without re-encrypting
their entire datasets. The change implements the new
subcommands "zfs load-key", "zfs unload-key" and
"zfs change-key" which allow the user to manage their
encryption keys and settings. In addition, several new
flags and properties have been added to allow dataset
creation and to make mounting and unmounting more
convenient.
The second piece of this patch provides the ability to
encrypt, decyrpt, and authenticate protected datasets.
Each object set maintains a Merkel tree of Message
Authentication Codes that protect the lower layers,
similarly to how checksums are maintained. This part
impacts the zio layer, which handles the actual
encryption and generation of MACs, as well as the ARC
and DMU, which need to be able to handle encrypted
buffers and protected data.
The last addition is the ability to do raw, encrypted
sends and receives. The idea here is to send raw
encrypted and compressed data and receive it exactly
as is on a backup system. This means that the dataset
on the receiving system is protected using the same
user key that is in use on the sending side. By doing
so, datasets can be efficiently backed up to an
untrusted system without fear of data being
compromised.
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Jorgen Lundman <lundman@lundman.net>
Signed-off-by: Tom Caputi <tcaputi@datto.com>
Closes #494
Closes #5769
2017-08-14 17:36:48 +00:00
|
|
|
|
2017-10-13 17:09:04 +00:00
|
|
|
return (recv_impl(snapname, props, NULL, NULL, 0, origin, force,
|
2020-04-23 17:06:57 +00:00
|
|
|
resumable, raw, fd, begin_record, NULL, NULL, NULL));
|
2016-06-10 00:04:12 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Like lzc_receive, but allows the caller to pass all supported arguments
|
|
|
|
* and retrieve all values returned. The only additional input parameter
|
|
|
|
* is 'cleanup_fd' which is used to set a cleanup-on-exit file descriptor.
|
|
|
|
*
|
|
|
|
* The following parameters all provide return values. Several may be set
|
|
|
|
* in the failure case and will contain additional information.
|
|
|
|
*
|
|
|
|
* The 'read_bytes' value will be set to the total number of bytes read.
|
|
|
|
*
|
|
|
|
* The 'errflags' value will contain zprop_errflags_t flags which are
|
|
|
|
* used to describe any failures.
|
|
|
|
*
|
2020-04-23 17:06:57 +00:00
|
|
|
* The 'action_handle' and 'cleanup_fd' are no longer used, and are ignored.
|
2016-06-10 00:04:12 +00:00
|
|
|
*
|
|
|
|
* The 'errors' nvlist contains an entry for each unapplied received
|
|
|
|
* property. Callers are responsible for freeing this nvlist.
|
|
|
|
*/
|
|
|
|
int lzc_receive_one(const char *snapname, nvlist_t *props,
|
Native Encryption for ZFS on Linux
This change incorporates three major pieces:
The first change is a keystore that manages wrapping
and encryption keys for encrypted datasets. These
commands mostly involve manipulating the new
DSL Crypto Key ZAP Objects that live in the MOS. Each
encrypted dataset has its own DSL Crypto Key that is
protected with a user's key. This level of indirection
allows users to change their keys without re-encrypting
their entire datasets. The change implements the new
subcommands "zfs load-key", "zfs unload-key" and
"zfs change-key" which allow the user to manage their
encryption keys and settings. In addition, several new
flags and properties have been added to allow dataset
creation and to make mounting and unmounting more
convenient.
The second piece of this patch provides the ability to
encrypt, decyrpt, and authenticate protected datasets.
Each object set maintains a Merkel tree of Message
Authentication Codes that protect the lower layers,
similarly to how checksums are maintained. This part
impacts the zio layer, which handles the actual
encryption and generation of MACs, as well as the ARC
and DMU, which need to be able to handle encrypted
buffers and protected data.
The last addition is the ability to do raw, encrypted
sends and receives. The idea here is to send raw
encrypted and compressed data and receive it exactly
as is on a backup system. This means that the dataset
on the receiving system is protected using the same
user key that is in use on the sending side. By doing
so, datasets can be efficiently backed up to an
untrusted system without fear of data being
compromised.
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Jorgen Lundman <lundman@lundman.net>
Signed-off-by: Tom Caputi <tcaputi@datto.com>
Closes #494
Closes #5769
2017-08-14 17:36:48 +00:00
|
|
|
const char *origin, boolean_t force, boolean_t resumable, boolean_t raw,
|
|
|
|
int input_fd, const dmu_replay_record_t *begin_record, int cleanup_fd,
|
2016-06-10 00:04:12 +00:00
|
|
|
uint64_t *read_bytes, uint64_t *errflags, uint64_t *action_handle,
|
|
|
|
nvlist_t **errors)
|
|
|
|
{
|
2017-10-13 17:09:04 +00:00
|
|
|
return (recv_impl(snapname, props, NULL, NULL, 0, origin, force,
|
2020-04-23 17:06:57 +00:00
|
|
|
resumable, raw, input_fd, begin_record,
|
|
|
|
read_bytes, errflags, errors));
|
2017-05-09 23:21:09 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Like lzc_receive_one, but allows the caller to pass an additional 'cmdprops'
|
|
|
|
* argument.
|
|
|
|
*
|
|
|
|
* The 'cmdprops' nvlist contains both override ('zfs receive -o') and
|
|
|
|
* exclude ('zfs receive -x') properties. Callers are responsible for freeing
|
|
|
|
* this nvlist
|
|
|
|
*/
|
|
|
|
int lzc_receive_with_cmdprops(const char *snapname, nvlist_t *props,
|
2017-10-13 17:09:04 +00:00
|
|
|
nvlist_t *cmdprops, uint8_t *wkeydata, uint_t wkeylen, const char *origin,
|
|
|
|
boolean_t force, boolean_t resumable, boolean_t raw, int input_fd,
|
Native Encryption for ZFS on Linux
This change incorporates three major pieces:
The first change is a keystore that manages wrapping
and encryption keys for encrypted datasets. These
commands mostly involve manipulating the new
DSL Crypto Key ZAP Objects that live in the MOS. Each
encrypted dataset has its own DSL Crypto Key that is
protected with a user's key. This level of indirection
allows users to change their keys without re-encrypting
their entire datasets. The change implements the new
subcommands "zfs load-key", "zfs unload-key" and
"zfs change-key" which allow the user to manage their
encryption keys and settings. In addition, several new
flags and properties have been added to allow dataset
creation and to make mounting and unmounting more
convenient.
The second piece of this patch provides the ability to
encrypt, decyrpt, and authenticate protected datasets.
Each object set maintains a Merkel tree of Message
Authentication Codes that protect the lower layers,
similarly to how checksums are maintained. This part
impacts the zio layer, which handles the actual
encryption and generation of MACs, as well as the ARC
and DMU, which need to be able to handle encrypted
buffers and protected data.
The last addition is the ability to do raw, encrypted
sends and receives. The idea here is to send raw
encrypted and compressed data and receive it exactly
as is on a backup system. This means that the dataset
on the receiving system is protected using the same
user key that is in use on the sending side. By doing
so, datasets can be efficiently backed up to an
untrusted system without fear of data being
compromised.
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Jorgen Lundman <lundman@lundman.net>
Signed-off-by: Tom Caputi <tcaputi@datto.com>
Closes #494
Closes #5769
2017-08-14 17:36:48 +00:00
|
|
|
const dmu_replay_record_t *begin_record, int cleanup_fd,
|
|
|
|
uint64_t *read_bytes, uint64_t *errflags, uint64_t *action_handle,
|
|
|
|
nvlist_t **errors)
|
2017-05-09 23:21:09 +00:00
|
|
|
{
|
2017-10-13 17:09:04 +00:00
|
|
|
return (recv_impl(snapname, props, cmdprops, wkeydata, wkeylen, origin,
|
2020-04-23 17:06:57 +00:00
|
|
|
force, resumable, raw, input_fd, begin_record,
|
|
|
|
read_bytes, errflags, errors));
|
2016-01-06 21:22:48 +00:00
|
|
|
}
|
|
|
|
|
2013-08-14 19:42:31 +00:00
|
|
|
/*
|
|
|
|
* Roll back this filesystem or volume to its most recent snapshot.
|
|
|
|
* If snapnamebuf is not NULL, it will be filled in with the name
|
|
|
|
* of the most recent snapshot.
|
2017-03-11 18:26:47 +00:00
|
|
|
* Note that the latest snapshot may change if a new one is concurrently
|
|
|
|
* created or the current one is destroyed. lzc_rollback_to can be used
|
|
|
|
* to roll back to a specific latest snapshot.
|
2013-08-14 19:42:31 +00:00
|
|
|
*
|
|
|
|
* Return 0 on success or an errno on failure.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
lzc_rollback(const char *fsname, char *snapnamebuf, int snapnamelen)
|
|
|
|
{
|
|
|
|
nvlist_t *args;
|
|
|
|
nvlist_t *result;
|
|
|
|
int err;
|
|
|
|
|
|
|
|
args = fnvlist_alloc();
|
|
|
|
err = lzc_ioctl(ZFS_IOC_ROLLBACK, fsname, args, &result);
|
|
|
|
nvlist_free(args);
|
|
|
|
if (err == 0 && snapnamebuf != NULL) {
|
|
|
|
const char *snapname = fnvlist_lookup_string(result, "target");
|
|
|
|
(void) strlcpy(snapnamebuf, snapname, snapnamelen);
|
|
|
|
}
|
2017-01-21 19:33:11 +00:00
|
|
|
nvlist_free(result);
|
|
|
|
|
2013-08-14 19:42:31 +00:00
|
|
|
return (err);
|
|
|
|
}
|
2013-12-11 22:33:41 +00:00
|
|
|
|
2017-03-11 18:26:47 +00:00
|
|
|
/*
|
|
|
|
* Roll back this filesystem or volume to the specified snapshot,
|
|
|
|
* if possible.
|
|
|
|
*
|
|
|
|
* Return 0 on success or an errno on failure.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
lzc_rollback_to(const char *fsname, const char *snapname)
|
|
|
|
{
|
|
|
|
nvlist_t *args;
|
|
|
|
nvlist_t *result;
|
|
|
|
int err;
|
|
|
|
|
|
|
|
args = fnvlist_alloc();
|
|
|
|
fnvlist_add_string(args, "target", snapname);
|
|
|
|
err = lzc_ioctl(ZFS_IOC_ROLLBACK, fsname, args, &result);
|
|
|
|
nvlist_free(args);
|
|
|
|
nvlist_free(result);
|
|
|
|
return (err);
|
|
|
|
}
|
|
|
|
|
2013-12-11 22:33:41 +00:00
|
|
|
/*
|
2019-11-11 07:24:14 +00:00
|
|
|
* Creates new bookmarks from existing snapshot or bookmark.
|
2013-12-11 22:33:41 +00:00
|
|
|
*
|
2019-11-11 07:24:14 +00:00
|
|
|
* The bookmarks nvlist maps from the full name of the new bookmark to
|
|
|
|
* the full name of the source snapshot or bookmark.
|
|
|
|
* All the bookmarks and snapshots must be in the same pool.
|
|
|
|
* The new bookmarks names must be unique.
|
|
|
|
* => see function dsl_bookmark_create_nvl_validate
|
2013-12-11 22:33:41 +00:00
|
|
|
*
|
|
|
|
* The returned results nvlist will have an entry for each bookmark that failed.
|
|
|
|
* The value will be the (int32) error code.
|
|
|
|
*
|
|
|
|
* The return value will be 0 if all bookmarks were created, otherwise it will
|
|
|
|
* be the errno of a (undetermined) bookmarks that failed.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
lzc_bookmark(nvlist_t *bookmarks, nvlist_t **errlist)
|
|
|
|
{
|
|
|
|
nvpair_t *elem;
|
|
|
|
int error;
|
2016-06-15 21:28:36 +00:00
|
|
|
char pool[ZFS_MAX_DATASET_NAME_LEN];
|
2013-12-11 22:33:41 +00:00
|
|
|
|
2019-11-11 07:24:14 +00:00
|
|
|
/* determine pool name from first bookmark */
|
2013-12-11 22:33:41 +00:00
|
|
|
elem = nvlist_next_nvpair(bookmarks, NULL);
|
|
|
|
if (elem == NULL)
|
|
|
|
return (0);
|
|
|
|
(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
|
|
|
|
pool[strcspn(pool, "/#")] = '\0';
|
|
|
|
|
|
|
|
error = lzc_ioctl(ZFS_IOC_BOOKMARK, pool, bookmarks, errlist);
|
|
|
|
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Retrieve bookmarks.
|
|
|
|
*
|
|
|
|
* Retrieve the list of bookmarks for the given file system. The props
|
|
|
|
* parameter is an nvlist of property names (with no values) that will be
|
|
|
|
* returned for each bookmark.
|
|
|
|
*
|
Implement Redacted Send/Receive
Redacted send/receive allows users to send subsets of their data to
a target system. One possible use case for this feature is to not
transmit sensitive information to a data warehousing, test/dev, or
analytics environment. Another is to save space by not replicating
unimportant data within a given dataset, for example in backup tools
like zrepl.
Redacted send/receive is a three-stage process. First, a clone (or
clones) is made of the snapshot to be sent to the target. In this
clone (or clones), all unnecessary or unwanted data is removed or
modified. This clone is then snapshotted to create the "redaction
snapshot" (or snapshots). Second, the new zfs redact command is used
to create a redaction bookmark. The redaction bookmark stores the
list of blocks in a snapshot that were modified by the redaction
snapshot(s). Finally, the redaction bookmark is passed as a parameter
to zfs send. When sending to the snapshot that was redacted, the
redaction bookmark is used to filter out blocks that contain sensitive
or unwanted information, and those blocks are not included in the send
stream. When sending from the redaction bookmark, the blocks it
contains are considered as candidate blocks in addition to those
blocks in the destination snapshot that were modified since the
creation_txg of the redaction bookmark. This step is necessary to
allow the target to rehydrate data in the case where some blocks are
accidentally or unnecessarily modified in the redaction snapshot.
The changes to bookmarks to enable fast space estimation involve
adding deadlists to bookmarks. There is also logic to manage the
life cycles of these deadlists.
The new size estimation process operates in cases where previously
an accurate estimate could not be provided. In those cases, a send
is performed where no data blocks are read, reducing the runtime
significantly and providing a byte-accurate size estimate.
Reviewed-by: Dan Kimmel <dan.kimmel@delphix.com>
Reviewed-by: Matt Ahrens <mahrens@delphix.com>
Reviewed-by: Prashanth Sreenivasa <pks@delphix.com>
Reviewed-by: John Kennedy <john.kennedy@delphix.com>
Reviewed-by: George Wilson <george.wilson@delphix.com>
Reviewed-by: Chris Williamson <chris.williamson@delphix.com>
Reviewed-by: Pavel Zhakarov <pavel.zakharov@delphix.com>
Reviewed-by: Sebastien Roy <sebastien.roy@delphix.com>
Reviewed-by: Prakash Surya <prakash.surya@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Paul Dagnelie <pcd@delphix.com>
Closes #7958
2019-06-19 16:48:13 +00:00
|
|
|
* The following are valid properties on bookmarks, most of which are numbers
|
|
|
|
* (represented as uint64 in the nvlist), except redact_snaps, which is a
|
|
|
|
* uint64 array, and redact_complete, which is a boolean
|
2013-12-11 22:33:41 +00:00
|
|
|
*
|
|
|
|
* "guid" - globally unique identifier of the snapshot it refers to
|
|
|
|
* "createtxg" - txg when the snapshot it refers to was created
|
|
|
|
* "creation" - timestamp when the snapshot it refers to was created
|
2019-02-04 19:24:55 +00:00
|
|
|
* "ivsetguid" - IVset guid for identifying encrypted snapshots
|
Implement Redacted Send/Receive
Redacted send/receive allows users to send subsets of their data to
a target system. One possible use case for this feature is to not
transmit sensitive information to a data warehousing, test/dev, or
analytics environment. Another is to save space by not replicating
unimportant data within a given dataset, for example in backup tools
like zrepl.
Redacted send/receive is a three-stage process. First, a clone (or
clones) is made of the snapshot to be sent to the target. In this
clone (or clones), all unnecessary or unwanted data is removed or
modified. This clone is then snapshotted to create the "redaction
snapshot" (or snapshots). Second, the new zfs redact command is used
to create a redaction bookmark. The redaction bookmark stores the
list of blocks in a snapshot that were modified by the redaction
snapshot(s). Finally, the redaction bookmark is passed as a parameter
to zfs send. When sending to the snapshot that was redacted, the
redaction bookmark is used to filter out blocks that contain sensitive
or unwanted information, and those blocks are not included in the send
stream. When sending from the redaction bookmark, the blocks it
contains are considered as candidate blocks in addition to those
blocks in the destination snapshot that were modified since the
creation_txg of the redaction bookmark. This step is necessary to
allow the target to rehydrate data in the case where some blocks are
accidentally or unnecessarily modified in the redaction snapshot.
The changes to bookmarks to enable fast space estimation involve
adding deadlists to bookmarks. There is also logic to manage the
life cycles of these deadlists.
The new size estimation process operates in cases where previously
an accurate estimate could not be provided. In those cases, a send
is performed where no data blocks are read, reducing the runtime
significantly and providing a byte-accurate size estimate.
Reviewed-by: Dan Kimmel <dan.kimmel@delphix.com>
Reviewed-by: Matt Ahrens <mahrens@delphix.com>
Reviewed-by: Prashanth Sreenivasa <pks@delphix.com>
Reviewed-by: John Kennedy <john.kennedy@delphix.com>
Reviewed-by: George Wilson <george.wilson@delphix.com>
Reviewed-by: Chris Williamson <chris.williamson@delphix.com>
Reviewed-by: Pavel Zhakarov <pavel.zakharov@delphix.com>
Reviewed-by: Sebastien Roy <sebastien.roy@delphix.com>
Reviewed-by: Prakash Surya <prakash.surya@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Paul Dagnelie <pcd@delphix.com>
Closes #7958
2019-06-19 16:48:13 +00:00
|
|
|
* "redact_snaps" - list of guids of the redaction snapshots for the specified
|
|
|
|
* bookmark. If the bookmark is not a redaction bookmark, the nvlist will
|
|
|
|
* not contain an entry for this value. If it is redacted with respect to
|
|
|
|
* no snapshots, it will contain value -> NULL uint64 array
|
|
|
|
* "redact_complete" - boolean value; true if the redaction bookmark is
|
|
|
|
* complete, false otherwise.
|
2013-12-11 22:33:41 +00:00
|
|
|
*
|
|
|
|
* The format of the returned nvlist as follows:
|
|
|
|
* <short name of bookmark> -> {
|
|
|
|
* <name of property> -> {
|
|
|
|
* "value" -> uint64
|
|
|
|
* }
|
Implement Redacted Send/Receive
Redacted send/receive allows users to send subsets of their data to
a target system. One possible use case for this feature is to not
transmit sensitive information to a data warehousing, test/dev, or
analytics environment. Another is to save space by not replicating
unimportant data within a given dataset, for example in backup tools
like zrepl.
Redacted send/receive is a three-stage process. First, a clone (or
clones) is made of the snapshot to be sent to the target. In this
clone (or clones), all unnecessary or unwanted data is removed or
modified. This clone is then snapshotted to create the "redaction
snapshot" (or snapshots). Second, the new zfs redact command is used
to create a redaction bookmark. The redaction bookmark stores the
list of blocks in a snapshot that were modified by the redaction
snapshot(s). Finally, the redaction bookmark is passed as a parameter
to zfs send. When sending to the snapshot that was redacted, the
redaction bookmark is used to filter out blocks that contain sensitive
or unwanted information, and those blocks are not included in the send
stream. When sending from the redaction bookmark, the blocks it
contains are considered as candidate blocks in addition to those
blocks in the destination snapshot that were modified since the
creation_txg of the redaction bookmark. This step is necessary to
allow the target to rehydrate data in the case where some blocks are
accidentally or unnecessarily modified in the redaction snapshot.
The changes to bookmarks to enable fast space estimation involve
adding deadlists to bookmarks. There is also logic to manage the
life cycles of these deadlists.
The new size estimation process operates in cases where previously
an accurate estimate could not be provided. In those cases, a send
is performed where no data blocks are read, reducing the runtime
significantly and providing a byte-accurate size estimate.
Reviewed-by: Dan Kimmel <dan.kimmel@delphix.com>
Reviewed-by: Matt Ahrens <mahrens@delphix.com>
Reviewed-by: Prashanth Sreenivasa <pks@delphix.com>
Reviewed-by: John Kennedy <john.kennedy@delphix.com>
Reviewed-by: George Wilson <george.wilson@delphix.com>
Reviewed-by: Chris Williamson <chris.williamson@delphix.com>
Reviewed-by: Pavel Zhakarov <pavel.zakharov@delphix.com>
Reviewed-by: Sebastien Roy <sebastien.roy@delphix.com>
Reviewed-by: Prakash Surya <prakash.surya@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Paul Dagnelie <pcd@delphix.com>
Closes #7958
2019-06-19 16:48:13 +00:00
|
|
|
* ...
|
|
|
|
* "redact_snaps" -> {
|
|
|
|
* "value" -> uint64 array
|
|
|
|
* }
|
|
|
|
* "redact_complete" -> {
|
|
|
|
* "value" -> boolean value
|
|
|
|
* }
|
2013-12-11 22:33:41 +00:00
|
|
|
* }
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
lzc_get_bookmarks(const char *fsname, nvlist_t *props, nvlist_t **bmarks)
|
|
|
|
{
|
|
|
|
return (lzc_ioctl(ZFS_IOC_GET_BOOKMARKS, fsname, props, bmarks));
|
|
|
|
}
|
|
|
|
|
Implement Redacted Send/Receive
Redacted send/receive allows users to send subsets of their data to
a target system. One possible use case for this feature is to not
transmit sensitive information to a data warehousing, test/dev, or
analytics environment. Another is to save space by not replicating
unimportant data within a given dataset, for example in backup tools
like zrepl.
Redacted send/receive is a three-stage process. First, a clone (or
clones) is made of the snapshot to be sent to the target. In this
clone (or clones), all unnecessary or unwanted data is removed or
modified. This clone is then snapshotted to create the "redaction
snapshot" (or snapshots). Second, the new zfs redact command is used
to create a redaction bookmark. The redaction bookmark stores the
list of blocks in a snapshot that were modified by the redaction
snapshot(s). Finally, the redaction bookmark is passed as a parameter
to zfs send. When sending to the snapshot that was redacted, the
redaction bookmark is used to filter out blocks that contain sensitive
or unwanted information, and those blocks are not included in the send
stream. When sending from the redaction bookmark, the blocks it
contains are considered as candidate blocks in addition to those
blocks in the destination snapshot that were modified since the
creation_txg of the redaction bookmark. This step is necessary to
allow the target to rehydrate data in the case where some blocks are
accidentally or unnecessarily modified in the redaction snapshot.
The changes to bookmarks to enable fast space estimation involve
adding deadlists to bookmarks. There is also logic to manage the
life cycles of these deadlists.
The new size estimation process operates in cases where previously
an accurate estimate could not be provided. In those cases, a send
is performed where no data blocks are read, reducing the runtime
significantly and providing a byte-accurate size estimate.
Reviewed-by: Dan Kimmel <dan.kimmel@delphix.com>
Reviewed-by: Matt Ahrens <mahrens@delphix.com>
Reviewed-by: Prashanth Sreenivasa <pks@delphix.com>
Reviewed-by: John Kennedy <john.kennedy@delphix.com>
Reviewed-by: George Wilson <george.wilson@delphix.com>
Reviewed-by: Chris Williamson <chris.williamson@delphix.com>
Reviewed-by: Pavel Zhakarov <pavel.zakharov@delphix.com>
Reviewed-by: Sebastien Roy <sebastien.roy@delphix.com>
Reviewed-by: Prakash Surya <prakash.surya@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Paul Dagnelie <pcd@delphix.com>
Closes #7958
2019-06-19 16:48:13 +00:00
|
|
|
/*
|
|
|
|
* Get bookmark properties.
|
|
|
|
*
|
|
|
|
* Given a bookmark's full name, retrieve all properties for the bookmark.
|
|
|
|
*
|
|
|
|
* The format of the returned property list is as follows:
|
|
|
|
* {
|
|
|
|
* <name of property> -> {
|
|
|
|
* "value" -> uint64
|
|
|
|
* }
|
|
|
|
* ...
|
|
|
|
* "redact_snaps" -> {
|
|
|
|
* "value" -> uint64 array
|
|
|
|
* }
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
lzc_get_bookmark_props(const char *bookmark, nvlist_t **props)
|
|
|
|
{
|
|
|
|
int error;
|
|
|
|
|
|
|
|
nvlist_t *innvl = fnvlist_alloc();
|
|
|
|
error = lzc_ioctl(ZFS_IOC_GET_BOOKMARK_PROPS, bookmark, innvl, props);
|
|
|
|
fnvlist_free(innvl);
|
|
|
|
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
2013-12-11 22:33:41 +00:00
|
|
|
/*
|
|
|
|
* Destroys bookmarks.
|
|
|
|
*
|
|
|
|
* The keys in the bmarks nvlist are the bookmarks to be destroyed.
|
|
|
|
* They must all be in the same pool. Bookmarks are specified as
|
|
|
|
* <fs>#<bmark>.
|
|
|
|
*
|
|
|
|
* Bookmarks that do not exist will be silently ignored.
|
|
|
|
*
|
|
|
|
* The return value will be 0 if all bookmarks that existed were destroyed.
|
|
|
|
*
|
|
|
|
* Otherwise the return value will be the errno of a (undetermined) bookmark
|
|
|
|
* that failed, no bookmarks will be destroyed, and the errlist will have an
|
|
|
|
* entry for each bookmarks that failed. The value in the errlist will be
|
|
|
|
* the (int32) error code.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
lzc_destroy_bookmarks(nvlist_t *bmarks, nvlist_t **errlist)
|
|
|
|
{
|
|
|
|
nvpair_t *elem;
|
|
|
|
int error;
|
2016-06-15 21:28:36 +00:00
|
|
|
char pool[ZFS_MAX_DATASET_NAME_LEN];
|
2013-12-11 22:33:41 +00:00
|
|
|
|
|
|
|
/* determine the pool name */
|
|
|
|
elem = nvlist_next_nvpair(bmarks, NULL);
|
|
|
|
if (elem == NULL)
|
|
|
|
return (0);
|
|
|
|
(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
|
|
|
|
pool[strcspn(pool, "/#")] = '\0';
|
|
|
|
|
|
|
|
error = lzc_ioctl(ZFS_IOC_DESTROY_BOOKMARKS, pool, bmarks, errlist);
|
|
|
|
|
|
|
|
return (error);
|
|
|
|
}
|
Native Encryption for ZFS on Linux
This change incorporates three major pieces:
The first change is a keystore that manages wrapping
and encryption keys for encrypted datasets. These
commands mostly involve manipulating the new
DSL Crypto Key ZAP Objects that live in the MOS. Each
encrypted dataset has its own DSL Crypto Key that is
protected with a user's key. This level of indirection
allows users to change their keys without re-encrypting
their entire datasets. The change implements the new
subcommands "zfs load-key", "zfs unload-key" and
"zfs change-key" which allow the user to manage their
encryption keys and settings. In addition, several new
flags and properties have been added to allow dataset
creation and to make mounting and unmounting more
convenient.
The second piece of this patch provides the ability to
encrypt, decrypt, and authenticate protected datasets.
Each object set maintains a Merkle tree of Message
Authentication Codes that protect the lower layers,
similarly to how checksums are maintained. This part
impacts the zio layer, which handles the actual
encryption and generation of MACs, as well as the ARC
and DMU, which need to be able to handle encrypted
buffers and protected data.
The last addition is the ability to do raw, encrypted
sends and receives. The idea here is to send raw
encrypted and compressed data and receive it exactly
as is on a backup system. This means that the dataset
on the receiving system is protected using the same
user key that is in use on the sending side. By doing
so, datasets can be efficiently backed up to an
untrusted system without fear of data being
compromised.
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Jorgen Lundman <lundman@lundman.net>
Signed-off-by: Tom Caputi <tcaputi@datto.com>
Closes #494
Closes #5769
2017-08-14 17:36:48 +00:00
|
|
|
|
2018-02-08 16:35:09 +00:00
|
|
|
static int
|
|
|
|
lzc_channel_program_impl(const char *pool, const char *program, boolean_t sync,
|
|
|
|
uint64_t instrlimit, uint64_t memlimit, nvlist_t *argnvl, nvlist_t **outnvl)
|
|
|
|
{
|
|
|
|
int error;
|
|
|
|
nvlist_t *args;
|
|
|
|
|
|
|
|
args = fnvlist_alloc();
|
|
|
|
fnvlist_add_string(args, ZCP_ARG_PROGRAM, program);
|
|
|
|
fnvlist_add_nvlist(args, ZCP_ARG_ARGLIST, argnvl);
|
|
|
|
fnvlist_add_boolean_value(args, ZCP_ARG_SYNC, sync);
|
|
|
|
fnvlist_add_uint64(args, ZCP_ARG_INSTRLIMIT, instrlimit);
|
|
|
|
fnvlist_add_uint64(args, ZCP_ARG_MEMLIMIT, memlimit);
|
|
|
|
error = lzc_ioctl(ZFS_IOC_CHANNEL_PROGRAM, pool, args, outnvl);
|
|
|
|
fnvlist_free(args);
|
|
|
|
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
2018-02-08 16:16:23 +00:00
|
|
|
/*
|
|
|
|
* Executes a channel program.
|
|
|
|
*
|
|
|
|
* If this function returns 0 the channel program was successfully loaded and
|
|
|
|
* ran without failing. Note that individual commands the channel program ran
|
|
|
|
* may have failed and the channel program is responsible for reporting such
|
|
|
|
* errors through outnvl if they are important.
|
|
|
|
*
|
|
|
|
* This method may also return:
|
|
|
|
*
|
|
|
|
* EINVAL The program contains syntax errors, or an invalid memory or time
|
|
|
|
* limit was given. No part of the channel program was executed.
|
|
|
|
* If caused by syntax errors, 'outnvl' contains information about the
|
|
|
|
* errors.
|
|
|
|
*
|
|
|
|
* ECHRNG The program was executed, but encountered a runtime error, such as
|
|
|
|
* calling a function with incorrect arguments, invoking the error()
|
|
|
|
* function directly, failing an assert() command, etc. Some portion
|
|
|
|
* of the channel program may have executed and committed changes.
|
|
|
|
* Information about the failure can be found in 'outnvl'.
|
|
|
|
*
|
|
|
|
* ENOMEM The program fully executed, but the output buffer was not large
|
|
|
|
* enough to store the returned value. No output is returned through
|
|
|
|
* 'outnvl'.
|
|
|
|
*
|
|
|
|
* ENOSPC The program was terminated because it exceeded its memory usage
|
|
|
|
* limit. Some portion of the channel program may have executed and
|
|
|
|
* committed changes to disk. No output is returned through 'outnvl'.
|
|
|
|
*
|
|
|
|
* ETIME The program was terminated because it exceeded its Lua instruction
|
|
|
|
* limit. Some portion of the channel program may have executed and
|
|
|
|
* committed changes to disk. No output is returned through 'outnvl'.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
lzc_channel_program(const char *pool, const char *program, uint64_t instrlimit,
|
|
|
|
uint64_t memlimit, nvlist_t *argnvl, nvlist_t **outnvl)
|
|
|
|
{
|
2018-02-08 16:35:09 +00:00
|
|
|
return (lzc_channel_program_impl(pool, program, B_TRUE, instrlimit,
|
|
|
|
memlimit, argnvl, outnvl));
|
|
|
|
}
|
2018-02-08 16:16:23 +00:00
|
|
|
|
2016-12-16 22:11:29 +00:00
|
|
|
/*
|
|
|
|
* Creates a checkpoint for the specified pool.
|
|
|
|
*
|
|
|
|
* If this function returns 0 the pool was successfully checkpointed.
|
|
|
|
*
|
|
|
|
* This method may also return:
|
|
|
|
*
|
|
|
|
* ZFS_ERR_CHECKPOINT_EXISTS
|
|
|
|
* The pool already has a checkpoint. A pools can only have one
|
|
|
|
* checkpoint at most, at any given time.
|
|
|
|
*
|
|
|
|
* ZFS_ERR_DISCARDING_CHECKPOINT
|
|
|
|
* ZFS is in the middle of discarding a checkpoint for this pool.
|
|
|
|
* The pool can be checkpointed again once the discard is done.
|
|
|
|
*
|
|
|
|
* ZFS_DEVRM_IN_PROGRESS
|
|
|
|
* A vdev is currently being removed. The pool cannot be
|
|
|
|
* checkpointed until the device removal is done.
|
|
|
|
*
|
|
|
|
* ZFS_VDEV_TOO_BIG
|
|
|
|
* One or more top-level vdevs exceed the maximum vdev size
|
|
|
|
* supported for this feature.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
lzc_pool_checkpoint(const char *pool)
|
|
|
|
{
|
|
|
|
int error;
|
|
|
|
|
|
|
|
nvlist_t *result = NULL;
|
|
|
|
nvlist_t *args = fnvlist_alloc();
|
|
|
|
|
|
|
|
error = lzc_ioctl(ZFS_IOC_POOL_CHECKPOINT, pool, args, &result);
|
|
|
|
|
|
|
|
fnvlist_free(args);
|
|
|
|
fnvlist_free(result);
|
|
|
|
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Discard the checkpoint from the specified pool.
|
|
|
|
*
|
|
|
|
* If this function returns 0 the checkpoint was successfully discarded.
|
|
|
|
*
|
|
|
|
* This method may also return:
|
|
|
|
*
|
|
|
|
* ZFS_ERR_NO_CHECKPOINT
|
|
|
|
* The pool does not have a checkpoint.
|
|
|
|
*
|
|
|
|
* ZFS_ERR_DISCARDING_CHECKPOINT
|
|
|
|
* ZFS is already in the middle of discarding the checkpoint.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
lzc_pool_checkpoint_discard(const char *pool)
|
|
|
|
{
|
|
|
|
int error;
|
|
|
|
|
|
|
|
nvlist_t *result = NULL;
|
|
|
|
nvlist_t *args = fnvlist_alloc();
|
|
|
|
|
|
|
|
error = lzc_ioctl(ZFS_IOC_POOL_DISCARD_CHECKPOINT, pool, args, &result);
|
|
|
|
|
|
|
|
fnvlist_free(args);
|
|
|
|
fnvlist_free(result);
|
|
|
|
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
2018-02-08 16:35:09 +00:00
|
|
|
/*
|
|
|
|
* Executes a read-only channel program.
|
|
|
|
*
|
|
|
|
* A read-only channel program works programmatically the same way as a
|
|
|
|
* normal channel program executed with lzc_channel_program(). The only
|
|
|
|
* difference is it runs exclusively in open-context and therefore can
|
|
|
|
* return faster. The downside to that, is that the program cannot change
|
|
|
|
* on-disk state by calling functions from the zfs.sync submodule.
|
|
|
|
*
|
|
|
|
* The return values of this function (and their meaning) are exactly the
|
|
|
|
* same as the ones described in lzc_channel_program().
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
lzc_channel_program_nosync(const char *pool, const char *program,
|
|
|
|
uint64_t timeout, uint64_t memlimit, nvlist_t *argnvl, nvlist_t **outnvl)
|
|
|
|
{
|
|
|
|
return (lzc_channel_program_impl(pool, program, B_FALSE, timeout,
|
|
|
|
memlimit, argnvl, outnvl));
|
2018-02-08 16:16:23 +00:00
|
|
|
}
|
|
|
|
|
Native Encryption for ZFS on Linux
This change incorporates three major pieces:
The first change is a keystore that manages wrapping
and encryption keys for encrypted datasets. These
commands mostly involve manipulating the new
DSL Crypto Key ZAP Objects that live in the MOS. Each
encrypted dataset has its own DSL Crypto Key that is
protected with a user's key. This level of indirection
allows users to change their keys without re-encrypting
their entire datasets. The change implements the new
subcommands "zfs load-key", "zfs unload-key" and
"zfs change-key" which allow the user to manage their
encryption keys and settings. In addition, several new
flags and properties have been added to allow dataset
creation and to make mounting and unmounting more
convenient.
The second piece of this patch provides the ability to
encrypt, decrypt, and authenticate protected datasets.
Each object set maintains a Merkle tree of Message
Authentication Codes that protect the lower layers,
similarly to how checksums are maintained. This part
impacts the zio layer, which handles the actual
encryption and generation of MACs, as well as the ARC
and DMU, which need to be able to handle encrypted
buffers and protected data.
The last addition is the ability to do raw, encrypted
sends and receives. The idea here is to send raw
encrypted and compressed data and receive it exactly
as is on a backup system. This means that the dataset
on the receiving system is protected using the same
user key that is in use on the sending side. By doing
so, datasets can be efficiently backed up to an
untrusted system without fear of data being
compromised.
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Jorgen Lundman <lundman@lundman.net>
Signed-off-by: Tom Caputi <tcaputi@datto.com>
Closes #494
Closes #5769
2017-08-14 17:36:48 +00:00
|
|
|
/*
|
|
|
|
* Performs key management functions
|
|
|
|
*
|
Adopt pyzfs from ClusterHQ
This commit introduces several changes:
* Update LICENSE and project information
* Give a good PEP8 talk to existing Python source code
* Add RPM/DEB packaging for pyzfs
* Fix some outstanding issues with the existing pyzfs code caused by
changes in the ABI since the last time the code was updated
* Integrate pyzfs Python unittest with the ZFS Test Suite
* Add missing libzfs_core functions: lzc_change_key,
lzc_channel_program, lzc_channel_program_nosync, lzc_load_key,
lzc_receive_one, lzc_receive_resumable, lzc_receive_with_cmdprops,
lzc_receive_with_header, lzc_reopen, lzc_send_resume, lzc_sync,
lzc_unload_key, lzc_remap
Note: this commit slightly changes zfs_ioc_unload_key() ABI. This allow
to differentiate the case where we tried to unload a key on a
non-existing dataset (ENOENT) from the situation where a dataset has
no key loaded: this is consistent with the "change" case where trying
to zfs_ioc_change_key() from a dataset with no key results in EACCES.
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: loli10K <ezomori.nozomu@gmail.com>
Closes #7230
2018-03-18 08:34:45 +00:00
|
|
|
* crypto_cmd should be a value from dcp_cmd_t. If the command specifies to
|
|
|
|
* load or change a wrapping key, the key should be specified in the
|
|
|
|
* hidden_args nvlist so that it is not logged.
|
Native Encryption for ZFS on Linux
This change incorporates three major pieces:
The first change is a keystore that manages wrapping
and encryption keys for encrypted datasets. These
commands mostly involve manipulating the new
DSL Crypto Key ZAP Objects that live in the MOS. Each
encrypted dataset has its own DSL Crypto Key that is
protected with a user's key. This level of indirection
allows users to change their keys without re-encrypting
their entire datasets. The change implements the new
subcommands "zfs load-key", "zfs unload-key" and
"zfs change-key" which allow the user to manage their
encryption keys and settings. In addition, several new
flags and properties have been added to allow dataset
creation and to make mounting and unmounting more
convenient.
The second piece of this patch provides the ability to
encrypt, decyrpt, and authenticate protected datasets.
Each object set maintains a Merkel tree of Message
Authentication Codes that protect the lower layers,
similarly to how checksums are maintained. This part
impacts the zio layer, which handles the actual
encryption and generation of MACs, as well as the ARC
and DMU, which need to be able to handle encrypted
buffers and protected data.
The last addition is the ability to do raw, encrypted
sends and receives. The idea here is to send raw
encrypted and compressed data and receive it exactly
as is on a backup system. This means that the dataset
on the receiving system is protected using the same
user key that is in use on the sending side. By doing
so, datasets can be efficiently backed up to an
untrusted system without fear of data being
compromised.
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Jorgen Lundman <lundman@lundman.net>
Signed-off-by: Tom Caputi <tcaputi@datto.com>
Closes #494
Closes #5769
2017-08-14 17:36:48 +00:00
|
|
|
*/
|
|
|
|
int
|
|
|
|
lzc_load_key(const char *fsname, boolean_t noop, uint8_t *wkeydata,
|
|
|
|
uint_t wkeylen)
|
|
|
|
{
|
|
|
|
int error;
|
|
|
|
nvlist_t *ioc_args;
|
|
|
|
nvlist_t *hidden_args;
|
|
|
|
|
|
|
|
if (wkeydata == NULL)
|
|
|
|
return (EINVAL);
|
|
|
|
|
|
|
|
ioc_args = fnvlist_alloc();
|
|
|
|
hidden_args = fnvlist_alloc();
|
|
|
|
fnvlist_add_uint8_array(hidden_args, "wkeydata", wkeydata, wkeylen);
|
|
|
|
fnvlist_add_nvlist(ioc_args, ZPOOL_HIDDEN_ARGS, hidden_args);
|
|
|
|
if (noop)
|
|
|
|
fnvlist_add_boolean(ioc_args, "noop");
|
|
|
|
error = lzc_ioctl(ZFS_IOC_LOAD_KEY, fsname, ioc_args, NULL);
|
|
|
|
nvlist_free(hidden_args);
|
|
|
|
nvlist_free(ioc_args);
|
|
|
|
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
lzc_unload_key(const char *fsname)
|
|
|
|
{
|
|
|
|
return (lzc_ioctl(ZFS_IOC_UNLOAD_KEY, fsname, NULL, NULL));
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
lzc_change_key(const char *fsname, uint64_t crypt_cmd, nvlist_t *props,
|
|
|
|
uint8_t *wkeydata, uint_t wkeylen)
|
|
|
|
{
|
|
|
|
int error;
|
|
|
|
nvlist_t *ioc_args = fnvlist_alloc();
|
|
|
|
nvlist_t *hidden_args = NULL;
|
|
|
|
|
|
|
|
fnvlist_add_uint64(ioc_args, "crypt_cmd", crypt_cmd);
|
|
|
|
|
|
|
|
if (wkeydata != NULL) {
|
|
|
|
hidden_args = fnvlist_alloc();
|
|
|
|
fnvlist_add_uint8_array(hidden_args, "wkeydata", wkeydata,
|
|
|
|
wkeylen);
|
|
|
|
fnvlist_add_nvlist(ioc_args, ZPOOL_HIDDEN_ARGS, hidden_args);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (props != NULL)
|
|
|
|
fnvlist_add_nvlist(ioc_args, "props", props);
|
|
|
|
|
|
|
|
error = lzc_ioctl(ZFS_IOC_CHANGE_KEY, fsname, ioc_args, NULL);
|
|
|
|
nvlist_free(hidden_args);
|
|
|
|
nvlist_free(ioc_args);
|
2018-02-08 16:16:23 +00:00
|
|
|
|
Native Encryption for ZFS on Linux
This change incorporates three major pieces:
The first change is a keystore that manages wrapping
and encryption keys for encrypted datasets. These
commands mostly involve manipulating the new
DSL Crypto Key ZAP Objects that live in the MOS. Each
encrypted dataset has its own DSL Crypto Key that is
protected with a user's key. This level of indirection
allows users to change their keys without re-encrypting
their entire datasets. The change implements the new
subcommands "zfs load-key", "zfs unload-key" and
"zfs change-key" which allow the user to manage their
encryption keys and settings. In addition, several new
flags and properties have been added to allow dataset
creation and to make mounting and unmounting more
convenient.
The second piece of this patch provides the ability to
encrypt, decyrpt, and authenticate protected datasets.
Each object set maintains a Merkel tree of Message
Authentication Codes that protect the lower layers,
similarly to how checksums are maintained. This part
impacts the zio layer, which handles the actual
encryption and generation of MACs, as well as the ARC
and DMU, which need to be able to handle encrypted
buffers and protected data.
The last addition is the ability to do raw, encrypted
sends and receives. The idea here is to send raw
encrypted and compressed data and receive it exactly
as is on a backup system. This means that the dataset
on the receiving system is protected using the same
user key that is in use on the sending side. By doing
so, datasets can be efficiently backed up to an
untrusted system without fear of data being
compromised.
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Jorgen Lundman <lundman@lundman.net>
Signed-off-by: Tom Caputi <tcaputi@datto.com>
Closes #494
Closes #5769
2017-08-14 17:36:48 +00:00
|
|
|
return (error);
|
|
|
|
}
|
2017-10-26 19:26:09 +00:00
|
|
|
|
|
|
|
int
|
|
|
|
lzc_reopen(const char *pool_name, boolean_t scrub_restart)
|
|
|
|
{
|
|
|
|
nvlist_t *args = fnvlist_alloc();
|
|
|
|
int error;
|
|
|
|
|
|
|
|
fnvlist_add_boolean_value(args, "scrub_restart", scrub_restart);
|
|
|
|
|
|
|
|
error = lzc_ioctl(ZFS_IOC_POOL_REOPEN, pool_name, args, NULL);
|
|
|
|
nvlist_free(args);
|
|
|
|
return (error);
|
|
|
|
}
|
OpenZFS 9102 - zfs should be able to initialize storage devices
PROBLEM
========
The first access to a block incurs a performance penalty on some platforms
(e.g. AWS's EBS, VMware VMDKs). Therefore we recommend that volumes are
"thick provisioned", where supported by the platform (VMware). This can
create a large delay in getting a new virtual machines up and running (or
adding storage to an existing Engine). If the thick provision step is
omitted, write performance will be suboptimal until all blocks on the LUN
have been written.
SOLUTION
=========
This feature introduces a way to 'initialize' the disks at install or in the
background to make sure we don't incur this first read penalty.
When an entire LUN is added to ZFS, we make all space available immediately,
and allow ZFS to find unallocated space and zero it out. This works with
concurrent writes to arbitrary offsets, ensuring that we don't zero out
something that has been (or is in the middle of being) written. This scheme
can also be applied to existing pools (affecting only free regions on the
vdev). Detailed design:
- new subcommand:zpool initialize [-cs] <pool> [<vdev> ...]
- start, suspend, or cancel initialization
- Creates new open-context thread for each vdev
- Thread iterates through all metaslabs in this vdev
- Each metaslab:
- select a metaslab
- load the metaslab
- mark the metaslab as being zeroed
- walk all free ranges within that metaslab and translate
them to ranges on the leaf vdev
- issue a "zeroing" I/O on the leaf vdev that corresponds to
a free range on the metaslab we're working on
- continue until all free ranges for this metaslab have been
"zeroed"
- reset/unmark the metaslab being zeroed
- if more metaslabs exist, then repeat above tasks.
- if no more metaslabs, then we're done.
- progress for the initialization is stored on-disk in the vdev’s
leaf zap object. The following information is stored:
- the last offset that has been initialized
- the state of the initialization process (i.e. active,
suspended, or canceled)
- the start time for the initialization
- progress is reported via the zpool status command and shows
information for each of the vdevs that are initializing
Porting notes:
- Added zfs_initialize_value module parameter to set the pattern
written by "zpool initialize".
- Added zfs_vdev_{initializing,removal}_{min,max}_active module options.
Authored by: George Wilson <george.wilson@delphix.com>
Reviewed by: John Wren Kennedy <john.kennedy@delphix.com>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: Pavel Zakharov <pavel.zakharov@delphix.com>
Reviewed by: Prakash Surya <prakash.surya@delphix.com>
Reviewed by: loli10K <ezomori.nozomu@gmail.com>
Reviewed by: Brian Behlendorf <behlendorf1@llnl.gov>
Approved by: Richard Lowe <richlowe@richlowe.net>
Signed-off-by: Tim Chase <tim@chase2k.com>
Ported-by: Tim Chase <tim@chase2k.com>
OpenZFS-issue: https://www.illumos.org/issues/9102
OpenZFS-commit: https://github.com/openzfs/openzfs/commit/c3963210eb
Closes #8230
2018-12-19 14:54:59 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Changes initializing state.
|
|
|
|
*
|
|
|
|
* vdevs should be a list of (<key>, guid) where guid is a uint64 vdev GUID.
|
|
|
|
* The key is ignored.
|
|
|
|
*
|
|
|
|
* If there are errors related to vdev arguments, per-vdev errors are returned
|
|
|
|
* in an nvlist with the key "vdevs". Each error is a (guid, errno) pair where
|
|
|
|
* guid is stringified with PRIu64, and errno is one of the following as
|
|
|
|
* an int64_t:
|
|
|
|
* - ENODEV if the device was not found
|
|
|
|
* - EINVAL if the devices is not a leaf or is not concrete (e.g. missing)
|
|
|
|
* - EROFS if the device is not writeable
|
2019-03-29 16:13:20 +00:00
|
|
|
* - EBUSY start requested but the device is already being either
|
|
|
|
* initialized or trimmed
|
OpenZFS 9102 - zfs should be able to initialize storage devices
PROBLEM
========
The first access to a block incurs a performance penalty on some platforms
(e.g. AWS's EBS, VMware VMDKs). Therefore we recommend that volumes are
"thick provisioned", where supported by the platform (VMware). This can
create a large delay in getting a new virtual machines up and running (or
adding storage to an existing Engine). If the thick provision step is
omitted, write performance will be suboptimal until all blocks on the LUN
have been written.
SOLUTION
=========
This feature introduces a way to 'initialize' the disks at install or in the
background to make sure we don't incur this first read penalty.
When an entire LUN is added to ZFS, we make all space available immediately,
and allow ZFS to find unallocated space and zero it out. This works with
concurrent writes to arbitrary offsets, ensuring that we don't zero out
something that has been (or is in the middle of being) written. This scheme
can also be applied to existing pools (affecting only free regions on the
vdev). Detailed design:
- new subcommand:zpool initialize [-cs] <pool> [<vdev> ...]
- start, suspend, or cancel initialization
- Creates new open-context thread for each vdev
- Thread iterates through all metaslabs in this vdev
- Each metaslab:
- select a metaslab
- load the metaslab
- mark the metaslab as being zeroed
- walk all free ranges within that metaslab and translate
them to ranges on the leaf vdev
- issue a "zeroing" I/O on the leaf vdev that corresponds to
a free range on the metaslab we're working on
- continue until all free ranges for this metaslab have been
"zeroed"
- reset/unmark the metaslab being zeroed
- if more metaslabs exist, then repeat above tasks.
- if no more metaslabs, then we're done.
- progress for the initialization is stored on-disk in the vdev’s
leaf zap object. The following information is stored:
- the last offset that has been initialized
- the state of the initialization process (i.e. active,
suspended, or canceled)
- the start time for the initialization
- progress is reported via the zpool status command and shows
information for each of the vdevs that are initializing
Porting notes:
- Added zfs_initialize_value module parameter to set the pattern
written by "zpool initialize".
- Added zfs_vdev_{initializing,removal}_{min,max}_active module options.
Authored by: George Wilson <george.wilson@delphix.com>
Reviewed by: John Wren Kennedy <john.kennedy@delphix.com>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: Pavel Zakharov <pavel.zakharov@delphix.com>
Reviewed by: Prakash Surya <prakash.surya@delphix.com>
Reviewed by: loli10K <ezomori.nozomu@gmail.com>
Reviewed by: Brian Behlendorf <behlendorf1@llnl.gov>
Approved by: Richard Lowe <richlowe@richlowe.net>
Signed-off-by: Tim Chase <tim@chase2k.com>
Ported-by: Tim Chase <tim@chase2k.com>
OpenZFS-issue: https://www.illumos.org/issues/9102
OpenZFS-commit: https://github.com/openzfs/openzfs/commit/c3963210eb
Closes #8230
2018-12-19 14:54:59 +00:00
|
|
|
* - ESRCH cancel/suspend requested but device is not being initialized
|
|
|
|
*
|
|
|
|
* If the errlist is empty, then return value will be:
|
|
|
|
* - EINVAL if one or more arguments was invalid
|
|
|
|
* - Other spa_open failures
|
|
|
|
* - 0 if the operation succeeded
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
lzc_initialize(const char *poolname, pool_initialize_func_t cmd_type,
|
|
|
|
nvlist_t *vdevs, nvlist_t **errlist)
|
|
|
|
{
|
|
|
|
int error;
|
2019-03-29 16:13:20 +00:00
|
|
|
|
OpenZFS 9102 - zfs should be able to initialize storage devices
PROBLEM
========
The first access to a block incurs a performance penalty on some platforms
(e.g. AWS's EBS, VMware VMDKs). Therefore we recommend that volumes are
"thick provisioned", where supported by the platform (VMware). This can
create a large delay in getting a new virtual machines up and running (or
adding storage to an existing Engine). If the thick provision step is
omitted, write performance will be suboptimal until all blocks on the LUN
have been written.
SOLUTION
=========
This feature introduces a way to 'initialize' the disks at install or in the
background to make sure we don't incur this first read penalty.
When an entire LUN is added to ZFS, we make all space available immediately,
and allow ZFS to find unallocated space and zero it out. This works with
concurrent writes to arbitrary offsets, ensuring that we don't zero out
something that has been (or is in the middle of being) written. This scheme
can also be applied to existing pools (affecting only free regions on the
vdev). Detailed design:
- new subcommand:zpool initialize [-cs] <pool> [<vdev> ...]
- start, suspend, or cancel initialization
- Creates new open-context thread for each vdev
- Thread iterates through all metaslabs in this vdev
- Each metaslab:
- select a metaslab
- load the metaslab
- mark the metaslab as being zeroed
- walk all free ranges within that metaslab and translate
them to ranges on the leaf vdev
- issue a "zeroing" I/O on the leaf vdev that corresponds to
a free range on the metaslab we're working on
- continue until all free ranges for this metaslab have been
"zeroed"
- reset/unmark the metaslab being zeroed
- if more metaslabs exist, then repeat above tasks.
- if no more metaslabs, then we're done.
- progress for the initialization is stored on-disk in the vdev’s
leaf zap object. The following information is stored:
- the last offset that has been initialized
- the state of the initialization process (i.e. active,
suspended, or canceled)
- the start time for the initialization
- progress is reported via the zpool status command and shows
information for each of the vdevs that are initializing
Porting notes:
- Added zfs_initialize_value module parameter to set the pattern
written by "zpool initialize".
- Added zfs_vdev_{initializing,removal}_{min,max}_active module options.
Authored by: George Wilson <george.wilson@delphix.com>
Reviewed by: John Wren Kennedy <john.kennedy@delphix.com>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: Pavel Zakharov <pavel.zakharov@delphix.com>
Reviewed by: Prakash Surya <prakash.surya@delphix.com>
Reviewed by: loli10K <ezomori.nozomu@gmail.com>
Reviewed by: Brian Behlendorf <behlendorf1@llnl.gov>
Approved by: Richard Lowe <richlowe@richlowe.net>
Signed-off-by: Tim Chase <tim@chase2k.com>
Ported-by: Tim Chase <tim@chase2k.com>
OpenZFS-issue: https://www.illumos.org/issues/9102
OpenZFS-commit: https://github.com/openzfs/openzfs/commit/c3963210eb
Closes #8230
2018-12-19 14:54:59 +00:00
|
|
|
nvlist_t *args = fnvlist_alloc();
|
|
|
|
fnvlist_add_uint64(args, ZPOOL_INITIALIZE_COMMAND, (uint64_t)cmd_type);
|
|
|
|
fnvlist_add_nvlist(args, ZPOOL_INITIALIZE_VDEVS, vdevs);
|
|
|
|
|
|
|
|
error = lzc_ioctl(ZFS_IOC_POOL_INITIALIZE, poolname, args, errlist);
|
|
|
|
|
|
|
|
fnvlist_free(args);
|
|
|
|
|
|
|
|
return (error);
|
|
|
|
}
|
2019-03-29 16:13:20 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Changes TRIM state.
|
|
|
|
*
|
|
|
|
* vdevs should be a list of (<key>, guid) where guid is a uint64 vdev GUID.
|
|
|
|
* The key is ignored.
|
|
|
|
*
|
|
|
|
* If there are errors related to vdev arguments, per-vdev errors are returned
|
|
|
|
* in an nvlist with the key "vdevs". Each error is a (guid, errno) pair where
|
|
|
|
* guid is stringified with PRIu64, and errno is one of the following as
|
|
|
|
* an int64_t:
|
|
|
|
* - ENODEV if the device was not found
|
|
|
|
* - EINVAL if the devices is not a leaf or is not concrete (e.g. missing)
|
|
|
|
* - EROFS if the device is not writeable
|
|
|
|
* - EBUSY start requested but the device is already being either trimmed
|
|
|
|
* or initialized
|
|
|
|
* - ESRCH cancel/suspend requested but device is not being initialized
|
|
|
|
* - EOPNOTSUPP if the device does not support TRIM (or secure TRIM)
|
|
|
|
*
|
|
|
|
* If the errlist is empty, then return value will be:
|
|
|
|
* - EINVAL if one or more arguments was invalid
|
|
|
|
* - Other spa_open failures
|
|
|
|
* - 0 if the operation succeeded
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
lzc_trim(const char *poolname, pool_trim_func_t cmd_type, uint64_t rate,
|
|
|
|
boolean_t secure, nvlist_t *vdevs, nvlist_t **errlist)
|
|
|
|
{
|
|
|
|
int error;
|
|
|
|
|
|
|
|
nvlist_t *args = fnvlist_alloc();
|
|
|
|
fnvlist_add_uint64(args, ZPOOL_TRIM_COMMAND, (uint64_t)cmd_type);
|
|
|
|
fnvlist_add_nvlist(args, ZPOOL_TRIM_VDEVS, vdevs);
|
|
|
|
fnvlist_add_uint64(args, ZPOOL_TRIM_RATE, rate);
|
|
|
|
fnvlist_add_boolean_value(args, ZPOOL_TRIM_SECURE, secure);
|
|
|
|
|
|
|
|
error = lzc_ioctl(ZFS_IOC_POOL_TRIM, poolname, args, errlist);
|
|
|
|
|
|
|
|
fnvlist_free(args);
|
|
|
|
|
|
|
|
return (error);
|
|
|
|
}
|
Implement Redacted Send/Receive
Redacted send/receive allows users to send subsets of their data to
a target system. One possible use case for this feature is to not
transmit sensitive information to a data warehousing, test/dev, or
analytics environment. Another is to save space by not replicating
unimportant data within a given dataset, for example in backup tools
like zrepl.
Redacted send/receive is a three-stage process. First, a clone (or
clones) is made of the snapshot to be sent to the target. In this
clone (or clones), all unnecessary or unwanted data is removed or
modified. This clone is then snapshotted to create the "redaction
snapshot" (or snapshots). Second, the new zfs redact command is used
to create a redaction bookmark. The redaction bookmark stores the
list of blocks in a snapshot that were modified by the redaction
snapshot(s). Finally, the redaction bookmark is passed as a parameter
to zfs send. When sending to the snapshot that was redacted, the
redaction bookmark is used to filter out blocks that contain sensitive
or unwanted information, and those blocks are not included in the send
stream. When sending from the redaction bookmark, the blocks it
contains are considered as candidate blocks in addition to those
blocks in the destination snapshot that were modified since the
creation_txg of the redaction bookmark. This step is necessary to
allow the target to rehydrate data in the case where some blocks are
accidentally or unnecessarily modified in the redaction snapshot.
The changes to bookmarks to enable fast space estimation involve
adding deadlists to bookmarks. There is also logic to manage the
life cycles of these deadlists.
The new size estimation process operates in cases where previously
an accurate estimate could not be provided. In those cases, a send
is performed where no data blocks are read, reducing the runtime
significantly and providing a byte-accurate size estimate.
Reviewed-by: Dan Kimmel <dan.kimmel@delphix.com>
Reviewed-by: Matt Ahrens <mahrens@delphix.com>
Reviewed-by: Prashanth Sreenivasa <pks@delphix.com>
Reviewed-by: John Kennedy <john.kennedy@delphix.com>
Reviewed-by: George Wilson <george.wilson@delphix.com>
Reviewed-by: Chris Williamson <chris.williamson@delphix.com>
Reviewed-by: Pavel Zhakarov <pavel.zakharov@delphix.com>
Reviewed-by: Sebastien Roy <sebastien.roy@delphix.com>
Reviewed-by: Prakash Surya <prakash.surya@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Paul Dagnelie <pcd@delphix.com>
Closes #7958
2019-06-19 16:48:13 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Create a redaction bookmark named bookname by redacting snapshot with respect
|
|
|
|
* to all the snapshots in snapnv.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
lzc_redact(const char *snapshot, const char *bookname, nvlist_t *snapnv)
|
|
|
|
{
|
|
|
|
nvlist_t *args = fnvlist_alloc();
|
|
|
|
fnvlist_add_string(args, "bookname", bookname);
|
|
|
|
fnvlist_add_nvlist(args, "snapnv", snapnv);
|
|
|
|
int error = lzc_ioctl(ZFS_IOC_REDACT, snapshot, args, NULL);
|
|
|
|
fnvlist_free(args);
|
|
|
|
return (error);
|
|
|
|
}
|
Add subcommand to wait for background zfs activity to complete
Currently the best way to wait for the completion of a long-running
operation in a pool, like a scrub or device removal, is to poll 'zpool
status' and parse its output, which is neither efficient nor convenient.
This change adds a 'wait' subcommand to the zpool command. When invoked,
'zpool wait' will block until a specified type of background activity
completes. Currently, this subcommand can wait for any of the following:
- Scrubs or resilvers to complete
- Devices to be initialized
- Devices to be replaced
- Devices to be removed
- Checkpoints to be discarded
- Background freeing to complete
For example, a scrub that is in progress could be waited for by running
zpool wait -t scrub <pool>
This also adds a -w flag to the attach, checkpoint, initialize, replace,
remove, and scrub subcommands. When used, this flag makes the operations
kicked off by these subcommands synchronous instead of asynchronous.
This functionality is implemented using a new ioctl. The type of
activity to wait for is provided as input to the ioctl, and the ioctl
blocks until all activity of that type has completed. An ioctl was used
over other methods of kernel-userspace communication primarily for the
sake of portability.
Porting Notes:
This is ported from Delphix OS change DLPX-44432. The following changes
were made while porting:
- Added ZoL-style ioctl input declaration.
- Reorganized error handling in zpool_initialize in libzfs to integrate
better with changes made for TRIM support.
- Fixed check for whether a checkpoint discard is in progress.
Previously it also waited if the pool had a checkpoint, instead of
just if a checkpoint was being discarded.
- Exposed zfs_initialize_chunk_size as a ZoL-style tunable.
- Updated more existing tests to make use of new 'zpool wait'
functionality, tests that don't exist in Delphix OS.
- Used existing ZoL tunable zfs_scan_suspend_progress, together with
zinject, in place of a new tunable zfs_scan_max_blks_per_txg.
- Added support for a non-integral interval argument to zpool wait.
Future work:
ZoL has support for trimming devices, which Delphix OS does not. In the
future, 'zpool wait' could be extended to add the ability to wait for
trim operations to complete.
Reviewed-by: Matt Ahrens <matt@delphix.com>
Reviewed-by: John Kennedy <john.kennedy@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: John Gallagher <john.gallagher@delphix.com>
Closes #9162
2019-09-14 01:09:06 +00:00
|
|
|
|
|
|
|
static int
|
|
|
|
wait_common(const char *pool, zpool_wait_activity_t activity, boolean_t use_tag,
|
|
|
|
uint64_t tag, boolean_t *waited)
|
|
|
|
{
|
|
|
|
nvlist_t *args = fnvlist_alloc();
|
|
|
|
nvlist_t *result = NULL;
|
|
|
|
|
|
|
|
fnvlist_add_int32(args, ZPOOL_WAIT_ACTIVITY, activity);
|
|
|
|
if (use_tag)
|
|
|
|
fnvlist_add_uint64(args, ZPOOL_WAIT_TAG, tag);
|
|
|
|
|
|
|
|
int error = lzc_ioctl(ZFS_IOC_WAIT, pool, args, &result);
|
|
|
|
|
|
|
|
if (error == 0 && waited != NULL)
|
|
|
|
*waited = fnvlist_lookup_boolean_value(result,
|
|
|
|
ZPOOL_WAIT_WAITED);
|
|
|
|
|
|
|
|
fnvlist_free(args);
|
|
|
|
fnvlist_free(result);
|
|
|
|
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
lzc_wait(const char *pool, zpool_wait_activity_t activity, boolean_t *waited)
|
|
|
|
{
|
|
|
|
return (wait_common(pool, activity, B_FALSE, 0, waited));
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
lzc_wait_tag(const char *pool, zpool_wait_activity_t activity, uint64_t tag,
|
|
|
|
boolean_t *waited)
|
|
|
|
{
|
|
|
|
return (wait_common(pool, activity, B_TRUE, tag, waited));
|
|
|
|
}
|
2020-04-01 17:02:06 +00:00
|
|
|
|
|
|
|
int
|
|
|
|
lzc_wait_fs(const char *fs, zfs_wait_activity_t activity, boolean_t *waited)
|
|
|
|
{
|
|
|
|
nvlist_t *args = fnvlist_alloc();
|
|
|
|
nvlist_t *result = NULL;
|
|
|
|
|
|
|
|
fnvlist_add_int32(args, ZFS_WAIT_ACTIVITY, activity);
|
|
|
|
|
|
|
|
int error = lzc_ioctl(ZFS_IOC_WAIT_FS, fs, args, &result);
|
|
|
|
|
|
|
|
if (error == 0 && waited != NULL)
|
|
|
|
*waited = fnvlist_lookup_boolean_value(result,
|
|
|
|
ZFS_WAIT_WAITED);
|
|
|
|
|
|
|
|
fnvlist_free(args);
|
|
|
|
fnvlist_free(result);
|
|
|
|
|
|
|
|
return (error);
|
|
|
|
}
|
2020-05-07 16:36:33 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Set the bootenv contents for the given pool.
|
|
|
|
*/
|
|
|
|
int
|
2020-09-15 22:42:27 +00:00
|
|
|
lzc_set_bootenv(const char *pool, const nvlist_t *env)
|
2020-05-07 16:36:33 +00:00
|
|
|
{
|
2020-09-15 22:42:27 +00:00
|
|
|
return (lzc_ioctl(ZFS_IOC_SET_BOOTENV, pool, (nvlist_t *)env, NULL));
|
2020-05-07 16:36:33 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Get the contents of the bootenv of the given pool.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
lzc_get_bootenv(const char *pool, nvlist_t **outnvl)
|
|
|
|
{
|
|
|
|
return (lzc_ioctl(ZFS_IOC_GET_BOOTENV, pool, NULL, outnvl));
|
|
|
|
}
|