BRT: Fix FICLONE/FICLONERANGE shortened copy
On Linux the ioctl_ficlonerange() and ioctl_ficlone() system calls are expected to either fully clone the specified range or return an error. The range may be for an entire file. While internally ZFS supports cloning partial ranges, there's no way to return the length cloned to the caller, so we need to make this all or nothing.

As part of this change support for the REMAP_FILE_CAN_SHORTEN flag has been added. When REMAP_FILE_CAN_SHORTEN is set, zfs_clone_range() will return a shortened range when encountering pending dirty records. When it's clear, zfs_clone_range() will block and wait for the records to be written out, allowing the blocks to be cloned.

Furthermore, the file range lock is held over the region being cloned to prevent it from being modified while cloning. This doesn't quite provide atomic semantics, since if an error is encountered only a portion of the range may be cloned. This will be converted to an error if REMAP_FILE_CAN_SHORTEN was not provided and returned to the caller. However, the destination file range is left in an undefined state.

A test case has been added which exercises this functionality by verifying that `cp --reflink=never|auto|always` works correctly.

Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #15728
Closes #15842
This commit is contained in:
parent
9ef15845f5
commit
00d85a98ea
|
@ -286,7 +286,6 @@ typedef struct zfid_long {
|
||||||
|
|
||||||
extern uint_t zfs_fsyncer_key;
|
extern uint_t zfs_fsyncer_key;
|
||||||
extern int zfs_super_owner;
|
extern int zfs_super_owner;
|
||||||
extern int zfs_bclone_enabled;
|
|
||||||
|
|
||||||
extern void zfs_init(void);
|
extern void zfs_init(void);
|
||||||
extern void zfs_fini(void);
|
extern void zfs_fini(void);
|
||||||
|
|
|
@ -45,8 +45,6 @@ extern "C" {
|
||||||
typedef struct zfsvfs zfsvfs_t;
|
typedef struct zfsvfs zfsvfs_t;
|
||||||
struct znode;
|
struct znode;
|
||||||
|
|
||||||
extern int zfs_bclone_enabled;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This structure emulates the vfs_t from other platforms. Its purpose
|
* This structure emulates the vfs_t from other platforms. Its purpose
|
||||||
* is to facilitate the handling of mount options and minimize structural
|
* is to facilitate the handling of mount options and minimize structural
|
||||||
|
|
|
@ -24,8 +24,11 @@
|
||||||
|
|
||||||
#ifndef _SYS_FS_ZFS_VNOPS_H
|
#ifndef _SYS_FS_ZFS_VNOPS_H
|
||||||
#define _SYS_FS_ZFS_VNOPS_H
|
#define _SYS_FS_ZFS_VNOPS_H
|
||||||
|
|
||||||
#include <sys/zfs_vnops_os.h>
|
#include <sys/zfs_vnops_os.h>
|
||||||
|
|
||||||
|
extern int zfs_bclone_enabled;
|
||||||
|
|
||||||
extern int zfs_fsync(znode_t *, int, cred_t *);
|
extern int zfs_fsync(znode_t *, int, cred_t *);
|
||||||
extern int zfs_read(znode_t *, zfs_uio_t *, int, cred_t *);
|
extern int zfs_read(znode_t *, zfs_uio_t *, int, cred_t *);
|
||||||
extern int zfs_write(znode_t *, zfs_uio_t *, int, cred_t *);
|
extern int zfs_write(znode_t *, zfs_uio_t *, int, cred_t *);
|
||||||
|
|
|
@ -1142,6 +1142,15 @@ Enable the experimental block cloning feature.
|
||||||
If this setting is 0, then even if feature@block_cloning is enabled,
|
If this setting is 0, then even if feature@block_cloning is enabled,
|
||||||
attempts to clone blocks will act as though the feature is disabled.
|
attempts to clone blocks will act as though the feature is disabled.
|
||||||
.
|
.
|
||||||
|
.It Sy zfs_bclone_wait_dirty Ns = Ns Sy 0 Ns | Ns 1 Pq int
|
||||||
|
When set to 1 the FICLONE and FICLONERANGE ioctls wait for dirty data to be
|
||||||
|
written to disk.
|
||||||
|
This allows the clone operation to reliably succeed when a file is
|
||||||
|
modified and then immediately cloned.
|
||||||
|
For small files this may be slower than making a copy of the file.
|
||||||
|
Therefore, this setting defaults to 0 which causes a clone operation to
|
||||||
|
immediately fail when encountering a dirty block.
|
||||||
|
.
|
||||||
.It Sy zfs_blake3_impl Ns = Ns Sy fastest Pq string
|
.It Sy zfs_blake3_impl Ns = Ns Sy fastest Pq string
|
||||||
Select a BLAKE3 implementation.
|
Select a BLAKE3 implementation.
|
||||||
.Pp
|
.Pp
|
||||||
|
|
|
@ -89,10 +89,6 @@ int zfs_debug_level;
|
||||||
SYSCTL_INT(_vfs_zfs, OID_AUTO, debug, CTLFLAG_RWTUN, &zfs_debug_level, 0,
|
SYSCTL_INT(_vfs_zfs, OID_AUTO, debug, CTLFLAG_RWTUN, &zfs_debug_level, 0,
|
||||||
"Debug level");
|
"Debug level");
|
||||||
|
|
||||||
int zfs_bclone_enabled = 0;
|
|
||||||
SYSCTL_INT(_vfs_zfs, OID_AUTO, bclone_enabled, CTLFLAG_RWTUN,
|
|
||||||
&zfs_bclone_enabled, 0, "Enable block cloning");
|
|
||||||
|
|
||||||
struct zfs_jailparam {
|
struct zfs_jailparam {
|
||||||
int mount_snapshot;
|
int mount_snapshot;
|
||||||
};
|
};
|
||||||
|
|
|
@ -4248,9 +4248,4 @@ EXPORT_SYMBOL(zfs_map);
|
||||||
/* CSTYLED */
|
/* CSTYLED */
|
||||||
module_param(zfs_delete_blocks, ulong, 0644);
|
module_param(zfs_delete_blocks, ulong, 0644);
|
||||||
MODULE_PARM_DESC(zfs_delete_blocks, "Delete files larger than N blocks async");
|
MODULE_PARM_DESC(zfs_delete_blocks, "Delete files larger than N blocks async");
|
||||||
|
|
||||||
/* CSTYLED */
|
|
||||||
module_param(zfs_bclone_enabled, uint, 0644);
|
|
||||||
MODULE_PARM_DESC(zfs_bclone_enabled, "Enable block cloning");
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -31,8 +31,6 @@
|
||||||
#include <sys/zfs_vnops.h>
|
#include <sys/zfs_vnops.h>
|
||||||
#include <sys/zfeature.h>
|
#include <sys/zfeature.h>
|
||||||
|
|
||||||
int zfs_bclone_enabled = 0;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Clone part of a file via block cloning.
|
* Clone part of a file via block cloning.
|
||||||
*
|
*
|
||||||
|
@ -40,7 +38,7 @@ int zfs_bclone_enabled = 0;
|
||||||
* care of that depending on how it was called.
|
* care of that depending on how it was called.
|
||||||
*/
|
*/
|
||||||
static ssize_t
|
static ssize_t
|
||||||
__zpl_clone_file_range(struct file *src_file, loff_t src_off,
|
zpl_clone_file_range_impl(struct file *src_file, loff_t src_off,
|
||||||
struct file *dst_file, loff_t dst_off, size_t len)
|
struct file *dst_file, loff_t dst_off, size_t len)
|
||||||
{
|
{
|
||||||
struct inode *src_i = file_inode(src_file);
|
struct inode *src_i = file_inode(src_file);
|
||||||
|
@ -96,11 +94,12 @@ zpl_copy_file_range(struct file *src_file, loff_t src_off,
|
||||||
{
|
{
|
||||||
ssize_t ret;
|
ssize_t ret;
|
||||||
|
|
||||||
|
/* Flags is reserved for future extensions and must be zero. */
|
||||||
if (flags != 0)
|
if (flags != 0)
|
||||||
return (-EINVAL);
|
return (-EINVAL);
|
||||||
|
|
||||||
/* Try to do it via zfs_clone_range() */
|
/* Try to do it via zfs_clone_range() and allow shortening. */
|
||||||
ret = __zpl_clone_file_range(src_file, src_off,
|
ret = zpl_clone_file_range_impl(src_file, src_off,
|
||||||
dst_file, dst_off, len);
|
dst_file, dst_off, len);
|
||||||
|
|
||||||
#ifdef HAVE_VFS_GENERIC_COPY_FILE_RANGE
|
#ifdef HAVE_VFS_GENERIC_COPY_FILE_RANGE
|
||||||
|
@ -137,6 +136,11 @@ zpl_copy_file_range(struct file *src_file, loff_t src_off,
|
||||||
* FIDEDUPERANGE is for turning a non-clone into a clone, that is, compare the
|
* FIDEDUPERANGE is for turning a non-clone into a clone, that is, compare the
|
||||||
* range in both files and if they're the same, arrange for them to be backed
|
* range in both files and if they're the same, arrange for them to be backed
|
||||||
* by the same storage.
|
* by the same storage.
|
||||||
|
*
|
||||||
|
* REMAP_FILE_CAN_SHORTEN lets us know we can clone less than the given range
|
||||||
|
* if we want. It's designed for filesystems that may need to shorten the
|
||||||
|
* length for alignment, EOF, or any other requirement. ZFS may shorten the
|
||||||
|
* request when there is outstanding dirty data which hasn't been written.
|
||||||
*/
|
*/
|
||||||
loff_t
|
loff_t
|
||||||
zpl_remap_file_range(struct file *src_file, loff_t src_off,
|
zpl_remap_file_range(struct file *src_file, loff_t src_off,
|
||||||
|
@ -145,24 +149,21 @@ zpl_remap_file_range(struct file *src_file, loff_t src_off,
|
||||||
if (flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_CAN_SHORTEN))
|
if (flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_CAN_SHORTEN))
|
||||||
return (-EINVAL);
|
return (-EINVAL);
|
||||||
|
|
||||||
/*
|
/* No support for dedup yet */
|
||||||
* REMAP_FILE_CAN_SHORTEN lets us know we can clone less than the given
|
|
||||||
* range if we want. Its designed for filesystems that make data past
|
|
||||||
* EOF available, and don't want it to be visible in both files. ZFS
|
|
||||||
* doesn't do that, so we just turn the flag off.
|
|
||||||
*/
|
|
||||||
flags &= ~REMAP_FILE_CAN_SHORTEN;
|
|
||||||
|
|
||||||
if (flags & REMAP_FILE_DEDUP)
|
if (flags & REMAP_FILE_DEDUP)
|
||||||
/* No support for dedup yet */
|
|
||||||
return (-EOPNOTSUPP);
|
return (-EOPNOTSUPP);
|
||||||
|
|
||||||
/* Zero length means to clone everything to the end of the file */
|
/* Zero length means to clone everything to the end of the file */
|
||||||
if (len == 0)
|
if (len == 0)
|
||||||
len = i_size_read(file_inode(src_file)) - src_off;
|
len = i_size_read(file_inode(src_file)) - src_off;
|
||||||
|
|
||||||
return (__zpl_clone_file_range(src_file, src_off,
|
ssize_t ret = zpl_clone_file_range_impl(src_file, src_off,
|
||||||
dst_file, dst_off, len));
|
dst_file, dst_off, len);
|
||||||
|
|
||||||
|
if (!(flags & REMAP_FILE_CAN_SHORTEN) && ret >= 0 && ret != len)
|
||||||
|
ret = -EINVAL;
|
||||||
|
|
||||||
|
return (ret);
|
||||||
}
|
}
|
||||||
#endif /* HAVE_VFS_REMAP_FILE_RANGE */
|
#endif /* HAVE_VFS_REMAP_FILE_RANGE */
|
||||||
|
|
||||||
|
@ -179,8 +180,14 @@ zpl_clone_file_range(struct file *src_file, loff_t src_off,
|
||||||
if (len == 0)
|
if (len == 0)
|
||||||
len = i_size_read(file_inode(src_file)) - src_off;
|
len = i_size_read(file_inode(src_file)) - src_off;
|
||||||
|
|
||||||
return (__zpl_clone_file_range(src_file, src_off,
|
/* The entire length must be cloned or this is an error. */
|
||||||
dst_file, dst_off, len));
|
ssize_t ret = zpl_clone_file_range_impl(src_file, src_off,
|
||||||
|
dst_file, dst_off, len);
|
||||||
|
|
||||||
|
if (ret >= 0 && ret != len)
|
||||||
|
ret = -EINVAL;
|
||||||
|
|
||||||
|
return (ret);
|
||||||
}
|
}
|
||||||
#endif /* HAVE_VFS_CLONE_FILE_RANGE || HAVE_VFS_FILE_OPERATIONS_EXTEND */
|
#endif /* HAVE_VFS_CLONE_FILE_RANGE || HAVE_VFS_FILE_OPERATIONS_EXTEND */
|
||||||
|
|
||||||
|
@ -214,8 +221,7 @@ zpl_ioctl_ficlone(struct file *dst_file, void *arg)
|
||||||
|
|
||||||
size_t len = i_size_read(file_inode(src_file));
|
size_t len = i_size_read(file_inode(src_file));
|
||||||
|
|
||||||
ssize_t ret =
|
ssize_t ret = zpl_clone_file_range_impl(src_file, 0, dst_file, 0, len);
|
||||||
__zpl_clone_file_range(src_file, 0, dst_file, 0, len);
|
|
||||||
|
|
||||||
fput(src_file);
|
fput(src_file);
|
||||||
|
|
||||||
|
@ -253,7 +259,7 @@ zpl_ioctl_ficlonerange(struct file *dst_file, void __user *arg)
|
||||||
if (len == 0)
|
if (len == 0)
|
||||||
len = i_size_read(file_inode(src_file)) - fcr.fcr_src_offset;
|
len = i_size_read(file_inode(src_file)) - fcr.fcr_src_offset;
|
||||||
|
|
||||||
ssize_t ret = __zpl_clone_file_range(src_file, fcr.fcr_src_offset,
|
ssize_t ret = zpl_clone_file_range_impl(src_file, fcr.fcr_src_offset,
|
||||||
dst_file, fcr.fcr_dest_offset, len);
|
dst_file, fcr.fcr_dest_offset, len);
|
||||||
|
|
||||||
fput(src_file);
|
fput(src_file);
|
||||||
|
|
|
@ -58,6 +58,26 @@
|
||||||
#include <sys/zfs_vfsops.h>
|
#include <sys/zfs_vfsops.h>
|
||||||
#include <sys/zfs_znode.h>
|
#include <sys/zfs_znode.h>
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Enable the experimental block cloning feature. If this setting is 0, then
|
||||||
|
* even if feature@block_cloning is enabled, attempts to clone blocks will act
|
||||||
|
* as though the feature is disabled.
|
||||||
|
*/
|
||||||
|
int zfs_bclone_enabled = 0;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* When set zfs_clone_range() waits for dirty data to be written to disk.
|
||||||
|
* This allows the clone operation to reliably succeed when a file is modified
|
||||||
|
* and then immediately cloned. For small files this may be slower than making
|
||||||
|
* a copy of the file and is therefore not the default. However, in certain
|
||||||
|
* scenarios this behavior may be desirable so a tunable is provided.
|
||||||
|
*/
|
||||||
|
static int zfs_bclone_wait_dirty = 0;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Maximum bytes to read per chunk in zfs_read().
|
||||||
|
*/
|
||||||
|
static uint64_t zfs_vnops_read_chunk_size = 1024 * 1024;
|
||||||
|
|
||||||
static ulong_t zfs_fsync_sync_cnt = 4;
|
static ulong_t zfs_fsync_sync_cnt = 4;
|
||||||
|
|
||||||
|
@ -189,8 +209,6 @@ zfs_access(znode_t *zp, int mode, int flag, cred_t *cr)
|
||||||
return (error);
|
return (error);
|
||||||
}
|
}
|
||||||
|
|
||||||
static uint64_t zfs_vnops_read_chunk_size = 1024 * 1024; /* Tunable */
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Read bytes from specified file into supplied buffer.
|
* Read bytes from specified file into supplied buffer.
|
||||||
*
|
*
|
||||||
|
@ -1055,6 +1073,7 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp,
|
||||||
size_t maxblocks, nbps;
|
size_t maxblocks, nbps;
|
||||||
uint_t inblksz;
|
uint_t inblksz;
|
||||||
uint64_t clear_setid_bits_txg = 0;
|
uint64_t clear_setid_bits_txg = 0;
|
||||||
|
uint64_t last_synced_txg = 0;
|
||||||
|
|
||||||
inoff = *inoffp;
|
inoff = *inoffp;
|
||||||
outoff = *outoffp;
|
outoff = *outoffp;
|
||||||
|
@ -1293,15 +1312,23 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp,
|
||||||
}
|
}
|
||||||
|
|
||||||
nbps = maxblocks;
|
nbps = maxblocks;
|
||||||
|
last_synced_txg = spa_last_synced_txg(dmu_objset_spa(inos));
|
||||||
error = dmu_read_l0_bps(inos, inzp->z_id, inoff, size, bps,
|
error = dmu_read_l0_bps(inos, inzp->z_id, inoff, size, bps,
|
||||||
&nbps);
|
&nbps);
|
||||||
if (error != 0) {
|
if (error != 0) {
|
||||||
/*
|
/*
|
||||||
* If we are trying to clone a block that was created
|
* If we are trying to clone a block that was created
|
||||||
* in the current transaction group, error will be
|
* in the current transaction group, the error will be
|
||||||
* EAGAIN here, which we can just return to the caller
|
* EAGAIN here. Based on zfs_bclone_wait_dirty either
|
||||||
* so it can fallback if it likes.
|
* return a shortened range to the caller so it can
|
||||||
|
* fallback, or wait for the next TXG and check again.
|
||||||
*/
|
*/
|
||||||
|
if (error == EAGAIN && zfs_bclone_wait_dirty) {
|
||||||
|
txg_wait_synced(dmu_objset_pool(inos),
|
||||||
|
last_synced_txg + 1);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1523,3 +1550,9 @@ EXPORT_SYMBOL(zfs_clone_range_replay);
|
||||||
|
|
||||||
ZFS_MODULE_PARAM(zfs_vnops, zfs_vnops_, read_chunk_size, U64, ZMOD_RW,
|
ZFS_MODULE_PARAM(zfs_vnops, zfs_vnops_, read_chunk_size, U64, ZMOD_RW,
|
||||||
"Bytes to read per chunk");
|
"Bytes to read per chunk");
|
||||||
|
|
||||||
|
ZFS_MODULE_PARAM(zfs, zfs_, bclone_enabled, INT, ZMOD_RW,
|
||||||
|
"Enable block cloning");
|
||||||
|
|
||||||
|
ZFS_MODULE_PARAM(zfs, zfs_, bclone_wait_dirty, INT, ZMOD_RW,
|
||||||
|
"Wait for dirty blocks when cloning");
|
||||||
|
|
|
@ -630,7 +630,7 @@ tests = ['compress_001_pos', 'compress_002_pos', 'compress_003_pos',
|
||||||
tags = ['functional', 'compression']
|
tags = ['functional', 'compression']
|
||||||
|
|
||||||
[tests/functional/cp_files]
|
[tests/functional/cp_files]
|
||||||
tests = ['cp_files_001_pos', 'cp_stress']
|
tests = ['cp_files_001_pos', 'cp_files_002_pos', 'cp_stress']
|
||||||
tags = ['functional', 'cp_files']
|
tags = ['functional', 'cp_files']
|
||||||
|
|
||||||
[tests/functional/crtime]
|
[tests/functional/crtime]
|
||||||
|
|
|
@ -176,6 +176,7 @@ if sys.platform.startswith('freebsd'):
|
||||||
'cli_root/zpool_wait/zpool_wait_trim_cancel': ['SKIP', trim_reason],
|
'cli_root/zpool_wait/zpool_wait_trim_cancel': ['SKIP', trim_reason],
|
||||||
'cli_root/zpool_wait/zpool_wait_trim_flag': ['SKIP', trim_reason],
|
'cli_root/zpool_wait/zpool_wait_trim_flag': ['SKIP', trim_reason],
|
||||||
'cli_root/zfs_unshare/zfs_unshare_008_pos': ['SKIP', na_reason],
|
'cli_root/zfs_unshare/zfs_unshare_008_pos': ['SKIP', na_reason],
|
||||||
|
'cp_files/cp_files_002_pos': ['SKIP', na_reason],
|
||||||
'link_count/link_count_001': ['SKIP', na_reason],
|
'link_count/link_count_001': ['SKIP', na_reason],
|
||||||
'casenorm/mixed_create_failure': ['FAIL', 13215],
|
'casenorm/mixed_create_failure': ['FAIL', 13215],
|
||||||
'mmap/mmap_sync_001_pos': ['SKIP', na_reason],
|
'mmap/mmap_sync_001_pos': ['SKIP', na_reason],
|
||||||
|
@ -312,6 +313,7 @@ elif sys.platform.startswith('linux'):
|
||||||
['SKIP', cfr_reason],
|
['SKIP', cfr_reason],
|
||||||
'cli_root/zfs_rename/zfs_rename_002_pos': ['FAIL', known_reason],
|
'cli_root/zfs_rename/zfs_rename_002_pos': ['FAIL', known_reason],
|
||||||
'cli_root/zpool_reopen/zpool_reopen_003_pos': ['FAIL', known_reason],
|
'cli_root/zpool_reopen/zpool_reopen_003_pos': ['FAIL', known_reason],
|
||||||
|
'cp_files/cp_files_002_pos': ['SKIP', cfr_reason],
|
||||||
'fault/auto_online_002_pos': ['FAIL', 11889],
|
'fault/auto_online_002_pos': ['FAIL', 11889],
|
||||||
'fault/auto_replace_001_pos': ['FAIL', 14851],
|
'fault/auto_replace_001_pos': ['FAIL', 14851],
|
||||||
'fault/auto_spare_002_pos': ['FAIL', 11889],
|
'fault/auto_spare_002_pos': ['FAIL', 11889],
|
||||||
|
|
|
@ -91,6 +91,7 @@ VOL_MODE vol.mode zvol_volmode
|
||||||
VOL_RECURSIVE vol.recursive UNSUPPORTED
|
VOL_RECURSIVE vol.recursive UNSUPPORTED
|
||||||
VOL_USE_BLK_MQ UNSUPPORTED zvol_use_blk_mq
|
VOL_USE_BLK_MQ UNSUPPORTED zvol_use_blk_mq
|
||||||
BCLONE_ENABLED zfs_bclone_enabled zfs_bclone_enabled
|
BCLONE_ENABLED zfs_bclone_enabled zfs_bclone_enabled
|
||||||
|
BCLONE_WAIT_DIRTY zfs_bclone_wait_dirty zfs_bclone_wait_dirty
|
||||||
XATTR_COMPAT xattr_compat zfs_xattr_compat
|
XATTR_COMPAT xattr_compat zfs_xattr_compat
|
||||||
ZEVENT_LEN_MAX zevent.len_max zfs_zevent_len_max
|
ZEVENT_LEN_MAX zevent.len_max zfs_zevent_len_max
|
||||||
ZEVENT_RETAIN_MAX zevent.retain_max zfs_zevent_retain_max
|
ZEVENT_RETAIN_MAX zevent.retain_max zfs_zevent_retain_max
|
||||||
|
|
|
@ -1393,6 +1393,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
|
||||||
functional/compression/setup.ksh \
|
functional/compression/setup.ksh \
|
||||||
functional/cp_files/cleanup.ksh \
|
functional/cp_files/cleanup.ksh \
|
||||||
functional/cp_files/cp_files_001_pos.ksh \
|
functional/cp_files/cp_files_001_pos.ksh \
|
||||||
|
functional/cp_files/cp_files_002_pos.ksh \
|
||||||
functional/cp_files/cp_stress.ksh \
|
functional/cp_files/cp_stress.ksh \
|
||||||
functional/cp_files/setup.ksh \
|
functional/cp_files/setup.ksh \
|
||||||
functional/crtime/cleanup.ksh \
|
functional/crtime/cleanup.ksh \
|
||||||
|
|
|
@ -0,0 +1,161 @@
|
||||||
|
#! /bin/ksh -p
|
||||||
|
#
|
||||||
|
# CDDL HEADER START
|
||||||
|
#
|
||||||
|
# The contents of this file are subject to the terms of the
|
||||||
|
# Common Development and Distribution License (the "License").
|
||||||
|
# You may not use this file except in compliance with the License.
|
||||||
|
#
|
||||||
|
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||||
|
# or https://opensource.org/licenses/CDDL-1.0.
|
||||||
|
# See the License for the specific language governing permissions
|
||||||
|
# and limitations under the License.
|
||||||
|
#
|
||||||
|
# When distributing Covered Code, include this CDDL HEADER in each
|
||||||
|
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||||
|
# If applicable, add the following below this CDDL HEADER, with the
|
||||||
|
# fields enclosed by brackets "[]" replaced with your own identifying
|
||||||
|
# information: Portions Copyright [yyyy] [name of copyright owner]
|
||||||
|
#
|
||||||
|
# CDDL HEADER END
|
||||||
|
#
|
||||||
|
|
||||||
|
#
|
||||||
|
# Copyright (c) 2024 by Lawrence Livermore National Security, LLC.
|
||||||
|
#
|
||||||
|
|
||||||
|
. $STF_SUITE/include/libtest.shlib
|
||||||
|
. $STF_SUITE/tests/functional/bclone/bclone_common.kshlib
|
||||||
|
|
||||||
|
#
|
||||||
|
# DESCRIPTION:
|
||||||
|
# Verify all cp --reflink modes work with modified file.
|
||||||
|
#
|
||||||
|
# STRATEGY:
|
||||||
|
# 1. Verify "cp --reflink=never|auto|always" behaves as expected.
|
||||||
|
# Two different modes of operation are tested.
|
||||||
|
#
|
||||||
|
# a. zfs_bclone_wait_dirty=0: FICLONE and FICLONERANGE fail with EINVAL
|
||||||
|
# when there are dirty blocks which cannot be immediately cloned.
|
||||||
|
# This is the default behavior.
|
||||||
|
#
|
||||||
|
# b. zfs_bclone_wait_dirty=1: FICLONE and FICLONERANGE wait for
|
||||||
|
# dirty blocks to be written to disk allowing the clone to succeed.
|
||||||
|
# The downside to this is it may be slow which depending on the
|
||||||
|
# situation may defeat the point of making a clone.
|
||||||
|
#
|
||||||
|
|
||||||
|
verify_runnable "global"
|
||||||
|
verify_block_cloning
|
||||||
|
|
||||||
|
if ! is_linux; then
|
||||||
|
log_unsupported "cp --reflink is a GNU coreutils option"
|
||||||
|
fi
|
||||||
|
|
||||||
|
#
# Exit handler registered via log_onexit: destroy the cp-reflink test
# dataset if it still exists and reset the BCLONE_WAIT_DIRTY tunable to 0.
#
function cleanup
{
	# Use $TESTPOOL, not $$TESTPOOL: "$$" expands to the shell's PID, so
	# the destroy would target "<pid>TESTPOOL/cp-reflink" and leave the
	# real test dataset behind.
	datasetexists $TESTPOOL/cp-reflink && \
	    destroy_dataset $TESTPOOL/cp-reflink -f
	log_must set_tunable32 BCLONE_WAIT_DIRTY 0
}
|
||||||
|
|
||||||
|
#
# Compare the sha256digest output of two files and fail the test when
# they differ.
#
# $1 - source file
# $2 - destination file
#
function verify_copy
{
	src_cksum=$(sha256digest $1)
	dst_cksum=$(sha256digest $2)

	# Nothing more to do when the digests agree.
	[[ "$src_cksum" == "$dst_cksum" ]] && return

	# List the test directory first to help diagnose the mismatch.
	log_must ls -l $CP_TESTDIR
	log_fail "checksum mismatch ($src_cksum != $dst_cksum)"
}
|
||||||
|
|
||||||
|
log_assert "Verify all cp --reflink modes work with modified file"
|
||||||
|
|
||||||
|
log_onexit cleanup
|
||||||
|
|
||||||
|
SRC_FILE=src.data
DST_FILE=dst.data
# SRC_SIZE is a record count used as dd's count= and seek=, and as the
# modulus in "$RANDOM % $SRC_SIZE" further down, so it must never be zero.
# Keep it in the range [1024, 2047].
SRC_SIZE=$((1024 + $RANDOM % 1024))
|
||||||
|
|
||||||
|
# A smaller recordsize is used merely to speed up the test.
|
||||||
|
RECORDSIZE=4096
|
||||||
|
|
||||||
|
log_must zfs create -o recordsize=$RECORDSIZE $TESTPOOL/cp-reflink
|
||||||
|
CP_TESTDIR=$(get_prop mountpoint $TESTPOOL/cp-reflink)
|
||||||
|
|
||||||
|
log_must cd $CP_TESTDIR
|
||||||
|
|
||||||
|
# Never wait on dirty blocks (zfs_bclone_wait_dirty=0)
|
||||||
|
log_must set_tunable32 BCLONE_WAIT_DIRTY 0
|
||||||
|
|
||||||
|
for mode in "never" "auto" "always"; do
|
||||||
|
log_note "Checking 'cp --reflink=$mode'"
|
||||||
|
|
||||||
|
# Create a new file and immediately copy it.
|
||||||
|
log_must dd if=/dev/urandom of=$SRC_FILE bs=$RECORDSIZE count=$SRC_SIZE
|
||||||
|
|
||||||
|
if [[ "$mode" == "always" ]]; then
|
||||||
|
log_mustnot cp --reflink=$mode $SRC_FILE $DST_FILE
|
||||||
|
log_must ls -l $CP_TESTDIR
|
||||||
|
else
|
||||||
|
log_must cp --reflink=$mode $SRC_FILE $DST_FILE
|
||||||
|
verify_copy $SRC_FILE $DST_FILE
|
||||||
|
fi
|
||||||
|
log_must rm -f $DST_FILE
|
||||||
|
|
||||||
|
# Append to an existing file and immediately copy it.
|
||||||
|
sync_pool $TESTPOOL
|
||||||
|
log_must dd if=/dev/urandom of=$SRC_FILE bs=$RECORDSIZE seek=$SRC_SIZE \
|
||||||
|
count=1 conv=notrunc
|
||||||
|
if [[ "$mode" == "always" ]]; then
|
||||||
|
log_mustnot cp --reflink=$mode $SRC_FILE $DST_FILE
|
||||||
|
log_must ls -l $CP_TESTDIR
|
||||||
|
else
|
||||||
|
log_must cp --reflink=$mode $SRC_FILE $DST_FILE
|
||||||
|
verify_copy $SRC_FILE $DST_FILE
|
||||||
|
fi
|
||||||
|
log_must rm -f $DST_FILE
|
||||||
|
|
||||||
|
# Overwrite a random range of an existing file and immediately copy it.
|
||||||
|
sync_pool $TESTPOOL
|
||||||
|
log_must dd if=/dev/urandom of=$SRC_FILE bs=$((RECORDSIZE / 2)) \
|
||||||
|
seek=$(($RANDOM % $SRC_SIZE)) count=$(($RANDOM % 16)) conv=notrunc
|
||||||
|
if [[ "$mode" == "always" ]]; then
|
||||||
|
log_mustnot cp --reflink=$mode $SRC_FILE $DST_FILE
|
||||||
|
log_must ls -l $CP_TESTDIR
|
||||||
|
else
|
||||||
|
log_must cp --reflink=$mode $SRC_FILE $DST_FILE
|
||||||
|
verify_copy $SRC_FILE $DST_FILE
|
||||||
|
fi
|
||||||
|
log_must rm -f $SRC_FILE $DST_FILE
|
||||||
|
done
|
||||||
|
|
||||||
|
# Wait on dirty blocks (zfs_bclone_wait_dirty=1)
|
||||||
|
log_must set_tunable32 BCLONE_WAIT_DIRTY 1
|
||||||
|
|
||||||
|
for mode in "never" "auto" "always"; do
|
||||||
|
log_note "Checking 'cp --reflink=$mode'"
|
||||||
|
|
||||||
|
# Create a new file and immediately copy it.
|
||||||
|
log_must dd if=/dev/urandom of=$SRC_FILE bs=$RECORDSIZE count=$SRC_SIZE
|
||||||
|
log_must cp --reflink=$mode $SRC_FILE $DST_FILE
|
||||||
|
verify_copy $SRC_FILE $DST_FILE
|
||||||
|
log_must rm -f $DST_FILE
|
||||||
|
|
||||||
|
# Append to an existing file and immediately copy it.
|
||||||
|
log_must dd if=/dev/urandom of=$SRC_FILE bs=$RECORDSIZE seek=$SRC_SIZE \
|
||||||
|
count=1 conv=notrunc
|
||||||
|
log_must cp --reflink=$mode $SRC_FILE $DST_FILE
|
||||||
|
verify_copy $SRC_FILE $DST_FILE
|
||||||
|
log_must rm -f $DST_FILE
|
||||||
|
|
||||||
|
# Overwrite a random range of an existing file and immediately copy it.
|
||||||
|
log_must dd if=/dev/urandom of=$SRC_FILE bs=$((RECORDSIZE / 2)) \
|
||||||
|
seek=$(($RANDOM % $SRC_SIZE)) count=$(($RANDOM % 16)) conv=notrunc
|
||||||
|
log_must cp --reflink=$mode $SRC_FILE $DST_FILE
|
||||||
|
verify_copy $SRC_FILE $DST_FILE
|
||||||
|
log_must rm -f $SRC_FILE $DST_FILE
|
||||||
|
done
|
||||||
|
|
||||||
|
log_pass
|
Loading…
Reference in New Issue