Combine OS-independent ABD Code into Common Source File
Reorganize the ABD code base so that OS-independent ABD code is placed in a common abd.c file. OS-dependent ABD code remains in each OS's ABD source file, and those source files have been renamed to abd_os.c. The OS-independent ABD code is now under: module/zfs/abd.c, with the OS-dependent code in: module/os/linux/zfs/abd_os.c and module/os/freebsd/zfs/abd_os.c. Reviewed-by: Matthew Ahrens <mahrens@delphix.com> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Brian Atkinson <batkinson@lanl.gov> Closes #10293
This commit is contained in:
parent
bd95f00d4b
commit
fc551d7efb
|
@ -2,6 +2,7 @@ SUBDIRS = fm fs crypto lua sysevent
|
|||
|
||||
COMMON_H = \
|
||||
$(top_srcdir)/include/sys/abd.h \
|
||||
$(top_srcdir)/include/sys/abd_impl.h \
|
||||
$(top_srcdir)/include/sys/aggsum.h \
|
||||
$(top_srcdir)/include/sys/arc.h \
|
||||
$(top_srcdir)/include/sys/arc_impl.h \
|
||||
|
|
|
@ -35,56 +35,14 @@
|
|||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef enum abd_flags {
|
||||
ABD_FLAG_LINEAR = 1 << 0, /* is buffer linear (or scattered)? */
|
||||
ABD_FLAG_OWNER = 1 << 1, /* does it own its data buffers? */
|
||||
ABD_FLAG_META = 1 << 2, /* does this represent FS metadata? */
|
||||
ABD_FLAG_MULTI_ZONE = 1 << 3, /* pages split over memory zones */
|
||||
ABD_FLAG_MULTI_CHUNK = 1 << 4, /* pages split over multiple chunks */
|
||||
ABD_FLAG_LINEAR_PAGE = 1 << 5, /* linear but allocd from page */
|
||||
} abd_flags_t;
|
||||
|
||||
typedef struct abd {
|
||||
abd_flags_t abd_flags;
|
||||
uint_t abd_size; /* excludes scattered abd_offset */
|
||||
struct abd *abd_parent;
|
||||
zfs_refcount_t abd_children;
|
||||
union {
|
||||
struct abd_scatter {
|
||||
uint_t abd_offset;
|
||||
#if defined(__FreeBSD__) && defined(_KERNEL)
|
||||
uint_t abd_chunk_size;
|
||||
void *abd_chunks[];
|
||||
#else
|
||||
uint_t abd_nents;
|
||||
struct scatterlist *abd_sgl;
|
||||
#endif
|
||||
} abd_scatter;
|
||||
struct abd_linear {
|
||||
void *abd_buf;
|
||||
struct scatterlist *abd_sgl; /* for LINEAR_PAGE */
|
||||
} abd_linear;
|
||||
} abd_u;
|
||||
} abd_t;
|
||||
struct abd; /* forward declaration */
|
||||
typedef struct abd abd_t;
|
||||
|
||||
typedef int abd_iter_func_t(void *buf, size_t len, void *private);
|
||||
typedef int abd_iter_func2_t(void *bufa, void *bufb, size_t len, void *private);
|
||||
|
||||
extern int zfs_abd_scatter_enabled;
|
||||
|
||||
static inline boolean_t
|
||||
abd_is_linear(abd_t *abd)
|
||||
{
|
||||
return ((abd->abd_flags & ABD_FLAG_LINEAR) != 0 ? B_TRUE : B_FALSE);
|
||||
}
|
||||
|
||||
static inline boolean_t
|
||||
abd_is_linear_page(abd_t *abd)
|
||||
{
|
||||
return ((abd->abd_flags & ABD_FLAG_LINEAR_PAGE) != 0 ?
|
||||
B_TRUE : B_FALSE);
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocations and deallocations
|
||||
*/
|
||||
|
@ -124,12 +82,8 @@ void abd_copy_to_buf_off(void *, abd_t *, size_t, size_t);
|
|||
int abd_cmp(abd_t *, abd_t *);
|
||||
int abd_cmp_buf_off(abd_t *, const void *, size_t, size_t);
|
||||
void abd_zero_off(abd_t *, size_t, size_t);
|
||||
|
||||
#if defined(_KERNEL)
|
||||
unsigned int abd_scatter_bio_map_off(struct bio *, abd_t *, unsigned int,
|
||||
size_t);
|
||||
unsigned long abd_nr_pages_off(abd_t *, unsigned int, size_t);
|
||||
#endif
|
||||
void abd_verify(abd_t *);
|
||||
uint_t abd_get_size(abd_t *);
|
||||
|
||||
void abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd,
|
||||
ssize_t csize, ssize_t dsize, const unsigned parity,
|
||||
|
@ -174,13 +128,29 @@ abd_zero(abd_t *abd, size_t size)
|
|||
abd_zero_off(abd, 0, size);
|
||||
}
|
||||
|
||||
/*
|
||||
* ABD type check functions
|
||||
*/
|
||||
boolean_t abd_is_linear(abd_t *);
|
||||
boolean_t abd_is_linear_page(abd_t *);
|
||||
|
||||
/*
|
||||
* Module lifecycle
|
||||
* Defined in each specific OS's abd_os.c
|
||||
*/
|
||||
|
||||
void abd_init(void);
|
||||
void abd_fini(void);
|
||||
|
||||
/*
|
||||
* Linux ABD bio functions
|
||||
*/
|
||||
#if defined(__linux__) && defined(_KERNEL)
|
||||
unsigned int abd_scatter_bio_map_off(struct bio *, abd_t *, unsigned int,
|
||||
size_t);
|
||||
unsigned long abd_nr_pages_off(abd_t *, unsigned int, size_t);
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1,126 @@
|
|||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2014 by Chunwei Chen. All rights reserved.
|
||||
* Copyright (c) 2016, 2019 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _ABD_IMPL_H
|
||||
#define _ABD_IMPL_H
|
||||
|
||||
#include <sys/abd.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef enum abd_flags {
|
||||
ABD_FLAG_LINEAR = 1 << 0, /* is buffer linear (or scattered)? */
|
||||
ABD_FLAG_OWNER = 1 << 1, /* does it own its data buffers? */
|
||||
ABD_FLAG_META = 1 << 2, /* does this represent FS metadata? */
|
||||
ABD_FLAG_MULTI_ZONE = 1 << 3, /* pages split over memory zones */
|
||||
ABD_FLAG_MULTI_CHUNK = 1 << 4, /* pages split over multiple chunks */
|
||||
ABD_FLAG_LINEAR_PAGE = 1 << 5, /* linear but allocd from page */
|
||||
} abd_flags_t;
|
||||
|
||||
typedef enum abd_stats_op {
|
||||
ABDSTAT_INCR, /* Increase abdstat values */
|
||||
ABDSTAT_DECR /* Decrease abdstat values */
|
||||
} abd_stats_op_t;
|
||||
|
||||
struct abd {
|
||||
abd_flags_t abd_flags;
|
||||
uint_t abd_size; /* excludes scattered abd_offset */
|
||||
struct abd *abd_parent;
|
||||
zfs_refcount_t abd_children;
|
||||
union {
|
||||
struct abd_scatter {
|
||||
uint_t abd_offset;
|
||||
#if defined(__FreeBSD__) && defined(_KERNEL)
|
||||
uint_t abd_chunk_size;
|
||||
void *abd_chunks[];
|
||||
#else
|
||||
uint_t abd_nents;
|
||||
struct scatterlist *abd_sgl;
|
||||
#endif
|
||||
} abd_scatter;
|
||||
struct abd_linear {
|
||||
void *abd_buf;
|
||||
struct scatterlist *abd_sgl; /* for LINEAR_PAGE */
|
||||
} abd_linear;
|
||||
} abd_u;
|
||||
};
|
||||
|
||||
struct scatterlist; /* forward declaration */
|
||||
|
||||
struct abd_iter {
|
||||
/* public interface */
|
||||
void *iter_mapaddr; /* addr corresponding to iter_pos */
|
||||
size_t iter_mapsize; /* length of data valid at mapaddr */
|
||||
|
||||
/* private */
|
||||
abd_t *iter_abd; /* ABD being iterated through */
|
||||
size_t iter_pos;
|
||||
size_t iter_offset; /* offset in current sg/abd_buf, */
|
||||
/* abd_offset included */
|
||||
struct scatterlist *iter_sg; /* current sg */
|
||||
};
|
||||
|
||||
/*
|
||||
* OS specific functions
|
||||
*/
|
||||
|
||||
abd_t *abd_alloc_struct(size_t);
|
||||
abd_t *abd_get_offset_scatter(abd_t *, size_t);
|
||||
void abd_free_struct(abd_t *);
|
||||
void abd_alloc_chunks(abd_t *, size_t);
|
||||
void abd_free_chunks(abd_t *);
|
||||
boolean_t abd_size_alloc_linear(size_t);
|
||||
void abd_update_scatter_stats(abd_t *, abd_stats_op_t);
|
||||
void abd_update_linear_stats(abd_t *, abd_stats_op_t);
|
||||
void abd_verify_scatter(abd_t *);
|
||||
void abd_free_linear_page(abd_t *);
|
||||
void abd_enter_critical(unsigned long);
|
||||
void abd_exit_critical(unsigned long);
|
||||
/* OS specific abd_iter functions */
|
||||
void abd_iter_init(struct abd_iter *, abd_t *);
|
||||
boolean_t abd_iter_at_end(struct abd_iter *);
|
||||
void abd_iter_advance(struct abd_iter *, size_t);
|
||||
void abd_iter_map(struct abd_iter *);
|
||||
void abd_iter_unmap(struct abd_iter *);
|
||||
|
||||
/*
|
||||
* Helper macros
|
||||
*/
|
||||
/*
 * kstat accessors. These expect a variable named abd_stats, whose members
 * are kstat_named_t, to be in scope at the point of use.
 */
#define	ABDSTAT(stat)		(abd_stats.stat.value.ui64)
#define	ABDSTAT_INCR(stat, val) \
	atomic_add_64(&abd_stats.stat.value.ui64, (val))
#define	ABDSTAT_BUMP(stat)	ABDSTAT_INCR(stat, 1)
#define	ABDSTAT_BUMPDOWN(stat)	ABDSTAT_INCR(stat, -1)

/*
 * Accessors for the scatter / linear arms of abd_u. The argument is
 * parenthesized so that expressions such as ABD_SCATTER(p + 1) bind
 * correctly instead of applying "->" to only part of the argument.
 */
#define	ABD_SCATTER(abd)	((abd)->abd_u.abd_scatter)
#define	ABD_LINEAR_BUF(abd)	((abd)->abd_u.abd_linear.abd_buf)
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _ABD_IMPL_H */
|
|
@ -39,6 +39,7 @@ KERNEL_C = \
|
|||
zpool_prop.c \
|
||||
zprop_common.c \
|
||||
abd.c \
|
||||
abd_os.c \
|
||||
aggsum.c \
|
||||
arc.c \
|
||||
arc_os.c \
|
||||
|
|
|
@ -127,7 +127,7 @@ SRCS+= spl_atomic.c
|
|||
.endif
|
||||
|
||||
#os/freebsd/zfs
|
||||
SRCS+= abd.c \
|
||||
SRCS+= abd_os.c \
|
||||
crypto_os.c \
|
||||
dmu_os.c \
|
||||
hkdf.c \
|
||||
|
@ -169,7 +169,8 @@ SRCS+= zfeature_common.c \
|
|||
zprop_common.c
|
||||
|
||||
#zfs
|
||||
SRCS+= aggsum.c \
|
||||
SRCS+= abd.c \
|
||||
aggsum.c \
|
||||
arc.c \
|
||||
arc_os.c \
|
||||
blkptr.c \
|
||||
|
|
|
@ -0,0 +1,433 @@
|
|||
/*
|
||||
* This file and its contents are supplied under the terms of the
|
||||
* Common Development and Distribution License ("CDDL"), version 1.0.
|
||||
* You may only use this file in accordance with the terms of version
|
||||
* 1.0 of the CDDL.
|
||||
*
|
||||
* A full copy of the text of the CDDL should have accompanied this
|
||||
* source. A copy of the CDDL is also available via the Internet at
|
||||
* http://www.illumos.org/license/CDDL.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2014 by Chunwei Chen. All rights reserved.
|
||||
* Copyright (c) 2016 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
/*
|
||||
* See abd.c for a general overview of the arc buffered data (ABD).
|
||||
*
|
||||
* Using a large proportion of scattered ABDs decreases ARC fragmentation since
|
||||
* when we are at the limit of allocatable space, using equal-size chunks will
|
||||
* allow us to quickly reclaim enough space for a new large allocation (assuming
|
||||
* it is also scattered).
|
||||
*
|
||||
* ABDs are allocated scattered by default unless the caller uses
|
||||
* abd_alloc_linear() or zfs_abd_scatter_enabled is disabled.
|
||||
*/
|
||||
|
||||
#include <sys/abd_impl.h>
|
||||
#include <sys/param.h>
|
||||
#include <sys/zio.h>
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/zfs_znode.h>
|
||||
|
||||
typedef struct abd_stats {
|
||||
kstat_named_t abdstat_struct_size;
|
||||
kstat_named_t abdstat_scatter_cnt;
|
||||
kstat_named_t abdstat_scatter_data_size;
|
||||
kstat_named_t abdstat_scatter_chunk_waste;
|
||||
kstat_named_t abdstat_linear_cnt;
|
||||
kstat_named_t abdstat_linear_data_size;
|
||||
} abd_stats_t;
|
||||
|
||||
static abd_stats_t abd_stats = {
|
||||
/* Amount of memory occupied by all of the abd_t struct allocations */
|
||||
{ "struct_size", KSTAT_DATA_UINT64 },
|
||||
/*
|
||||
* The number of scatter ABDs which are currently allocated, excluding
|
||||
* ABDs which don't own their data (for instance the ones which were
|
||||
* allocated through abd_get_offset()).
|
||||
*/
|
||||
{ "scatter_cnt", KSTAT_DATA_UINT64 },
|
||||
/* Amount of data stored in all scatter ABDs tracked by scatter_cnt */
|
||||
{ "scatter_data_size", KSTAT_DATA_UINT64 },
|
||||
/*
|
||||
* The amount of space wasted at the end of the last chunk across all
|
||||
* scatter ABDs tracked by scatter_cnt.
|
||||
*/
|
||||
{ "scatter_chunk_waste", KSTAT_DATA_UINT64 },
|
||||
/*
|
||||
* The number of linear ABDs which are currently allocated, excluding
|
||||
* ABDs which don't own their data (for instance the ones which were
|
||||
* allocated through abd_get_offset() and abd_get_from_buf()). If an
|
||||
* ABD takes ownership of its buf then it will become tracked.
|
||||
*/
|
||||
{ "linear_cnt", KSTAT_DATA_UINT64 },
|
||||
/* Amount of data stored in all linear ABDs tracked by linear_cnt */
|
||||
{ "linear_data_size", KSTAT_DATA_UINT64 },
|
||||
};
|
||||
|
||||
/*
|
||||
* The size of the chunks ABD allocates. Because the sizes allocated from the
|
||||
* kmem_cache can't change, this tunable can only be modified at boot. Changing
|
||||
* it at runtime would cause ABD iteration to work incorrectly for ABDs which
|
||||
* were allocated with the old size, so a safeguard has been put in place which
|
||||
* will cause the machine to panic if you change it and try to access the data
|
||||
* within a scattered ABD.
|
||||
*/
|
||||
size_t zfs_abd_chunk_size = 4096;
|
||||
|
||||
#if defined(_KERNEL)
|
||||
SYSCTL_DECL(_vfs_zfs);
|
||||
|
||||
SYSCTL_INT(_vfs_zfs, OID_AUTO, abd_scatter_enabled, CTLFLAG_RWTUN,
|
||||
&zfs_abd_scatter_enabled, 0, "Enable scattered ARC data buffers");
|
||||
SYSCTL_ULONG(_vfs_zfs, OID_AUTO, abd_chunk_size, CTLFLAG_RDTUN,
|
||||
&zfs_abd_chunk_size, 0, "The size of the chunks ABD allocates");
|
||||
#endif
|
||||
|
||||
kmem_cache_t *abd_chunk_cache;
|
||||
static kstat_t *abd_ksp;
|
||||
|
||||
static void
|
||||
abd_free_chunk(void *c)
|
||||
{
|
||||
kmem_cache_free(abd_chunk_cache, c);
|
||||
}
|
||||
|
||||
static size_t
|
||||
abd_chunkcnt_for_bytes(size_t size)
|
||||
{
|
||||
return (P2ROUNDUP(size, zfs_abd_chunk_size) / zfs_abd_chunk_size);
|
||||
}
|
||||
|
||||
static inline size_t
|
||||
abd_scatter_chunkcnt(abd_t *abd)
|
||||
{
|
||||
ASSERT(!abd_is_linear(abd));
|
||||
return (abd_chunkcnt_for_bytes(
|
||||
ABD_SCATTER(abd).abd_offset + abd->abd_size));
|
||||
}
|
||||
|
||||
boolean_t
|
||||
abd_size_alloc_linear(size_t size)
|
||||
{
|
||||
return (size <= zfs_abd_chunk_size ? B_TRUE : B_FALSE);
|
||||
}
|
||||
|
||||
void
|
||||
abd_update_scatter_stats(abd_t *abd, abd_stats_op_t op)
|
||||
{
|
||||
size_t n = abd_scatter_chunkcnt(abd);
|
||||
ASSERT(op == ABDSTAT_INCR || op == ABDSTAT_DECR);
|
||||
if (op == ABDSTAT_INCR) {
|
||||
ABDSTAT_BUMP(abdstat_scatter_cnt);
|
||||
ABDSTAT_INCR(abdstat_scatter_data_size, abd->abd_size);
|
||||
ABDSTAT_INCR(abdstat_scatter_chunk_waste,
|
||||
n * zfs_abd_chunk_size - abd->abd_size);
|
||||
} else {
|
||||
ABDSTAT_BUMPDOWN(abdstat_scatter_cnt);
|
||||
ABDSTAT_INCR(abdstat_scatter_data_size, -(int)abd->abd_size);
|
||||
ABDSTAT_INCR(abdstat_scatter_chunk_waste,
|
||||
abd->abd_size - n * zfs_abd_chunk_size);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
abd_update_linear_stats(abd_t *abd, abd_stats_op_t op)
|
||||
{
|
||||
ASSERT(op == ABDSTAT_INCR || op == ABDSTAT_DECR);
|
||||
if (op == ABDSTAT_INCR) {
|
||||
ABDSTAT_BUMP(abdstat_linear_cnt);
|
||||
ABDSTAT_INCR(abdstat_linear_data_size, abd->abd_size);
|
||||
} else {
|
||||
ABDSTAT_BUMPDOWN(abdstat_linear_cnt);
|
||||
ABDSTAT_INCR(abdstat_linear_data_size, -(int)abd->abd_size);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
abd_verify_scatter(abd_t *abd)
|
||||
{
|
||||
/*
|
||||
* There is no scatter linear pages in FreeBSD so there is an
|
||||
* if an error if the ABD has been marked as a linear page.
|
||||
*/
|
||||
VERIFY(!abd_is_linear_page(abd));
|
||||
ASSERT3U(ABD_SCATTER(abd).abd_offset, <,
|
||||
zfs_abd_chunk_size);
|
||||
size_t n = abd_scatter_chunkcnt(abd);
|
||||
for (int i = 0; i < n; i++) {
|
||||
ASSERT3P(
|
||||
ABD_SCATTER(abd).abd_chunks[i], !=, NULL);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
abd_alloc_chunks(abd_t *abd, size_t size)
|
||||
{
|
||||
size_t n = abd_chunkcnt_for_bytes(size);
|
||||
for (int i = 0; i < n; i++) {
|
||||
void *c = kmem_cache_alloc(abd_chunk_cache, KM_PUSHPAGE);
|
||||
ASSERT3P(c, !=, NULL);
|
||||
ABD_SCATTER(abd).abd_chunks[i] = c;
|
||||
}
|
||||
ABD_SCATTER(abd).abd_chunk_size = zfs_abd_chunk_size;
|
||||
}
|
||||
|
||||
void
|
||||
abd_free_chunks(abd_t *abd)
|
||||
{
|
||||
size_t n = abd_scatter_chunkcnt(abd);
|
||||
for (int i = 0; i < n; i++) {
|
||||
abd_free_chunk(ABD_SCATTER(abd).abd_chunks[i]);
|
||||
}
|
||||
}
|
||||
|
||||
abd_t *
|
||||
abd_alloc_struct(size_t size)
|
||||
{
|
||||
size_t chunkcnt = abd_chunkcnt_for_bytes(size);
|
||||
size_t abd_size = offsetof(abd_t,
|
||||
abd_u.abd_scatter.abd_chunks[chunkcnt]);
|
||||
abd_t *abd = kmem_alloc(abd_size, KM_PUSHPAGE);
|
||||
ASSERT3P(abd, !=, NULL);
|
||||
ABDSTAT_INCR(abdstat_struct_size, abd_size);
|
||||
|
||||
return (abd);
|
||||
}
|
||||
|
||||
void
|
||||
abd_free_struct(abd_t *abd)
|
||||
{
|
||||
size_t chunkcnt = abd_is_linear(abd) ? 0 : abd_scatter_chunkcnt(abd);
|
||||
int size = offsetof(abd_t, abd_u.abd_scatter.abd_chunks[chunkcnt]);
|
||||
kmem_free(abd, size);
|
||||
ABDSTAT_INCR(abdstat_struct_size, -size);
|
||||
}
|
||||
|
||||
void
|
||||
abd_init(void)
|
||||
{
|
||||
abd_chunk_cache = kmem_cache_create("abd_chunk", zfs_abd_chunk_size, 0,
|
||||
NULL, NULL, NULL, NULL, 0, KMC_NOTOUCH | KMC_NODEBUG);
|
||||
|
||||
abd_ksp = kstat_create("zfs", 0, "abdstats", "misc", KSTAT_TYPE_NAMED,
|
||||
sizeof (abd_stats) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL);
|
||||
if (abd_ksp != NULL) {
|
||||
abd_ksp->ks_data = &abd_stats;
|
||||
kstat_install(abd_ksp);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
abd_fini(void)
|
||||
{
|
||||
if (abd_ksp != NULL) {
|
||||
kstat_delete(abd_ksp);
|
||||
abd_ksp = NULL;
|
||||
}
|
||||
|
||||
kmem_cache_destroy(abd_chunk_cache);
|
||||
abd_chunk_cache = NULL;
|
||||
}
|
||||
|
||||
void
|
||||
abd_free_linear_page(abd_t *abd)
|
||||
{
|
||||
/*
|
||||
* FreeBSD does not have have scatter linear pages
|
||||
* so there is an error.
|
||||
*/
|
||||
VERIFY(0);
|
||||
}
|
||||
|
||||
/*
|
||||
* If we're going to use this ABD for doing I/O using the block layer, the
|
||||
* consumer of the ABD data doesn't care if it's scattered or not, and we don't
|
||||
* plan to store this ABD in memory for a long period of time, we should
|
||||
* allocate the ABD type that requires the least data copying to do the I/O.
|
||||
*
|
||||
* Currently this is linear ABDs, however if ldi_strategy() can ever issue I/Os
|
||||
* using a scatter/gather list we should switch to that and replace this call
|
||||
* with vanilla abd_alloc().
|
||||
*/
|
||||
abd_t *
|
||||
abd_alloc_for_io(size_t size, boolean_t is_metadata)
|
||||
{
|
||||
return (abd_alloc_linear(size, is_metadata));
|
||||
}
|
||||
|
||||
/*
|
||||
* This is just a helper function to abd_get_offset_scatter() to alloc a
|
||||
* scatter ABD using the calculated chunkcnt based on the offset within the
|
||||
* parent ABD.
|
||||
*/
|
||||
static abd_t *
|
||||
abd_alloc_scatter_offset_chunkcnt(size_t chunkcnt)
|
||||
{
|
||||
size_t abd_size = offsetof(abd_t,
|
||||
abd_u.abd_scatter.abd_chunks[chunkcnt]);
|
||||
abd_t *abd = kmem_alloc(abd_size, KM_PUSHPAGE);
|
||||
ASSERT3P(abd, !=, NULL);
|
||||
ABDSTAT_INCR(abdstat_struct_size, abd_size);
|
||||
|
||||
return (abd);
|
||||
}
|
||||
|
||||
|
||||
/*
 * Build a new scatter ABD that refers to the chunks of sabd starting off
 * bytes in. The chunk pointers are copied, not the data. abd_flags is
 * cleared, and abd_size is not set by this function.
 */
abd_t *
abd_get_offset_scatter(abd_t *sabd, size_t off)
{
	abd_verify(sabd);
	ASSERT3U(off, <=, sabd->abd_size);

	/* Position of the new start, measured from sabd's first chunk. */
	size_t new_offset = ABD_SCATTER(sabd).abd_offset + off;
	size_t first_chunk = new_offset / zfs_abd_chunk_size;
	size_t chunkcnt = abd_scatter_chunkcnt(sabd) - first_chunk;

	abd_t *abd = abd_alloc_scatter_offset_chunkcnt(chunkcnt);

	/*
	 * Even if this buf is filesystem metadata, we only track that
	 * if we own the underlying data buffer, which is not true in
	 * this case. Therefore, we don't ever use ABD_FLAG_META here.
	 */
	abd->abd_flags = 0;

	ABD_SCATTER(abd).abd_offset = new_offset % zfs_abd_chunk_size;
	ABD_SCATTER(abd).abd_chunk_size = zfs_abd_chunk_size;

	/* Copy the chunk pointers, beginning with the first one needed. */
	(void) memcpy(ABD_SCATTER(abd).abd_chunks,
	    &ABD_SCATTER(sabd).abd_chunks[first_chunk],
	    chunkcnt * sizeof (void *));

	return (abd);
}
|
||||
|
||||
static inline size_t
|
||||
abd_iter_scatter_chunk_offset(struct abd_iter *aiter)
|
||||
{
|
||||
ASSERT(!abd_is_linear(aiter->iter_abd));
|
||||
return ((ABD_SCATTER(aiter->iter_abd).abd_offset +
|
||||
aiter->iter_pos) % zfs_abd_chunk_size);
|
||||
}
|
||||
|
||||
static inline size_t
|
||||
abd_iter_scatter_chunk_index(struct abd_iter *aiter)
|
||||
{
|
||||
ASSERT(!abd_is_linear(aiter->iter_abd));
|
||||
return ((ABD_SCATTER(aiter->iter_abd).abd_offset +
|
||||
aiter->iter_pos) / zfs_abd_chunk_size);
|
||||
}
|
||||
|
||||
/*
|
||||
* Initialize the abd_iter.
|
||||
*/
|
||||
void
|
||||
abd_iter_init(struct abd_iter *aiter, abd_t *abd)
|
||||
{
|
||||
abd_verify(abd);
|
||||
aiter->iter_abd = abd;
|
||||
aiter->iter_pos = 0;
|
||||
aiter->iter_mapaddr = NULL;
|
||||
aiter->iter_mapsize = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* This is just a helper function to see if we have exhausted the
|
||||
* abd_iter and reached the end.
|
||||
*/
|
||||
boolean_t
|
||||
abd_iter_at_end(struct abd_iter *aiter)
|
||||
{
|
||||
return (aiter->iter_pos == aiter->iter_abd->abd_size);
|
||||
}
|
||||
|
||||
/*
|
||||
* Advance the iterator by a certain amount. Cannot be called when a chunk is
|
||||
* in use. This can be safely called when the aiter has already been exhausted, in
|
||||
* which case this does nothing.
|
||||
*/
|
||||
void
|
||||
abd_iter_advance(struct abd_iter *aiter, size_t amount)
|
||||
{
|
||||
ASSERT3P(aiter->iter_mapaddr, ==, NULL);
|
||||
ASSERT0(aiter->iter_mapsize);
|
||||
|
||||
/* There's nothing left to advance to, so do nothing */
|
||||
if (abd_iter_at_end(aiter))
|
||||
return;
|
||||
|
||||
aiter->iter_pos += amount;
|
||||
}
|
||||
|
||||
/*
|
||||
* Map the current chunk into aiter. This can be safely called when the aiter
|
||||
* has already been exhausted, in which case this does nothing.
|
||||
*/
|
||||
void
|
||||
abd_iter_map(struct abd_iter *aiter)
|
||||
{
|
||||
void *paddr;
|
||||
size_t offset = 0;
|
||||
|
||||
ASSERT3P(aiter->iter_mapaddr, ==, NULL);
|
||||
ASSERT0(aiter->iter_mapsize);
|
||||
|
||||
/* Panic if someone has changed zfs_abd_chunk_size */
|
||||
IMPLY(!abd_is_linear(aiter->iter_abd), zfs_abd_chunk_size ==
|
||||
ABD_SCATTER(aiter->iter_abd).abd_chunk_size);
|
||||
|
||||
/* There's nothing left to iterate over, so do nothing */
|
||||
if (abd_iter_at_end(aiter))
|
||||
return;
|
||||
|
||||
if (abd_is_linear(aiter->iter_abd)) {
|
||||
offset = aiter->iter_pos;
|
||||
aiter->iter_mapsize = aiter->iter_abd->abd_size - offset;
|
||||
paddr = ABD_LINEAR_BUF(aiter->iter_abd);
|
||||
} else {
|
||||
size_t index = abd_iter_scatter_chunk_index(aiter);
|
||||
offset = abd_iter_scatter_chunk_offset(aiter);
|
||||
aiter->iter_mapsize = MIN(zfs_abd_chunk_size - offset,
|
||||
aiter->iter_abd->abd_size - aiter->iter_pos);
|
||||
paddr = ABD_SCATTER(aiter->iter_abd).abd_chunks[index];
|
||||
}
|
||||
aiter->iter_mapaddr = (char *)paddr + offset;
|
||||
}
|
||||
|
||||
/*
|
||||
* Unmap the current chunk from aiter. This can be safely called when the aiter
|
||||
* has already been exhausted, in which case this does nothing.
|
||||
*/
|
||||
void
|
||||
abd_iter_unmap(struct abd_iter *aiter)
|
||||
{
|
||||
/* There's nothing left to unmap, so do nothing */
|
||||
if (abd_iter_at_end(aiter))
|
||||
return;
|
||||
|
||||
ASSERT3P(aiter->iter_mapaddr, !=, NULL);
|
||||
ASSERT3U(aiter->iter_mapsize, >, 0);
|
||||
|
||||
aiter->iter_mapaddr = NULL;
|
||||
aiter->iter_mapsize = 0;
|
||||
}
|
||||
|
||||
/*
 * Enter a critical section by calling critical_enter(). The flags argument
 * is not used by this implementation.
 */
void
abd_enter_critical(unsigned long flags)
{
	critical_enter();
}
|
||||
|
||||
/*
 * Leave a critical section by calling critical_exit(). The flags argument
 * is not used by this implementation.
 */
void
abd_exit_critical(unsigned long flags)
{
	critical_exit();
}
|
|
@ -7,7 +7,7 @@ ccflags-$(CONFIG_SPARC64) += -Wno-unused-value
|
|||
|
||||
ccflags-y += -I@abs_top_srcdir@/module/os/linux/zfs
|
||||
|
||||
$(MODULE)-objs += ../os/linux/zfs/abd.o
|
||||
$(MODULE)-objs += ../os/linux/zfs/abd_os.o
|
||||
$(MODULE)-objs += ../os/linux/zfs/arc_os.o
|
||||
$(MODULE)-objs += ../os/linux/zfs/mmp_os.o
|
||||
$(MODULE)-objs += ../os/linux/zfs/policy.o
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,891 @@
|
|||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2014 by Chunwei Chen. All rights reserved.
|
||||
* Copyright (c) 2019 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
/*
|
||||
* See abd.c for a general overview of the arc buffered data (ABD).
|
||||
*
|
||||
* Linear buffers act exactly like normal buffers and are always mapped into the
|
||||
* kernel's virtual memory space, while scattered ABD data chunks are allocated
|
||||
* as physical pages and then mapped in only while they are actually being
|
||||
* accessed through one of the abd_* library functions. Using scattered ABDs
|
||||
* provides several benefits:
|
||||
*
|
||||
* (1) They avoid use of kmem_*, preventing performance problems where running
|
||||
* kmem_reap on very large memory systems never finishes and causes
|
||||
* constant TLB shootdowns.
|
||||
*
|
||||
* (2) Fragmentation is less of an issue since when we are at the limit of
|
||||
* allocatable space, we won't have to search around for a long free
|
||||
* hole in the VA space for large ARC allocations. Each chunk is mapped in
|
||||
* individually, so even if we are using HIGHMEM (see next point) we
|
||||
* wouldn't need to worry about finding a contiguous address range.
|
||||
*
|
||||
* (3) If we are not using HIGHMEM, then all physical memory is always
|
||||
* mapped into the kernel's address space, so we also avoid the map /
|
||||
* unmap costs on each ABD access.
|
||||
*
|
||||
* If we are not using HIGHMEM, scattered buffers which have only one chunk
|
||||
* can be treated as linear buffers, because they are contiguous in the
|
||||
* kernel's virtual address space. See abd_alloc_chunks() for details.
|
||||
*/
|
||||
|
||||
#include <sys/abd_impl.h>
|
||||
#include <sys/param.h>
|
||||
#include <sys/zio.h>
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/zfs_znode.h>
|
||||
#ifdef _KERNEL
|
||||
#include <linux/kmap_compat.h>
|
||||
#include <linux/scatterlist.h>
|
||||
#else
|
||||
#define MAX_ORDER 1
|
||||
#endif
|
||||
|
||||
typedef struct abd_stats {
|
||||
kstat_named_t abdstat_struct_size;
|
||||
kstat_named_t abdstat_linear_cnt;
|
||||
kstat_named_t abdstat_linear_data_size;
|
||||
kstat_named_t abdstat_scatter_cnt;
|
||||
kstat_named_t abdstat_scatter_data_size;
|
||||
kstat_named_t abdstat_scatter_chunk_waste;
|
||||
kstat_named_t abdstat_scatter_orders[MAX_ORDER];
|
||||
kstat_named_t abdstat_scatter_page_multi_chunk;
|
||||
kstat_named_t abdstat_scatter_page_multi_zone;
|
||||
kstat_named_t abdstat_scatter_page_alloc_retry;
|
||||
kstat_named_t abdstat_scatter_sg_table_retry;
|
||||
} abd_stats_t;
|
||||
|
||||
static abd_stats_t abd_stats = {
|
||||
/* Amount of memory occupied by all of the abd_t struct allocations */
|
||||
{ "struct_size", KSTAT_DATA_UINT64 },
|
||||
/*
|
||||
* The number of linear ABDs which are currently allocated, excluding
|
||||
* ABDs which don't own their data (for instance the ones which were
|
||||
* allocated through abd_get_offset() and abd_get_from_buf()). If an
|
||||
* ABD takes ownership of its buf then it will become tracked.
|
||||
*/
|
||||
{ "linear_cnt", KSTAT_DATA_UINT64 },
|
||||
/* Amount of data stored in all linear ABDs tracked by linear_cnt */
|
||||
{ "linear_data_size", KSTAT_DATA_UINT64 },
|
||||
/*
|
||||
* The number of scatter ABDs which are currently allocated, excluding
|
||||
* ABDs which don't own their data (for instance the ones which were
|
||||
* allocated through abd_get_offset()).
|
||||
*/
|
||||
{ "scatter_cnt", KSTAT_DATA_UINT64 },
|
||||
/* Amount of data stored in all scatter ABDs tracked by scatter_cnt */
|
||||
{ "scatter_data_size", KSTAT_DATA_UINT64 },
|
||||
/*
|
||||
* The amount of space wasted at the end of the last chunk across all
|
||||
* scatter ABDs tracked by scatter_cnt.
|
||||
*/
|
||||
{ "scatter_chunk_waste", KSTAT_DATA_UINT64 },
|
||||
/*
|
||||
* The number of compound allocations of a given order. These
|
||||
* allocations are spread over all currently allocated ABDs, and
|
||||
* act as a measure of memory fragmentation.
|
||||
*/
|
||||
{ { "scatter_order_N", KSTAT_DATA_UINT64 } },
|
||||
/*
|
||||
* The number of scatter ABDs which contain multiple chunks.
|
||||
* ABDs are preferentially allocated from the minimum number of
|
||||
* contiguous multi-page chunks, a single chunk is optimal.
|
||||
*/
|
||||
{ "scatter_page_multi_chunk", KSTAT_DATA_UINT64 },
|
||||
/*
|
||||
* The number of scatter ABDs which are split across memory zones.
|
||||
* ABDs are preferentially allocated using pages from a single zone.
|
||||
*/
|
||||
{ "scatter_page_multi_zone", KSTAT_DATA_UINT64 },
|
||||
/*
|
||||
* The total number of retries encountered when attempting to
|
||||
* allocate the pages to populate the scatter ABD.
|
||||
*/
|
||||
{ "scatter_page_alloc_retry", KSTAT_DATA_UINT64 },
|
||||
/*
|
||||
* The total number of retries encountered when attempting to
|
||||
* allocate the sg table for an ABD.
|
||||
*/
|
||||
{ "scatter_sg_table_retry", KSTAT_DATA_UINT64 },
|
||||
};
|
||||
|
||||
#define abd_for_each_sg(abd, sg, n, i) \
|
||||
for_each_sg(ABD_SCATTER(abd).abd_sgl, sg, n, i)
|
||||
|
||||
unsigned zfs_abd_scatter_max_order = MAX_ORDER - 1;
|
||||
|
||||
/*
|
||||
* zfs_abd_scatter_min_size is the minimum allocation size to use scatter
|
||||
* ABD's. Smaller allocations will use linear ABD's which uses
|
||||
* zio_[data_]buf_alloc().
|
||||
*
|
||||
* Scatter ABD's use at least one page each, so sub-page allocations waste
|
||||
* some space when allocated as scatter (e.g. 2KB scatter allocation wastes
|
||||
* half of each page). Using linear ABD's for small allocations means that
|
||||
* they will be put on slabs which contain many allocations. This can
|
||||
* improve memory efficiency, but it also makes it much harder for ARC
|
||||
* evictions to actually free pages, because all the buffers on one slab need
|
||||
* to be freed in order for the slab (and underlying pages) to be freed.
|
||||
* Typically, 512B and 1KB kmem caches have 16 buffers per slab, so it's
|
||||
* possible for them to actually waste more memory than scatter (one page per
|
||||
* buf = wasting 3/4 or 7/8th; one buf per slab = wasting 15/16th).
|
||||
*
|
||||
* Spill blocks are typically 512B and are heavily used on systems running
|
||||
* selinux with the default dnode size and the `xattr=sa` property set.
|
||||
*
|
||||
* By default we use linear allocations for 512B and 1KB, and scatter
|
||||
* allocations for larger (1.5KB and up).
|
||||
*/
|
||||
int zfs_abd_scatter_min_size = 512 * 3;
|
||||
|
||||
static kmem_cache_t *abd_cache = NULL;
|
||||
static kstat_t *abd_ksp;
|
||||
|
||||
static size_t
|
||||
abd_chunkcnt_for_bytes(size_t size)
|
||||
{
|
||||
return (P2ROUNDUP(size, PAGESIZE) / PAGESIZE);
|
||||
}
|
||||
|
||||
abd_t *
|
||||
abd_alloc_struct(size_t size)
|
||||
{
|
||||
/*
|
||||
* In Linux we do not use the size passed in during ABD
|
||||
* allocation, so we just ignore it.
|
||||
*/
|
||||
abd_t *abd = kmem_cache_alloc(abd_cache, KM_PUSHPAGE);
|
||||
ASSERT3P(abd, !=, NULL);
|
||||
ABDSTAT_INCR(abdstat_struct_size, sizeof (abd_t));
|
||||
|
||||
return (abd);
|
||||
}
|
||||
|
||||
void
|
||||
abd_free_struct(abd_t *abd)
|
||||
{
|
||||
kmem_cache_free(abd_cache, abd);
|
||||
ABDSTAT_INCR(abdstat_struct_size, -(int)sizeof (abd_t));
|
||||
}
|
||||
|
||||
#ifdef _KERNEL
|
||||
/*
|
||||
* Mark zfs data pages so they can be excluded from kernel crash dumps
|
||||
*/
|
||||
#ifdef _LP64
|
||||
#define ABD_FILE_CACHE_PAGE 0x2F5ABDF11ECAC4E
|
||||
|
||||
static inline void
|
||||
abd_mark_zfs_page(struct page *page)
|
||||
{
|
||||
get_page(page);
|
||||
SetPagePrivate(page);
|
||||
set_page_private(page, ABD_FILE_CACHE_PAGE);
|
||||
}
|
||||
|
||||
/*
 * Remove the ZFS marker from `page`: zero its private field, clear the
 * Private flag, and drop the reference taken by abd_mark_zfs_page().
 */
static inline void
abd_unmark_zfs_page(struct page *page)
{
	/* Clear the marker value before the flag that advertises it. */
	set_page_private(page, 0UL);
	ClearPagePrivate(page);

	put_page(page);
}
|
||||
#else
|
||||
#define abd_mark_zfs_page(page)
|
||||
#define abd_unmark_zfs_page(page)
|
||||
#endif /* _LP64 */
|
||||
|
||||
#ifndef CONFIG_HIGHMEM
|
||||
|
||||
#ifndef __GFP_RECLAIM
|
||||
#define __GFP_RECLAIM __GFP_WAIT
|
||||
#endif
|
||||
|
||||
/*
|
||||
* The goal is to minimize fragmentation by preferentially populating ABDs
|
||||
* with higher order compound pages from a single zone. Allocation size is
|
||||
* progressively decreased until it can be satisfied without performing
|
||||
* reclaim or compaction. When necessary this function will degenerate to
|
||||
* allocating individual pages and allowing reclaim to satisfy allocations.
|
||||
*/
|
||||
void
|
||||
abd_alloc_chunks(abd_t *abd, size_t size)
|
||||
{
|
||||
struct list_head pages;
|
||||
struct sg_table table;
|
||||
struct scatterlist *sg;
|
||||
struct page *page, *tmp_page = NULL;
|
||||
gfp_t gfp = __GFP_NOWARN | GFP_NOIO;
|
||||
gfp_t gfp_comp = (gfp | __GFP_NORETRY | __GFP_COMP) & ~__GFP_RECLAIM;
|
||||
int max_order = MIN(zfs_abd_scatter_max_order, MAX_ORDER - 1);
|
||||
int nr_pages = abd_chunkcnt_for_bytes(size);
|
||||
int chunks = 0, zones = 0;
|
||||
size_t remaining_size;
|
||||
int nid = NUMA_NO_NODE;
|
||||
int alloc_pages = 0;
|
||||
|
||||
INIT_LIST_HEAD(&pages);
|
||||
|
||||
while (alloc_pages < nr_pages) {
|
||||
unsigned chunk_pages;
|
||||
int order;
|
||||
|
||||
order = MIN(highbit64(nr_pages - alloc_pages) - 1, max_order);
|
||||
chunk_pages = (1U << order);
|
||||
|
||||
page = alloc_pages_node(nid, order ? gfp_comp : gfp, order);
|
||||
if (page == NULL) {
|
||||
if (order == 0) {
|
||||
ABDSTAT_BUMP(abdstat_scatter_page_alloc_retry);
|
||||
schedule_timeout_interruptible(1);
|
||||
} else {
|
||||
max_order = MAX(0, order - 1);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
list_add_tail(&page->lru, &pages);
|
||||
|
||||
if ((nid != NUMA_NO_NODE) && (page_to_nid(page) != nid))
|
||||
zones++;
|
||||
|
||||
nid = page_to_nid(page);
|
||||
ABDSTAT_BUMP(abdstat_scatter_orders[order]);
|
||||
chunks++;
|
||||
alloc_pages += chunk_pages;
|
||||
}
|
||||
|
||||
ASSERT3S(alloc_pages, ==, nr_pages);
|
||||
|
||||
while (sg_alloc_table(&table, chunks, gfp)) {
|
||||
ABDSTAT_BUMP(abdstat_scatter_sg_table_retry);
|
||||
schedule_timeout_interruptible(1);
|
||||
}
|
||||
|
||||
sg = table.sgl;
|
||||
remaining_size = size;
|
||||
list_for_each_entry_safe(page, tmp_page, &pages, lru) {
|
||||
size_t sg_size = MIN(PAGESIZE << compound_order(page),
|
||||
remaining_size);
|
||||
sg_set_page(sg, page, sg_size, 0);
|
||||
abd_mark_zfs_page(page);
|
||||
remaining_size -= sg_size;
|
||||
|
||||
sg = sg_next(sg);
|
||||
list_del(&page->lru);
|
||||
}
|
||||
|
||||
/*
|
||||
* These conditions ensure that a possible transformation to a linear
|
||||
* ABD would be valid.
|
||||
*/
|
||||
ASSERT(!PageHighMem(sg_page(table.sgl)));
|
||||
ASSERT0(ABD_SCATTER(abd).abd_offset);
|
||||
|
||||
if (table.nents == 1) {
|
||||
/*
|
||||
* Since there is only one entry, this ABD can be represented
|
||||
* as a linear buffer. All single-page (4K) ABD's can be
|
||||
* represented this way. Some multi-page ABD's can also be
|
||||
* represented this way, if we were able to allocate a single
|
||||
* "chunk" (higher-order "page" which represents a power-of-2
|
||||
* series of physically-contiguous pages). This is often the
|
||||
* case for 2-page (8K) ABD's.
|
||||
*
|
||||
* Representing a single-entry scatter ABD as a linear ABD
|
||||
* has the performance advantage of avoiding the copy (and
|
||||
* allocation) in abd_borrow_buf_copy / abd_return_buf_copy.
|
||||
* A performance increase of around 5% has been observed for
|
||||
* ARC-cached reads (of small blocks which can take advantage
|
||||
* of this).
|
||||
*
|
||||
* Note that this optimization is only possible because the
|
||||
* pages are always mapped into the kernel's address space.
|
||||
* This is not the case for highmem pages, so the
|
||||
* optimization can not be made there.
|
||||
*/
|
||||
abd->abd_flags |= ABD_FLAG_LINEAR;
|
||||
abd->abd_flags |= ABD_FLAG_LINEAR_PAGE;
|
||||
abd->abd_u.abd_linear.abd_sgl = table.sgl;
|
||||
ABD_LINEAR_BUF(abd) = page_address(sg_page(table.sgl));
|
||||
} else if (table.nents > 1) {
|
||||
ABDSTAT_BUMP(abdstat_scatter_page_multi_chunk);
|
||||
abd->abd_flags |= ABD_FLAG_MULTI_CHUNK;
|
||||
|
||||
if (zones) {
|
||||
ABDSTAT_BUMP(abdstat_scatter_page_multi_zone);
|
||||
abd->abd_flags |= ABD_FLAG_MULTI_ZONE;
|
||||
}
|
||||
|
||||
ABD_SCATTER(abd).abd_sgl = table.sgl;
|
||||
ABD_SCATTER(abd).abd_nents = table.nents;
|
||||
}
|
||||
}
|
||||
#else
|
||||
|
||||
/*
|
||||
* Allocate N individual pages to construct a scatter ABD. This function
|
||||
* makes no attempt to request contiguous pages and requires the minimal
|
||||
* number of kernel interfaces. It's designed for maximum compatibility.
|
||||
*/
|
||||
void
|
||||
abd_alloc_chunks(abd_t *abd, size_t size)
|
||||
{
|
||||
struct scatterlist *sg = NULL;
|
||||
struct sg_table table;
|
||||
struct page *page;
|
||||
gfp_t gfp = __GFP_NOWARN | GFP_NOIO;
|
||||
int nr_pages = abd_chunkcnt_for_bytes(size);
|
||||
int i = 0;
|
||||
|
||||
while (sg_alloc_table(&table, nr_pages, gfp)) {
|
||||
ABDSTAT_BUMP(abdstat_scatter_sg_table_retry);
|
||||
schedule_timeout_interruptible(1);
|
||||
}
|
||||
|
||||
ASSERT3U(table.nents, ==, nr_pages);
|
||||
ABD_SCATTER(abd).abd_sgl = table.sgl;
|
||||
ABD_SCATTER(abd).abd_nents = nr_pages;
|
||||
|
||||
abd_for_each_sg(abd, sg, nr_pages, i) {
|
||||
while ((page = __page_cache_alloc(gfp)) == NULL) {
|
||||
ABDSTAT_BUMP(abdstat_scatter_page_alloc_retry);
|
||||
schedule_timeout_interruptible(1);
|
||||
}
|
||||
|
||||
ABDSTAT_BUMP(abdstat_scatter_orders[0]);
|
||||
sg_set_page(sg, page, PAGESIZE, 0);
|
||||
abd_mark_zfs_page(page);
|
||||
}
|
||||
|
||||
if (nr_pages > 1) {
|
||||
ABDSTAT_BUMP(abdstat_scatter_page_multi_chunk);
|
||||
abd->abd_flags |= ABD_FLAG_MULTI_CHUNK;
|
||||
}
|
||||
}
|
||||
#endif /* !CONFIG_HIGHMEM */
|
||||
|
||||
/*
|
||||
* This must be called if any of the sg_table allocation functions
|
||||
* are called.
|
||||
*/
|
||||
static void
|
||||
abd_free_sg_table(abd_t *abd)
|
||||
{
|
||||
struct sg_table table;
|
||||
|
||||
table.sgl = ABD_SCATTER(abd).abd_sgl;
|
||||
table.nents = table.orig_nents = ABD_SCATTER(abd).abd_nents;
|
||||
sg_free_table(&table);
|
||||
}
|
||||
|
||||
void
|
||||
abd_free_chunks(abd_t *abd)
|
||||
{
|
||||
struct scatterlist *sg = NULL;
|
||||
struct page *page;
|
||||
int nr_pages = ABD_SCATTER(abd).abd_nents;
|
||||
int order, i = 0;
|
||||
|
||||
if (abd->abd_flags & ABD_FLAG_MULTI_ZONE)
|
||||
ABDSTAT_BUMPDOWN(abdstat_scatter_page_multi_zone);
|
||||
|
||||
if (abd->abd_flags & ABD_FLAG_MULTI_CHUNK)
|
||||
ABDSTAT_BUMPDOWN(abdstat_scatter_page_multi_chunk);
|
||||
|
||||
abd_for_each_sg(abd, sg, nr_pages, i) {
|
||||
page = sg_page(sg);
|
||||
abd_unmark_zfs_page(page);
|
||||
order = compound_order(page);
|
||||
__free_pages(page, order);
|
||||
ASSERT3U(sg->length, <=, PAGE_SIZE << order);
|
||||
ABDSTAT_BUMPDOWN(abdstat_scatter_orders[order]);
|
||||
}
|
||||
abd_free_sg_table(abd);
|
||||
}
|
||||
|
||||
#else /* _KERNEL */
|
||||
|
||||
#ifndef PAGE_SHIFT
|
||||
#define PAGE_SHIFT (highbit64(PAGESIZE)-1)
|
||||
#endif
|
||||
|
||||
struct page;
|
||||
|
||||
#define zfs_kmap_atomic(chunk, km) ((void *)chunk)
|
||||
#define zfs_kunmap_atomic(addr, km) do { (void)(addr); } while (0)
|
||||
#define local_irq_save(flags) do { (void)(flags); } while (0)
|
||||
#define local_irq_restore(flags) do { (void)(flags); } while (0)
|
||||
#define nth_page(pg, i) \
|
||||
((struct page *)((void *)(pg) + (i) * PAGESIZE))
|
||||
|
||||
/*
 * Minimal userspace stand-in for the kernel scatterlist entry.
 */
struct scatterlist {
	struct page *page;	/* chunk this entry refers to */
	int length;		/* bytes of the chunk that are in use */
	int end;		/* non-zero on the last entry of the table */
};

/*
 * Zero an array of `nr` scatterlist entries and flag the last one as the
 * end of the table, which is what sg_next() uses to stop iterating.
 *
 * A non-positive `nr` describes an empty table: nothing is touched.
 * Without this guard nr == 0 would write the end marker to sg[-1], and a
 * negative nr would turn into an enormous memset length.
 */
static void
sg_init_table(struct scatterlist *sg, int nr)
{
	if (nr <= 0)
		return;

	memset(sg, 0, nr * sizeof (struct scatterlist));
	sg[nr - 1].end = 1;
}
|
||||
|
||||
/*
|
||||
* This must be called if any of the sg_table allocation functions
|
||||
* are called.
|
||||
*/
|
||||
static void
|
||||
abd_free_sg_table(abd_t *abd)
|
||||
{
|
||||
int nents = ABD_SCATTER(abd).abd_nents;
|
||||
vmem_free(ABD_SCATTER(abd).abd_sgl,
|
||||
nents * sizeof (struct scatterlist));
|
||||
}
|
||||
|
||||
#define for_each_sg(sgl, sg, nr, i) \
|
||||
for ((i) = 0, (sg) = (sgl); (i) < (nr); (i)++, (sg) = sg_next(sg))
|
||||
|
||||
static inline void
|
||||
sg_set_page(struct scatterlist *sg, struct page *page, unsigned int len,
|
||||
unsigned int offset)
|
||||
{
|
||||
/* currently we don't use offset */
|
||||
ASSERT(offset == 0);
|
||||
sg->page = page;
|
||||
sg->length = len;
|
||||
}
|
||||
|
||||
static inline struct page *
|
||||
sg_page(struct scatterlist *sg)
|
||||
{
|
||||
return (sg->page);
|
||||
}
|
||||
|
||||
static inline struct scatterlist *
|
||||
sg_next(struct scatterlist *sg)
|
||||
{
|
||||
if (sg->end)
|
||||
return (NULL);
|
||||
|
||||
return (sg + 1);
|
||||
}
|
||||
|
||||
void
|
||||
abd_alloc_chunks(abd_t *abd, size_t size)
|
||||
{
|
||||
unsigned nr_pages = abd_chunkcnt_for_bytes(size);
|
||||
struct scatterlist *sg;
|
||||
int i;
|
||||
|
||||
ABD_SCATTER(abd).abd_sgl = vmem_alloc(nr_pages *
|
||||
sizeof (struct scatterlist), KM_SLEEP);
|
||||
sg_init_table(ABD_SCATTER(abd).abd_sgl, nr_pages);
|
||||
|
||||
abd_for_each_sg(abd, sg, nr_pages, i) {
|
||||
struct page *p = umem_alloc_aligned(PAGESIZE, 64, KM_SLEEP);
|
||||
sg_set_page(sg, p, PAGESIZE, 0);
|
||||
}
|
||||
ABD_SCATTER(abd).abd_nents = nr_pages;
|
||||
}
|
||||
|
||||
void
|
||||
abd_free_chunks(abd_t *abd)
|
||||
{
|
||||
int i, n = ABD_SCATTER(abd).abd_nents;
|
||||
struct scatterlist *sg;
|
||||
|
||||
abd_for_each_sg(abd, sg, n, i) {
|
||||
for (int j = 0; j < sg->length; j += PAGESIZE) {
|
||||
struct page *p = nth_page(sg_page(sg), j >> PAGE_SHIFT);
|
||||
umem_free(p, PAGESIZE);
|
||||
}
|
||||
}
|
||||
abd_free_sg_table(abd);
|
||||
}
|
||||
|
||||
#endif /* _KERNEL */
|
||||
|
||||
boolean_t
|
||||
abd_size_alloc_linear(size_t size)
|
||||
{
|
||||
return (size < zfs_abd_scatter_min_size ? B_TRUE : B_FALSE);
|
||||
}
|
||||
|
||||
void
|
||||
abd_update_scatter_stats(abd_t *abd, abd_stats_op_t op)
|
||||
{
|
||||
ASSERT(op == ABDSTAT_INCR || op == ABDSTAT_DECR);
|
||||
if (op == ABDSTAT_INCR) {
|
||||
ABDSTAT_BUMP(abdstat_scatter_cnt);
|
||||
ABDSTAT_INCR(abdstat_scatter_data_size, abd->abd_size);
|
||||
ABDSTAT_INCR(abdstat_scatter_chunk_waste,
|
||||
P2ROUNDUP(abd->abd_size, PAGESIZE) - abd->abd_size);
|
||||
} else {
|
||||
ABDSTAT_BUMPDOWN(abdstat_scatter_cnt);
|
||||
ABDSTAT_INCR(abdstat_scatter_data_size, -(int)abd->abd_size);
|
||||
ABDSTAT_INCR(abdstat_scatter_chunk_waste,
|
||||
(int)abd->abd_size
|
||||
-(int)P2ROUNDUP(abd->abd_size, PAGESIZE));
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
abd_update_linear_stats(abd_t *abd, abd_stats_op_t op)
|
||||
{
|
||||
ASSERT(op == ABDSTAT_INCR || op == ABDSTAT_DECR);
|
||||
if (op == ABDSTAT_INCR) {
|
||||
ABDSTAT_BUMP(abdstat_linear_cnt);
|
||||
ABDSTAT_INCR(abdstat_linear_data_size, abd->abd_size);
|
||||
} else {
|
||||
ABDSTAT_BUMPDOWN(abdstat_linear_cnt);
|
||||
ABDSTAT_INCR(abdstat_linear_data_size, -(int)abd->abd_size);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
abd_verify_scatter(abd_t *abd)
|
||||
{
|
||||
size_t n;
|
||||
int i = 0;
|
||||
struct scatterlist *sg = NULL;
|
||||
|
||||
ASSERT3U(ABD_SCATTER(abd).abd_nents, >, 0);
|
||||
ASSERT3U(ABD_SCATTER(abd).abd_offset, <,
|
||||
ABD_SCATTER(abd).abd_sgl->length);
|
||||
n = ABD_SCATTER(abd).abd_nents;
|
||||
abd_for_each_sg(abd, sg, n, i) {
|
||||
ASSERT3P(sg_page(sg), !=, NULL);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
abd_init(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
abd_cache = kmem_cache_create("abd_t", sizeof (abd_t),
|
||||
0, NULL, NULL, NULL, NULL, NULL, 0);
|
||||
|
||||
abd_ksp = kstat_create("zfs", 0, "abdstats", "misc", KSTAT_TYPE_NAMED,
|
||||
sizeof (abd_stats) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL);
|
||||
if (abd_ksp != NULL) {
|
||||
for (i = 0; i < MAX_ORDER; i++) {
|
||||
snprintf(abd_stats.abdstat_scatter_orders[i].name,
|
||||
KSTAT_STRLEN, "scatter_order_%d", i);
|
||||
abd_stats.abdstat_scatter_orders[i].data_type =
|
||||
KSTAT_DATA_UINT64;
|
||||
}
|
||||
abd_ksp->ks_data = &abd_stats;
|
||||
kstat_install(abd_ksp);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
abd_fini(void)
|
||||
{
|
||||
if (abd_ksp != NULL) {
|
||||
kstat_delete(abd_ksp);
|
||||
abd_ksp = NULL;
|
||||
}
|
||||
|
||||
if (abd_cache) {
|
||||
kmem_cache_destroy(abd_cache);
|
||||
abd_cache = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
abd_free_linear_page(abd_t *abd)
|
||||
{
|
||||
/* Transform it back into a scatter ABD for freeing */
|
||||
struct scatterlist *sg = abd->abd_u.abd_linear.abd_sgl;
|
||||
abd->abd_flags &= ~ABD_FLAG_LINEAR;
|
||||
abd->abd_flags &= ~ABD_FLAG_LINEAR_PAGE;
|
||||
ABD_SCATTER(abd).abd_nents = 1;
|
||||
ABD_SCATTER(abd).abd_offset = 0;
|
||||
ABD_SCATTER(abd).abd_sgl = sg;
|
||||
abd_free_chunks(abd);
|
||||
|
||||
zfs_refcount_destroy(&abd->abd_children);
|
||||
abd_update_scatter_stats(abd, ABDSTAT_DECR);
|
||||
abd_free_struct(abd);
|
||||
}
|
||||
|
||||
/*
|
||||
* If we're going to use this ABD for doing I/O using the block layer, the
|
||||
* consumer of the ABD data doesn't care if it's scattered or not, and we don't
|
||||
* plan to store this ABD in memory for a long period of time, we should
|
||||
* allocate the ABD type that requires the least data copying to do the I/O.
|
||||
*
|
||||
* On Linux the optimal thing to do would be to use abd_get_offset() and
|
||||
* construct a new ABD which shares the original pages thereby eliminating
|
||||
* the copy. But for the moment a new linear ABD is allocated until this
|
||||
* performance optimization can be implemented.
|
||||
*/
|
||||
abd_t *
|
||||
abd_alloc_for_io(size_t size, boolean_t is_metadata)
|
||||
{
|
||||
return (abd_alloc(size, is_metadata));
|
||||
}
|
||||
|
||||
abd_t *
|
||||
abd_get_offset_scatter(abd_t *sabd, size_t off)
|
||||
{
|
||||
abd_t *abd = NULL;
|
||||
int i = 0;
|
||||
struct scatterlist *sg = NULL;
|
||||
|
||||
abd_verify(sabd);
|
||||
ASSERT3U(off, <=, sabd->abd_size);
|
||||
|
||||
size_t new_offset = ABD_SCATTER(sabd).abd_offset + off;
|
||||
|
||||
abd = abd_alloc_struct(0);
|
||||
|
||||
/*
|
||||
* Even if this buf is filesystem metadata, we only track that
|
||||
* if we own the underlying data buffer, which is not true in
|
||||
* this case. Therefore, we don't ever use ABD_FLAG_META here.
|
||||
*/
|
||||
abd->abd_flags = 0;
|
||||
|
||||
abd_for_each_sg(sabd, sg, ABD_SCATTER(sabd).abd_nents, i) {
|
||||
if (new_offset < sg->length)
|
||||
break;
|
||||
new_offset -= sg->length;
|
||||
}
|
||||
|
||||
ABD_SCATTER(abd).abd_sgl = sg;
|
||||
ABD_SCATTER(abd).abd_offset = new_offset;
|
||||
ABD_SCATTER(abd).abd_nents = ABD_SCATTER(sabd).abd_nents - i;
|
||||
|
||||
return (abd);
|
||||
}
|
||||
|
||||
/*
|
||||
* Initialize the abd_iter.
|
||||
*/
|
||||
void
|
||||
abd_iter_init(struct abd_iter *aiter, abd_t *abd)
|
||||
{
|
||||
abd_verify(abd);
|
||||
aiter->iter_abd = abd;
|
||||
aiter->iter_mapaddr = NULL;
|
||||
aiter->iter_mapsize = 0;
|
||||
aiter->iter_pos = 0;
|
||||
if (abd_is_linear(abd)) {
|
||||
aiter->iter_offset = 0;
|
||||
aiter->iter_sg = NULL;
|
||||
} else {
|
||||
aiter->iter_offset = ABD_SCATTER(abd).abd_offset;
|
||||
aiter->iter_sg = ABD_SCATTER(abd).abd_sgl;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* This is just a helper function to see if we have exhausted the
|
||||
* abd_iter and reached the end.
|
||||
*/
|
||||
boolean_t
|
||||
abd_iter_at_end(struct abd_iter *aiter)
|
||||
{
|
||||
return (aiter->iter_pos == aiter->iter_abd->abd_size);
|
||||
}
|
||||
|
||||
/*
|
||||
* Advance the iterator by a certain amount. Cannot be called when a chunk is
|
||||
* in use. This can be safely called when the aiter has already exhausted, in
|
||||
* which case this does nothing.
|
||||
*/
|
||||
void
|
||||
abd_iter_advance(struct abd_iter *aiter, size_t amount)
|
||||
{
|
||||
ASSERT3P(aiter->iter_mapaddr, ==, NULL);
|
||||
ASSERT0(aiter->iter_mapsize);
|
||||
|
||||
/* There's nothing left to advance to, so do nothing */
|
||||
if (abd_iter_at_end(aiter))
|
||||
return;
|
||||
|
||||
aiter->iter_pos += amount;
|
||||
aiter->iter_offset += amount;
|
||||
if (!abd_is_linear(aiter->iter_abd)) {
|
||||
while (aiter->iter_offset >= aiter->iter_sg->length) {
|
||||
aiter->iter_offset -= aiter->iter_sg->length;
|
||||
aiter->iter_sg = sg_next(aiter->iter_sg);
|
||||
if (aiter->iter_sg == NULL) {
|
||||
ASSERT0(aiter->iter_offset);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Map the current chunk into aiter. This can be safely called when the aiter
|
||||
* has already exhausted, in which case this does nothing.
|
||||
*/
|
||||
void
|
||||
abd_iter_map(struct abd_iter *aiter)
|
||||
{
|
||||
void *paddr;
|
||||
size_t offset = 0;
|
||||
|
||||
ASSERT3P(aiter->iter_mapaddr, ==, NULL);
|
||||
ASSERT0(aiter->iter_mapsize);
|
||||
|
||||
/* There's nothing left to iterate over, so do nothing */
|
||||
if (abd_iter_at_end(aiter))
|
||||
return;
|
||||
|
||||
if (abd_is_linear(aiter->iter_abd)) {
|
||||
ASSERT3U(aiter->iter_pos, ==, aiter->iter_offset);
|
||||
offset = aiter->iter_offset;
|
||||
aiter->iter_mapsize = aiter->iter_abd->abd_size - offset;
|
||||
paddr = ABD_LINEAR_BUF(aiter->iter_abd);
|
||||
} else {
|
||||
offset = aiter->iter_offset;
|
||||
aiter->iter_mapsize = MIN(aiter->iter_sg->length - offset,
|
||||
aiter->iter_abd->abd_size - aiter->iter_pos);
|
||||
|
||||
paddr = zfs_kmap_atomic(sg_page(aiter->iter_sg),
|
||||
km_table[aiter->iter_km]);
|
||||
}
|
||||
|
||||
aiter->iter_mapaddr = (char *)paddr + offset;
|
||||
}
|
||||
|
||||
/*
|
||||
* Unmap the current chunk from aiter. This can be safely called when the aiter
|
||||
* has already exhausted, in which case this does nothing.
|
||||
*/
|
||||
void
|
||||
abd_iter_unmap(struct abd_iter *aiter)
|
||||
{
|
||||
/* There's nothing left to unmap, so do nothing */
|
||||
if (abd_iter_at_end(aiter))
|
||||
return;
|
||||
|
||||
if (!abd_is_linear(aiter->iter_abd)) {
|
||||
/* LINTED E_FUNC_SET_NOT_USED */
|
||||
zfs_kunmap_atomic(aiter->iter_mapaddr - aiter->iter_offset,
|
||||
km_table[aiter->iter_km]);
|
||||
}
|
||||
|
||||
ASSERT3P(aiter->iter_mapaddr, !=, NULL);
|
||||
ASSERT3U(aiter->iter_mapsize, >, 0);
|
||||
|
||||
aiter->iter_mapaddr = NULL;
|
||||
aiter->iter_mapsize = 0;
|
||||
}
|
||||
|
||||
/*
 * Enter a critical section by invoking local_irq_save() on `flags`.
 *
 * NOTE(review): `flags` is received by value, so anything
 * local_irq_save() stores into it is not visible to the caller once this
 * function returns -- confirm callers do not depend on getting the saved
 * state back through this wrapper.
 */
void
abd_enter_critical(unsigned long flags)
{
	local_irq_save(flags);
}
|
||||
|
||||
/*
 * Leave a critical section by invoking local_irq_restore() with `flags`.
 *
 * NOTE(review): the value restored is whatever the caller passes in;
 * verify it really is the state captured on entry, given that
 * abd_enter_critical() takes its argument by value.
 */
void
abd_exit_critical(unsigned long flags)
{
	local_irq_restore(flags);
}
|
||||
|
||||
#if defined(_KERNEL)
|
||||
/*
|
||||
* bio_nr_pages for ABD.
|
||||
* @off is the offset in @abd
|
||||
*/
|
||||
unsigned long
|
||||
abd_nr_pages_off(abd_t *abd, unsigned int size, size_t off)
|
||||
{
|
||||
unsigned long pos;
|
||||
|
||||
if (abd_is_linear(abd))
|
||||
pos = (unsigned long)abd_to_buf(abd) + off;
|
||||
else
|
||||
pos = ABD_SCATTER(abd).abd_offset + off;
|
||||
|
||||
return ((pos + size + PAGESIZE - 1) >> PAGE_SHIFT) -
|
||||
(pos >> PAGE_SHIFT);
|
||||
}
|
||||
|
||||
/*
|
||||
* bio_map for scatter ABD.
|
||||
* @off is the offset in @abd
|
||||
* Remaining IO size is returned
|
||||
*/
|
||||
unsigned int
|
||||
abd_scatter_bio_map_off(struct bio *bio, abd_t *abd,
|
||||
unsigned int io_size, size_t off)
|
||||
{
|
||||
int i;
|
||||
struct abd_iter aiter;
|
||||
|
||||
ASSERT(!abd_is_linear(abd));
|
||||
ASSERT3U(io_size, <=, abd->abd_size - off);
|
||||
|
||||
abd_iter_init(&aiter, abd);
|
||||
abd_iter_advance(&aiter, off);
|
||||
|
||||
for (i = 0; i < bio->bi_max_vecs; i++) {
|
||||
struct page *pg;
|
||||
size_t len, sgoff, pgoff;
|
||||
struct scatterlist *sg;
|
||||
|
||||
if (io_size <= 0)
|
||||
break;
|
||||
|
||||
sg = aiter.iter_sg;
|
||||
sgoff = aiter.iter_offset;
|
||||
pgoff = sgoff & (PAGESIZE - 1);
|
||||
len = MIN(io_size, PAGESIZE - pgoff);
|
||||
ASSERT(len > 0);
|
||||
|
||||
pg = nth_page(sg_page(sg), sgoff >> PAGE_SHIFT);
|
||||
if (bio_add_page(bio, pg, len, pgoff) != len)
|
||||
break;
|
||||
|
||||
io_size -= len;
|
||||
abd_iter_advance(&aiter, len);
|
||||
}
|
||||
|
||||
return (io_size);
|
||||
}
|
||||
|
||||
/* Tunable Parameters */
|
||||
module_param(zfs_abd_scatter_enabled, int, 0644);
|
||||
MODULE_PARM_DESC(zfs_abd_scatter_enabled,
|
||||
"Toggle whether ABD allocations must be linear.");
|
||||
module_param(zfs_abd_scatter_min_size, int, 0644);
|
||||
MODULE_PARM_DESC(zfs_abd_scatter_min_size,
|
||||
"Minimum size of scatter allocations.");
|
||||
/* CSTYLED */
|
||||
module_param(zfs_abd_scatter_max_order, uint, 0644);
|
||||
MODULE_PARM_DESC(zfs_abd_scatter_max_order,
|
||||
"Maximum order allocation used for a scatter ABD.");
|
||||
#endif
|
|
@ -14,6 +14,7 @@ ccflags-y += $(NO_UNUSED_BUT_SET_VARIABLE)
|
|||
# Suppress unused-value warnings in sparc64 architecture headers
|
||||
ccflags-$(CONFIG_SPARC64) += -Wno-unused-value
|
||||
|
||||
$(MODULE)-objs += abd.o
|
||||
$(MODULE)-objs += aggsum.o
|
||||
$(MODULE)-objs += arc.o
|
||||
$(MODULE)-objs += blkptr.o
|
||||
|
|
|
@ -1,17 +1,26 @@
|
|||
/*
|
||||
* This file and its contents are supplied under the terms of the
|
||||
* Common Development and Distribution License ("CDDL"), version 1.0.
|
||||
* You may only use this file in accordance with the terms of version
|
||||
* 1.0 of the CDDL.
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* A full copy of the text of the CDDL should have accompanied this
|
||||
* source. A copy of the CDDL is also available via the Internet at
|
||||
* http://www.illumos.org/license/CDDL.
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2014 by Chunwei Chen. All rights reserved.
|
||||
* Copyright (c) 2016 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2019 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
/*
|
||||
|
@ -50,11 +59,6 @@
|
|||
* +----------------->| chunk N-1 |
|
||||
* +-----------+
|
||||
*
|
||||
* Using a large proportion of scattered ABDs decreases ARC fragmentation since
|
||||
* when we are at the limit of allocatable space, using equal-size chunks will
|
||||
* allow us to quickly reclaim enough space for a new large allocation (assuming
|
||||
* it is also scattered).
|
||||
*
|
||||
* In addition to directly allocating a linear or scattered ABD, it is also
|
||||
* possible to create an ABD by requesting the "sub-ABD" starting at an offset
|
||||
* within an existing ABD. In linear buffers this is simple (set abd_buf of
|
||||
|
@ -83,186 +87,55 @@
|
|||
* compare, copy, read, write, and fill with zeroes. If you need a custom
|
||||
* function which progressively accesses the whole ABD, use the abd_iterate_*
|
||||
* functions.
|
||||
*
|
||||
* It is possible to make all ABDs linear by setting zfs_abd_scatter_enabled to
|
||||
* B_FALSE.
|
||||
*/
|
||||
|
||||
#include <sys/abd.h>
|
||||
#include <sys/abd_impl.h>
|
||||
#include <sys/param.h>
|
||||
#include <sys/zio.h>
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/zfs_znode.h>
|
||||
|
||||
typedef struct abd_stats {
|
||||
kstat_named_t abdstat_struct_size;
|
||||
kstat_named_t abdstat_scatter_cnt;
|
||||
kstat_named_t abdstat_scatter_data_size;
|
||||
kstat_named_t abdstat_scatter_chunk_waste;
|
||||
kstat_named_t abdstat_linear_cnt;
|
||||
kstat_named_t abdstat_linear_data_size;
|
||||
} abd_stats_t;
|
||||
/* see block comment above for description */
|
||||
int zfs_abd_scatter_enabled = B_TRUE;
|
||||
|
||||
static abd_stats_t abd_stats = {
|
||||
/* Amount of memory occupied by all of the abd_t struct allocations */
|
||||
{ "struct_size", KSTAT_DATA_UINT64 },
|
||||
/*
|
||||
* The number of scatter ABDs which are currently allocated, excluding
|
||||
* ABDs which don't own their data (for instance the ones which were
|
||||
* allocated through abd_get_offset()).
|
||||
*/
|
||||
{ "scatter_cnt", KSTAT_DATA_UINT64 },
|
||||
/* Amount of data stored in all scatter ABDs tracked by scatter_cnt */
|
||||
{ "scatter_data_size", KSTAT_DATA_UINT64 },
|
||||
/*
|
||||
* The amount of space wasted at the end of the last chunk across all
|
||||
* scatter ABDs tracked by scatter_cnt.
|
||||
*/
|
||||
{ "scatter_chunk_waste", KSTAT_DATA_UINT64 },
|
||||
/*
|
||||
* The number of linear ABDs which are currently allocated, excluding
|
||||
* ABDs which don't own their data (for instance the ones which were
|
||||
* allocated through abd_get_offset() and abd_get_from_buf()). If an
|
||||
* ABD takes ownership of its buf then it will become tracked.
|
||||
*/
|
||||
{ "linear_cnt", KSTAT_DATA_UINT64 },
|
||||
/* Amount of data stored in all linear ABDs tracked by linear_cnt */
|
||||
{ "linear_data_size", KSTAT_DATA_UINT64 },
|
||||
};
|
||||
|
||||
#define ABDSTAT(stat) (abd_stats.stat.value.ui64)
|
||||
#define ABDSTAT_INCR(stat, val) \
|
||||
atomic_add_64(&abd_stats.stat.value.ui64, (val))
|
||||
#define ABDSTAT_BUMP(stat) ABDSTAT_INCR(stat, 1)
|
||||
#define ABDSTAT_BUMPDOWN(stat) ABDSTAT_INCR(stat, -1)
|
||||
|
||||
/*
|
||||
* It is possible to make all future ABDs be linear by setting this to B_FALSE.
|
||||
* Otherwise, ABDs are allocated scattered by default unless the caller uses
|
||||
* abd_alloc_linear().
|
||||
*/
|
||||
boolean_t zfs_abd_scatter_enabled = B_TRUE;
|
||||
|
||||
/*
|
||||
* The size of the chunks ABD allocates. Because the sizes allocated from the
|
||||
* kmem_cache can't change, this tunable can only be modified at boot. Changing
|
||||
* it at runtime would cause ABD iteration to work incorrectly for ABDs which
|
||||
* were allocated with the old size, so a safeguard has been put in place which
|
||||
* will cause the machine to panic if you change it and try to access the data
|
||||
* within a scattered ABD.
|
||||
*/
|
||||
size_t zfs_abd_chunk_size = 4096;
|
||||
|
||||
#if defined(_KERNEL)
|
||||
SYSCTL_DECL(_vfs_zfs);
|
||||
|
||||
SYSCTL_INT(_vfs_zfs, OID_AUTO, abd_scatter_enabled, CTLFLAG_RWTUN,
|
||||
&zfs_abd_scatter_enabled, 0, "Enable scattered ARC data buffers");
|
||||
SYSCTL_ULONG(_vfs_zfs, OID_AUTO, abd_chunk_size, CTLFLAG_RDTUN,
|
||||
&zfs_abd_chunk_size, 0, "The size of the chunks ABD allocates");
|
||||
#endif
|
||||
|
||||
kmem_cache_t *abd_chunk_cache;
|
||||
static kstat_t *abd_ksp;
|
||||
|
||||
extern inline boolean_t abd_is_linear(abd_t *abd);
|
||||
extern inline void abd_copy(abd_t *dabd, abd_t *sabd, size_t size);
|
||||
extern inline void abd_copy_from_buf(abd_t *abd, const void *buf, size_t size);
|
||||
extern inline void abd_copy_to_buf(void* buf, abd_t *abd, size_t size);
|
||||
extern inline int abd_cmp_buf(abd_t *abd, const void *buf, size_t size);
|
||||
extern inline void abd_zero(abd_t *abd, size_t size);
|
||||
|
||||
static void *
|
||||
abd_alloc_chunk()
|
||||
boolean_t
|
||||
abd_is_linear(abd_t *abd)
|
||||
{
|
||||
void *c = kmem_cache_alloc(abd_chunk_cache, KM_PUSHPAGE);
|
||||
ASSERT3P(c, !=, NULL);
|
||||
return (c);
|
||||
return ((abd->abd_flags & ABD_FLAG_LINEAR) != 0 ? B_TRUE : B_FALSE);
|
||||
}
|
||||
|
||||
static void
|
||||
abd_free_chunk(void *c)
|
||||
boolean_t
|
||||
abd_is_linear_page(abd_t *abd)
|
||||
{
|
||||
kmem_cache_free(abd_chunk_cache, c);
|
||||
return ((abd->abd_flags & ABD_FLAG_LINEAR_PAGE) != 0 ?
|
||||
B_TRUE : B_FALSE);
|
||||
}
|
||||
|
||||
void
|
||||
abd_init(void)
|
||||
{
|
||||
abd_chunk_cache = kmem_cache_create("abd_chunk", zfs_abd_chunk_size, 0,
|
||||
NULL, NULL, NULL, NULL, 0, KMC_NOTOUCH | KMC_NODEBUG);
|
||||
|
||||
abd_ksp = kstat_create("zfs", 0, "abdstats", "misc", KSTAT_TYPE_NAMED,
|
||||
sizeof (abd_stats) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL);
|
||||
if (abd_ksp != NULL) {
|
||||
abd_ksp->ks_data = &abd_stats;
|
||||
kstat_install(abd_ksp);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
abd_fini(void)
|
||||
{
|
||||
if (abd_ksp != NULL) {
|
||||
kstat_delete(abd_ksp);
|
||||
abd_ksp = NULL;
|
||||
}
|
||||
|
||||
kmem_cache_destroy(abd_chunk_cache);
|
||||
abd_chunk_cache = NULL;
|
||||
}
|
||||
|
||||
static inline size_t
|
||||
abd_chunkcnt_for_bytes(size_t size)
|
||||
{
|
||||
return (P2ROUNDUP(size, zfs_abd_chunk_size) / zfs_abd_chunk_size);
|
||||
}
|
||||
|
||||
static inline size_t
|
||||
abd_scatter_chunkcnt(abd_t *abd)
|
||||
{
|
||||
ASSERT(!abd_is_linear(abd));
|
||||
return (abd_chunkcnt_for_bytes(
|
||||
abd->abd_u.abd_scatter.abd_offset + abd->abd_size));
|
||||
}
|
||||
|
||||
static inline void
|
||||
abd_verify(abd_t *abd)
|
||||
{
|
||||
ASSERT3U(abd->abd_size, >, 0);
|
||||
ASSERT3U(abd->abd_size, <=, SPA_MAXBLOCKSIZE);
|
||||
ASSERT3U(abd->abd_flags, ==, abd->abd_flags & (ABD_FLAG_LINEAR |
|
||||
ABD_FLAG_OWNER | ABD_FLAG_META));
|
||||
ABD_FLAG_OWNER | ABD_FLAG_META | ABD_FLAG_MULTI_ZONE |
|
||||
ABD_FLAG_MULTI_CHUNK | ABD_FLAG_LINEAR_PAGE));
|
||||
IMPLY(abd->abd_parent != NULL, !(abd->abd_flags & ABD_FLAG_OWNER));
|
||||
IMPLY(abd->abd_flags & ABD_FLAG_META, abd->abd_flags & ABD_FLAG_OWNER);
|
||||
if (abd_is_linear(abd)) {
|
||||
ASSERT3P(abd->abd_u.abd_linear.abd_buf, !=, NULL);
|
||||
ASSERT3P(ABD_LINEAR_BUF(abd), !=, NULL);
|
||||
} else {
|
||||
ASSERT3U(abd->abd_u.abd_scatter.abd_offset, <,
|
||||
zfs_abd_chunk_size);
|
||||
size_t n = abd_scatter_chunkcnt(abd);
|
||||
for (int i = 0; i < n; i++) {
|
||||
ASSERT3P(
|
||||
abd->abd_u.abd_scatter.abd_chunks[i], !=, NULL);
|
||||
}
|
||||
abd_verify_scatter(abd);
|
||||
}
|
||||
}
|
||||
|
||||
static inline abd_t *
|
||||
abd_alloc_struct(size_t chunkcnt)
|
||||
uint_t
|
||||
abd_get_size(abd_t *abd)
|
||||
{
|
||||
size_t size = offsetof(abd_t, abd_u.abd_scatter.abd_chunks[chunkcnt]);
|
||||
abd_t *abd = kmem_alloc(size, KM_PUSHPAGE);
|
||||
ASSERT3P(abd, !=, NULL);
|
||||
ABDSTAT_INCR(abdstat_struct_size, size);
|
||||
|
||||
return (abd);
|
||||
}
|
||||
|
||||
static inline void
|
||||
abd_free_struct(abd_t *abd)
|
||||
{
|
||||
size_t chunkcnt = abd_is_linear(abd) ? 0 : abd_scatter_chunkcnt(abd);
|
||||
int size = offsetof(abd_t, abd_u.abd_scatter.abd_chunks[chunkcnt]);
|
||||
kmem_free(abd, size);
|
||||
ABDSTAT_INCR(abdstat_struct_size, -size);
|
||||
abd_verify(abd);
|
||||
return (abd->abd_size);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -272,15 +145,16 @@ abd_free_struct(abd_t *abd)
|
|||
abd_t *
|
||||
abd_alloc(size_t size, boolean_t is_metadata)
|
||||
{
|
||||
if (!zfs_abd_scatter_enabled || size <= zfs_abd_chunk_size)
|
||||
if (!zfs_abd_scatter_enabled || abd_size_alloc_linear(size))
|
||||
return (abd_alloc_linear(size, is_metadata));
|
||||
|
||||
VERIFY3U(size, <=, SPA_MAXBLOCKSIZE);
|
||||
|
||||
size_t n = abd_chunkcnt_for_bytes(size);
|
||||
abd_t *abd = abd_alloc_struct(n);
|
||||
|
||||
abd_t *abd = abd_alloc_struct(size);
|
||||
abd->abd_flags = ABD_FLAG_OWNER;
|
||||
abd->abd_u.abd_scatter.abd_offset = 0;
|
||||
abd_alloc_chunks(abd, size);
|
||||
|
||||
if (is_metadata) {
|
||||
abd->abd_flags |= ABD_FLAG_META;
|
||||
}
|
||||
|
@ -288,19 +162,7 @@ abd_alloc(size_t size, boolean_t is_metadata)
|
|||
abd->abd_parent = NULL;
|
||||
zfs_refcount_create(&abd->abd_children);
|
||||
|
||||
abd->abd_u.abd_scatter.abd_offset = 0;
|
||||
abd->abd_u.abd_scatter.abd_chunk_size = zfs_abd_chunk_size;
|
||||
|
||||
for (int i = 0; i < n; i++) {
|
||||
void *c = abd_alloc_chunk();
|
||||
ASSERT3P(c, !=, NULL);
|
||||
abd->abd_u.abd_scatter.abd_chunks[i] = c;
|
||||
}
|
||||
|
||||
ABDSTAT_BUMP(abdstat_scatter_cnt);
|
||||
ABDSTAT_INCR(abdstat_scatter_data_size, size);
|
||||
ABDSTAT_INCR(abdstat_scatter_chunk_waste,
|
||||
n * zfs_abd_chunk_size - size);
|
||||
abd_update_scatter_stats(abd, ABDSTAT_INCR);
|
||||
|
||||
return (abd);
|
||||
}
|
||||
|
@ -308,17 +170,32 @@ abd_alloc(size_t size, boolean_t is_metadata)
|
|||
static void
|
||||
abd_free_scatter(abd_t *abd)
|
||||
{
|
||||
size_t n = abd_scatter_chunkcnt(abd);
|
||||
for (int i = 0; i < n; i++) {
|
||||
abd_free_chunk(abd->abd_u.abd_scatter.abd_chunks[i]);
|
||||
abd_free_chunks(abd);
|
||||
|
||||
zfs_refcount_destroy(&abd->abd_children);
|
||||
abd_update_scatter_stats(abd, ABDSTAT_DECR);
|
||||
abd_free_struct(abd);
|
||||
}
|
||||
|
||||
/*
|
||||
* Free an ABD allocated from abd_get_offset() or abd_get_from_buf(). Will not
|
||||
* free the underlying scatterlist or buffer.
|
||||
*/
|
||||
void
|
||||
abd_put(abd_t *abd)
|
||||
{
|
||||
if (abd == NULL)
|
||||
return;
|
||||
|
||||
abd_verify(abd);
|
||||
ASSERT(!(abd->abd_flags & ABD_FLAG_OWNER));
|
||||
|
||||
if (abd->abd_parent != NULL) {
|
||||
(void) zfs_refcount_remove_many(&abd->abd_parent->abd_children,
|
||||
abd->abd_size, abd);
|
||||
}
|
||||
|
||||
zfs_refcount_destroy(&abd->abd_children);
|
||||
ABDSTAT_BUMPDOWN(abdstat_scatter_cnt);
|
||||
ABDSTAT_INCR(abdstat_scatter_data_size, -(int)abd->abd_size);
|
||||
ABDSTAT_INCR(abdstat_scatter_chunk_waste,
|
||||
abd->abd_size - n * zfs_abd_chunk_size);
|
||||
|
||||
abd_free_struct(abd);
|
||||
}
|
||||
|
||||
|
@ -343,13 +220,12 @@ abd_alloc_linear(size_t size, boolean_t is_metadata)
|
|||
zfs_refcount_create(&abd->abd_children);
|
||||
|
||||
if (is_metadata) {
|
||||
abd->abd_u.abd_linear.abd_buf = zio_buf_alloc(size);
|
||||
ABD_LINEAR_BUF(abd) = zio_buf_alloc(size);
|
||||
} else {
|
||||
abd->abd_u.abd_linear.abd_buf = zio_data_buf_alloc(size);
|
||||
ABD_LINEAR_BUF(abd) = zio_data_buf_alloc(size);
|
||||
}
|
||||
|
||||
ABDSTAT_BUMP(abdstat_linear_cnt);
|
||||
ABDSTAT_INCR(abdstat_linear_data_size, size);
|
||||
abd_update_linear_stats(abd, ABDSTAT_INCR);
|
||||
|
||||
return (abd);
|
||||
}
|
||||
|
@ -357,15 +233,18 @@ abd_alloc_linear(size_t size, boolean_t is_metadata)
|
|||
static void
|
||||
abd_free_linear(abd_t *abd)
|
||||
{
|
||||
if (abd_is_linear_page(abd)) {
|
||||
abd_free_linear_page(abd);
|
||||
return;
|
||||
}
|
||||
if (abd->abd_flags & ABD_FLAG_META) {
|
||||
zio_buf_free(abd->abd_u.abd_linear.abd_buf, abd->abd_size);
|
||||
zio_buf_free(ABD_LINEAR_BUF(abd), abd->abd_size);
|
||||
} else {
|
||||
zio_data_buf_free(abd->abd_u.abd_linear.abd_buf, abd->abd_size);
|
||||
zio_data_buf_free(ABD_LINEAR_BUF(abd), abd->abd_size);
|
||||
}
|
||||
|
||||
zfs_refcount_destroy(&abd->abd_children);
|
||||
ABDSTAT_BUMPDOWN(abdstat_linear_cnt);
|
||||
ABDSTAT_INCR(abdstat_linear_data_size, -(int)abd->abd_size);
|
||||
abd_update_linear_stats(abd, ABDSTAT_DECR);
|
||||
|
||||
abd_free_struct(abd);
|
||||
}
|
||||
|
@ -397,39 +276,23 @@ abd_t *
|
|||
abd_alloc_sametype(abd_t *sabd, size_t size)
|
||||
{
|
||||
boolean_t is_metadata = (sabd->abd_flags & ABD_FLAG_META) != 0;
|
||||
if (abd_is_linear(sabd)) {
|
||||
if (abd_is_linear(sabd) &&
|
||||
!abd_is_linear_page(sabd)) {
|
||||
return (abd_alloc_linear(size, is_metadata));
|
||||
} else {
|
||||
return (abd_alloc(size, is_metadata));
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* If we're going to use this ABD for doing I/O using the block layer, the
|
||||
* consumer of the ABD data doesn't care if it's scattered or not, and we don't
|
||||
* plan to store this ABD in memory for a long period of time, we should
|
||||
* allocate the ABD type that requires the least data copying to do the I/O.
|
||||
*
|
||||
* Currently this is linear ABDs, however if ldi_strategy() can ever issue I/Os
|
||||
* using a scatter/gather list we should switch to that and replace this call
|
||||
* with vanilla abd_alloc().
|
||||
*/
|
||||
abd_t *
|
||||
abd_alloc_for_io(size_t size, boolean_t is_metadata)
|
||||
{
|
||||
return (abd_alloc_linear(size, is_metadata));
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate a new ABD to point to offset off of sabd. It shares the underlying
|
||||
* buffer data with sabd. Use abd_put() to free. sabd must not be freed while
|
||||
* any derived ABDs exist.
|
||||
*/
|
||||
/* ARGSUSED */
|
||||
static inline abd_t *
|
||||
static abd_t *
|
||||
abd_get_offset_impl(abd_t *sabd, size_t off, size_t size)
|
||||
{
|
||||
abd_t *abd;
|
||||
abd_t *abd = NULL;
|
||||
|
||||
abd_verify(sabd);
|
||||
ASSERT3U(off, <=, sabd->abd_size);
|
||||
|
@ -444,60 +307,33 @@ abd_get_offset_impl(abd_t *sabd, size_t off, size_t size)
|
|||
*/
|
||||
abd->abd_flags = ABD_FLAG_LINEAR;
|
||||
|
||||
abd->abd_u.abd_linear.abd_buf =
|
||||
(char *)sabd->abd_u.abd_linear.abd_buf + off;
|
||||
ABD_LINEAR_BUF(abd) = (char *)ABD_LINEAR_BUF(sabd) + off;
|
||||
} else {
|
||||
size_t new_offset = sabd->abd_u.abd_scatter.abd_offset + off;
|
||||
size_t chunkcnt = abd_scatter_chunkcnt(sabd) -
|
||||
(new_offset / zfs_abd_chunk_size);
|
||||
|
||||
abd = abd_alloc_struct(chunkcnt);
|
||||
|
||||
/*
|
||||
* Even if this buf is filesystem metadata, we only track that
|
||||
* if we own the underlying data buffer, which is not true in
|
||||
* this case. Therefore, we don't ever use ABD_FLAG_META here.
|
||||
*/
|
||||
abd->abd_flags = 0;
|
||||
|
||||
abd->abd_u.abd_scatter.abd_offset =
|
||||
new_offset % zfs_abd_chunk_size;
|
||||
abd->abd_u.abd_scatter.abd_chunk_size = zfs_abd_chunk_size;
|
||||
|
||||
/* Copy the scatterlist starting at the correct offset */
|
||||
(void) memcpy(&abd->abd_u.abd_scatter.abd_chunks,
|
||||
&sabd->abd_u.abd_scatter.abd_chunks[new_offset /
|
||||
zfs_abd_chunk_size],
|
||||
chunkcnt * sizeof (void *));
|
||||
abd = abd_get_offset_scatter(sabd, off);
|
||||
}
|
||||
|
||||
if (size == 0)
|
||||
abd->abd_size = sabd->abd_size - off;
|
||||
else
|
||||
abd->abd_size = size;
|
||||
abd->abd_parent = sabd;
|
||||
zfs_refcount_create(&abd->abd_children);
|
||||
(void) zfs_refcount_add_many(&sabd->abd_children, abd->abd_size, abd);
|
||||
|
||||
return (abd);
|
||||
}
|
||||
|
||||
abd_t *
|
||||
abd_get_offset(abd_t *sabd, size_t off)
|
||||
{
|
||||
|
||||
return (abd_get_offset_impl(sabd, off, 0));
|
||||
size_t size = sabd->abd_size > off ? sabd->abd_size - off : 0;
|
||||
VERIFY3U(size, >, 0);
|
||||
return (abd_get_offset_impl(sabd, off, size));
|
||||
}
|
||||
|
||||
abd_t *
|
||||
abd_get_offset_size(abd_t *sabd, size_t off, size_t size)
|
||||
{
|
||||
ASSERT3U(off + size, <=, sabd->abd_size);
|
||||
|
||||
return (abd_get_offset_impl(sabd, off, size));
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Allocate a linear ABD structure for buf. You must free this with abd_put()
|
||||
* since the resulting ABD doesn't own its own buffer.
|
||||
|
@ -519,32 +355,11 @@ abd_get_from_buf(void *buf, size_t size)
|
|||
abd->abd_parent = NULL;
|
||||
zfs_refcount_create(&abd->abd_children);
|
||||
|
||||
abd->abd_u.abd_linear.abd_buf = buf;
|
||||
ABD_LINEAR_BUF(abd) = buf;
|
||||
|
||||
return (abd);
|
||||
}
|
||||
|
||||
/*
|
||||
* Free an ABD allocated from abd_get_offset() or abd_get_from_buf(). Will not
|
||||
* free the underlying scatterlist or buffer.
|
||||
*/
|
||||
void
|
||||
abd_put(abd_t *abd)
|
||||
{
|
||||
if (abd == NULL)
|
||||
return;
|
||||
abd_verify(abd);
|
||||
ASSERT(!(abd->abd_flags & ABD_FLAG_OWNER));
|
||||
|
||||
if (abd->abd_parent != NULL) {
|
||||
(void) zfs_refcount_remove_many(&abd->abd_parent->abd_children,
|
||||
abd->abd_size, abd);
|
||||
}
|
||||
|
||||
zfs_refcount_destroy(&abd->abd_children);
|
||||
abd_free_struct(abd);
|
||||
}
|
||||
|
||||
/*
|
||||
* Get the raw buffer associated with a linear ABD.
|
||||
*/
|
||||
|
@ -553,7 +368,7 @@ abd_to_buf(abd_t *abd)
|
|||
{
|
||||
ASSERT(abd_is_linear(abd));
|
||||
abd_verify(abd);
|
||||
return (abd->abd_u.abd_linear.abd_buf);
|
||||
return (ABD_LINEAR_BUF(abd));
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -574,7 +389,6 @@ abd_borrow_buf(abd_t *abd, size_t n)
|
|||
buf = zio_buf_alloc(n);
|
||||
}
|
||||
(void) zfs_refcount_add_many(&abd->abd_children, n, buf);
|
||||
|
||||
return (buf);
|
||||
}
|
||||
|
||||
|
@ -617,6 +431,31 @@ abd_return_buf_copy(abd_t *abd, void *buf, size_t n)
|
|||
abd_return_buf(abd, buf, n);
|
||||
}
|
||||
|
||||
void
|
||||
abd_release_ownership_of_buf(abd_t *abd)
|
||||
{
|
||||
ASSERT(abd_is_linear(abd));
|
||||
ASSERT(abd->abd_flags & ABD_FLAG_OWNER);
|
||||
|
||||
/*
|
||||
* abd_free() needs to handle LINEAR_PAGE ABD's specially.
|
||||
* Since that flag does not survive the
|
||||
* abd_release_ownership_of_buf() -> abd_get_from_buf() ->
|
||||
* abd_take_ownership_of_buf() sequence, we don't allow releasing
|
||||
* these "linear but not zio_[data_]buf_alloc()'ed" ABD's.
|
||||
*/
|
||||
ASSERT(!abd_is_linear_page(abd));
|
||||
|
||||
abd_verify(abd);
|
||||
|
||||
abd->abd_flags &= ~ABD_FLAG_OWNER;
|
||||
/* Disable this flag since we no longer own the data buffer */
|
||||
abd->abd_flags &= ~ABD_FLAG_META;
|
||||
|
||||
abd_update_linear_stats(abd, ABDSTAT_DECR);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Give this ABD ownership of the buffer that it's storing. Can only be used on
|
||||
* linear ABDs which were allocated via abd_get_from_buf(), or ones allocated
|
||||
|
@ -635,130 +474,7 @@ abd_take_ownership_of_buf(abd_t *abd, boolean_t is_metadata)
|
|||
abd->abd_flags |= ABD_FLAG_META;
|
||||
}
|
||||
|
||||
ABDSTAT_BUMP(abdstat_linear_cnt);
|
||||
ABDSTAT_INCR(abdstat_linear_data_size, abd->abd_size);
|
||||
}
|
||||
|
||||
void
|
||||
abd_release_ownership_of_buf(abd_t *abd)
|
||||
{
|
||||
ASSERT(abd_is_linear(abd));
|
||||
ASSERT(abd->abd_flags & ABD_FLAG_OWNER);
|
||||
abd_verify(abd);
|
||||
|
||||
abd->abd_flags &= ~ABD_FLAG_OWNER;
|
||||
/* Disable this flag since we no longer own the data buffer */
|
||||
abd->abd_flags &= ~ABD_FLAG_META;
|
||||
|
||||
ABDSTAT_BUMPDOWN(abdstat_linear_cnt);
|
||||
ABDSTAT_INCR(abdstat_linear_data_size, -(int)abd->abd_size);
|
||||
}
|
||||
|
||||
struct abd_iter {
|
||||
abd_t *iter_abd; /* ABD being iterated through */
|
||||
size_t iter_pos; /* position (relative to abd_offset) */
|
||||
void *iter_mapaddr; /* addr corresponding to iter_pos */
|
||||
size_t iter_mapsize; /* length of data valid at mapaddr */
|
||||
};
|
||||
|
||||
static inline size_t
|
||||
abd_iter_scatter_chunk_offset(struct abd_iter *aiter)
|
||||
{
|
||||
ASSERT(!abd_is_linear(aiter->iter_abd));
|
||||
return ((aiter->iter_abd->abd_u.abd_scatter.abd_offset +
|
||||
aiter->iter_pos) % zfs_abd_chunk_size);
|
||||
}
|
||||
|
||||
static inline size_t
|
||||
abd_iter_scatter_chunk_index(struct abd_iter *aiter)
|
||||
{
|
||||
ASSERT(!abd_is_linear(aiter->iter_abd));
|
||||
return ((aiter->iter_abd->abd_u.abd_scatter.abd_offset +
|
||||
aiter->iter_pos) / zfs_abd_chunk_size);
|
||||
}
|
||||
|
||||
/*
|
||||
* Initialize the abd_iter.
|
||||
*/
|
||||
static void
|
||||
abd_iter_init(struct abd_iter *aiter, abd_t *abd)
|
||||
{
|
||||
abd_verify(abd);
|
||||
aiter->iter_abd = abd;
|
||||
aiter->iter_pos = 0;
|
||||
aiter->iter_mapaddr = NULL;
|
||||
aiter->iter_mapsize = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Advance the iterator by a certain amount. Cannot be called when a chunk is
|
||||
* in use. This can be safely called when the aiter has already exhausted, in
|
||||
* which case this does nothing.
|
||||
*/
|
||||
static void
|
||||
abd_iter_advance(struct abd_iter *aiter, size_t amount)
|
||||
{
|
||||
ASSERT3P(aiter->iter_mapaddr, ==, NULL);
|
||||
ASSERT0(aiter->iter_mapsize);
|
||||
|
||||
/* There's nothing left to advance to, so do nothing */
|
||||
if (aiter->iter_pos == aiter->iter_abd->abd_size)
|
||||
return;
|
||||
|
||||
aiter->iter_pos += amount;
|
||||
}
|
||||
|
||||
/*
|
||||
* Map the current chunk into aiter. This can be safely called when the aiter
|
||||
* has already exhausted, in which case this does nothing.
|
||||
*/
|
||||
static void
|
||||
abd_iter_map(struct abd_iter *aiter)
|
||||
{
|
||||
void *paddr;
|
||||
size_t offset = 0;
|
||||
|
||||
ASSERT3P(aiter->iter_mapaddr, ==, NULL);
|
||||
ASSERT0(aiter->iter_mapsize);
|
||||
|
||||
/* Panic if someone has changed zfs_abd_chunk_size */
|
||||
IMPLY(!abd_is_linear(aiter->iter_abd), zfs_abd_chunk_size ==
|
||||
aiter->iter_abd->abd_u.abd_scatter.abd_chunk_size);
|
||||
|
||||
/* There's nothing left to iterate over, so do nothing */
|
||||
if (aiter->iter_pos == aiter->iter_abd->abd_size)
|
||||
return;
|
||||
|
||||
if (abd_is_linear(aiter->iter_abd)) {
|
||||
offset = aiter->iter_pos;
|
||||
aiter->iter_mapsize = aiter->iter_abd->abd_size - offset;
|
||||
paddr = aiter->iter_abd->abd_u.abd_linear.abd_buf;
|
||||
} else {
|
||||
size_t index = abd_iter_scatter_chunk_index(aiter);
|
||||
offset = abd_iter_scatter_chunk_offset(aiter);
|
||||
aiter->iter_mapsize = MIN(zfs_abd_chunk_size - offset,
|
||||
aiter->iter_abd->abd_size - aiter->iter_pos);
|
||||
paddr = aiter->iter_abd->abd_u.abd_scatter.abd_chunks[index];
|
||||
}
|
||||
aiter->iter_mapaddr = (char *)paddr + offset;
|
||||
}
|
||||
|
||||
/*
|
||||
* Unmap the current chunk from aiter. This can be safely called when the aiter
|
||||
* has already exhausted, in which case this does nothing.
|
||||
*/
|
||||
static void
|
||||
abd_iter_unmap(struct abd_iter *aiter)
|
||||
{
|
||||
/* There's nothing left to unmap, so do nothing */
|
||||
if (aiter->iter_pos == aiter->iter_abd->abd_size)
|
||||
return;
|
||||
|
||||
ASSERT3P(aiter->iter_mapaddr, !=, NULL);
|
||||
ASSERT3U(aiter->iter_mapsize, >, 0);
|
||||
|
||||
aiter->iter_mapaddr = NULL;
|
||||
aiter->iter_mapsize = 0;
|
||||
abd_update_linear_stats(abd, ABDSTAT_INCR);
|
||||
}
|
||||
|
||||
int
|
||||
|
@ -987,6 +703,7 @@ abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd,
|
|||
struct abd_iter caiters[3];
|
||||
struct abd_iter daiter = {0};
|
||||
void *caddrs[3];
|
||||
unsigned long flags = 0;
|
||||
|
||||
ASSERT3U(parity, <=, 3);
|
||||
|
||||
|
@ -998,7 +715,7 @@ abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd,
|
|||
|
||||
ASSERT3S(dsize, >=, 0);
|
||||
|
||||
critical_enter();
|
||||
abd_enter_critical(flags);
|
||||
while (csize > 0) {
|
||||
len = csize;
|
||||
|
||||
|
@ -1010,11 +727,14 @@ abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd,
|
|||
caddrs[i] = caiters[i].iter_mapaddr;
|
||||
}
|
||||
|
||||
|
||||
switch (parity) {
|
||||
case 3:
|
||||
len = MIN(caiters[2].iter_mapsize, len);
|
||||
/* falls through */
|
||||
case 2:
|
||||
len = MIN(caiters[1].iter_mapsize, len);
|
||||
/* falls through */
|
||||
case 1:
|
||||
len = MIN(caiters[0].iter_mapsize, len);
|
||||
}
|
||||
|
@ -1055,7 +775,7 @@ abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd,
|
|||
ASSERT3S(dsize, >=, 0);
|
||||
ASSERT3S(csize, >=, 0);
|
||||
}
|
||||
critical_exit();
|
||||
abd_exit_critical(flags);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -1080,6 +800,7 @@ abd_raidz_rec_iterate(abd_t **cabds, abd_t **tabds,
|
|||
struct abd_iter citers[3];
|
||||
struct abd_iter xiters[3];
|
||||
void *caddrs[3], *xaddrs[3];
|
||||
unsigned long flags = 0;
|
||||
|
||||
ASSERT3U(parity, <=, 3);
|
||||
|
||||
|
@ -1088,7 +809,7 @@ abd_raidz_rec_iterate(abd_t **cabds, abd_t **tabds,
|
|||
abd_iter_init(&xiters[i], tabds[i]);
|
||||
}
|
||||
|
||||
critical_enter();
|
||||
abd_enter_critical(flags);
|
||||
while (tsize > 0) {
|
||||
|
||||
for (i = 0; i < parity; i++) {
|
||||
|
@ -1103,9 +824,11 @@ abd_raidz_rec_iterate(abd_t **cabds, abd_t **tabds,
|
|||
case 3:
|
||||
len = MIN(xiters[2].iter_mapsize, len);
|
||||
len = MIN(citers[2].iter_mapsize, len);
|
||||
/* falls through */
|
||||
case 2:
|
||||
len = MIN(xiters[1].iter_mapsize, len);
|
||||
len = MIN(citers[1].iter_mapsize, len);
|
||||
/* falls through */
|
||||
case 1:
|
||||
len = MIN(xiters[0].iter_mapsize, len);
|
||||
len = MIN(citers[0].iter_mapsize, len);
|
||||
|
@ -1130,5 +853,5 @@ abd_raidz_rec_iterate(abd_t **cabds, abd_t **tabds,
|
|||
tsize -= len;
|
||||
ASSERT3S(tsize, >=, 0);
|
||||
}
|
||||
critical_exit();
|
||||
abd_exit_critical(flags);
|
||||
}
|
|
@ -1638,7 +1638,7 @@ vdev_indirect_splits_damage(indirect_vsd_t *iv, zio_t *zio)
|
|||
if (ic->ic_data == NULL)
|
||||
continue;
|
||||
|
||||
abd_zero(ic->ic_data, ic->ic_data->abd_size);
|
||||
abd_zero(ic->ic_data, abd_get_size(ic->ic_data));
|
||||
}
|
||||
|
||||
iv->iv_attempts_max *= 2;
|
||||
|
|
Loading…
Reference in New Issue