zfs/include/sys/zil_impl.h

/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012 by Delphix. All rights reserved.
 */

/* Portions Copyright 2010 Robert Milkowski */

#ifndef	_SYS_ZIL_IMPL_H
#define	_SYS_ZIL_IMPL_H

#include <sys/zil.h>
#include <sys/dmu_objset.h>

#ifdef	__cplusplus
extern "C" {
#endif

/*
 * Log write buffer (lwb).
 *
 * Describes a single log block: a back pointer to the owning zilog, the
 * on-disk address of the block, the in-memory buffer with its size and
 * fill level, and the zio and tx associated with it.  lwbs are linked
 * onto zilog->zl_lwb_list through lwb_node.
 */
typedef struct lwb {
	zilog_t		*lwb_zilog;	/* back pointer to log struct */
	blkptr_t	lwb_blk;	/* on disk address of this log blk */
	boolean_t	lwb_fastwrite;	/* is blk marked for fastwrite? */
	boolean_t	lwb_slog;	/* lwb_blk is on SLOG device */
	/* NOTE(review): presumably 0 <= lwb_nused <= lwb_sz -- confirm */
	int		lwb_nused;	/* # used bytes in buffer */
	int		lwb_sz;		/* size of block and buffer */
	char		*lwb_buf;	/* log write buffer */
	zio_t		*lwb_zio;	/* zio for this buffer */
	dmu_tx_t	*lwb_tx;	/* tx for log block allocation */
	uint64_t	lwb_max_txg;	/* highest txg in this lwb */
	list_node_t	lwb_node;	/* zilog->zl_lwb_list linkage */
} lwb_t;

/*
 * Intent log transaction (itx) lists.
 *
 * Synchronous itxs are kept on a single list; asynchronous itxs are kept
 * in an AVL tree of per-file lists (see itx_async_node_t).
 */
typedef struct itxs {
	list_t		i_sync_list;	/* list of synchronous itxs */
	avl_tree_t	i_async_tree;	/* tree of foids for async itxs */
} itxs_t;

/*
 * Per-txg itx chain: ties a txg number to its sync and async itx lists.
 * A zilog embeds TXG_SIZE of these in zl_itxg[].
 */
typedef struct itxg {
	kmutex_t	itxg_lock;	/* lock for this structure */
	uint64_t	itxg_txg;	/* txg for this chain */
	itxs_t		*itxg_itxs;	/* sync and async itxs */
} itxg_t;

/*
 * For async itxs we build up an AVL tree of lists of async itxs per file:
 * each node carries one file object id and the list of async itxs for it.
 * NOTE(review): presumably these are the nodes of itxs_t.i_async_tree,
 * ordered by ia_foid -- confirm against the tree's comparator.
 */
typedef struct itx_async_node {
	uint64_t	ia_foid;	/* file object id */
	list_t		ia_list;	/* list of async itxs for this foid */
	avl_node_t	ia_node;	/* AVL tree linkage */
} itx_async_node_t;

/*
 * Vdev flushing: during a zil_commit(), we build up an AVL tree of the vdevs
 * we've touched so we know which ones need a write cache flush at the end.
 * The tree is zilog->zl_vdev_tree, which is protected by zl_vdev_lock.
 */
typedef struct zil_vdev_node {
	uint64_t	zv_vdev;	/* vdev to be flushed */
	avl_node_t	zv_node;	/* AVL tree linkage */
} zil_vdev_node_t;

/*
 * Number of entries in zilog->zl_prev_blks[] (block sizes, sector rounded),
 * which is indexed by the zl_prev_rotor rotor.
 */
#define	ZIL_PREV_BLKS 16

/*
 * Stable storage intent log management structure.  One per dataset.
 *
 * zl_lock protects most of the fields below; zl_vdev_tree is protected by
 * its own lock, zl_vdev_lock.  The uint8_t members are used as small flags
 * or property values.
 */
struct zilog {
	kmutex_t	zl_lock;	/* protects most zilog_t fields */
	struct dsl_pool	*zl_dmu_pool;	/* DSL pool */
	spa_t		*zl_spa;	/* handle for read/write log */
	const zil_header_t *zl_header;	/* log header buffer */
	objset_t	*zl_os;		/* object set we're logging */
	zil_get_data_t	*zl_get_data;	/* callback to get object content */
	zio_t		*zl_root_zio;	/* log writer root zio */
	uint64_t	zl_lr_seq;	/* on-disk log record sequence number */
	uint64_t	zl_commit_lr_seq; /* last committed on-disk lr seq */
	uint64_t	zl_destroy_txg;	/* txg of last zil_destroy() */
	uint64_t	zl_replayed_seq[TXG_SIZE]; /* last replayed rec seq */
	uint64_t	zl_replaying_seq; /* current replay seq number */
	uint32_t	zl_suspend;	/* log suspend count */
	kcondvar_t	zl_cv_writer;	/* log writer thread completion */
	kcondvar_t	zl_cv_suspend;	/* log suspend completion */
	uint8_t		zl_suspending;	/* log is currently suspending */
	uint8_t		zl_keep_first;	/* keep first log block in destroy */
	uint8_t		zl_replay;	/* replaying records while set */
	uint8_t		zl_stop_sync;	/* for debugging */
	uint8_t		zl_writer;	/* boolean: write setup in progress */
	uint8_t		zl_logbias;	/* latency or throughput */
	uint8_t		zl_sync;	/* synchronous or asynchronous */
	/* Results of the last zil_parse() pass. */
	int		zl_parse_error;	/* last zil_parse() error */
	uint64_t	zl_parse_blk_seq; /* highest blk seq on last parse */
	uint64_t	zl_parse_lr_seq; /* highest lr seq on last parse */
	uint64_t	zl_parse_blk_count; /* number of blocks parsed */
	uint64_t	zl_parse_lr_count; /* number of log records parsed */
	/* Commit batching state. */
	uint64_t	zl_next_batch;	/* next batch number */
	uint64_t	zl_com_batch;	/* committed batch number */
	kcondvar_t	zl_cv_batch[2];	/* batch condition variables */
	itxg_t		zl_itxg[TXG_SIZE]; /* intent log txg chains */
	list_t		zl_itx_commit_list; /* itx list to be committed */
	uint64_t	zl_cur_used;	/* current commit log size used */
	list_t		zl_lwb_list;	/* in-flight log write list */
	kmutex_t	zl_vdev_lock;	/* protects zl_vdev_tree */
	avl_tree_t	zl_vdev_tree;	/* vdevs to flush in zil_commit() */
	avl_tree_t	zl_bp_tree;	/* track bps during log parse */
	clock_t		zl_replay_time;	/* lbolt of when replay started */
	uint64_t	zl_replay_blks;	/* number of log blocks replayed */
	zil_header_t	zl_old_header;	/* debugging aid */
	uint_t		zl_prev_blks[ZIL_PREV_BLKS]; /* size - sector rounded */
	uint_t		zl_prev_rotor;	/* rotor for zl_prev_blks[] */
	txg_node_t	zl_dirty_link;	/* protected by dp_dirty_zilogs list */
};

/*
 * AVL node recording a DVA.
 * NOTE(review): presumably the element type of zilog->zl_bp_tree, which
 * tracks block pointers during log parse -- confirm against its users.
 */
typedef struct zil_bp_node {
	dva_t		zn_dva;		/* DVA recorded by this node */
	avl_node_t	zn_node;	/* AVL tree linkage */
} zil_bp_node_t;

/*
 * Maximum amount of write data that can be put into a single log block:
 * SPA_OLD_MAXBLOCKSIZE less the space taken by one zil_chain_t and one
 * lr_write_t.
 */
#define	ZIL_MAX_LOG_DATA (SPA_OLD_MAXBLOCKSIZE - sizeof (zil_chain_t) - \
    sizeof (lr_write_t))

/*
 * Maximum amount of log space we agree to waste in order to reduce the
 * number of WR_NEED_COPY chunks, and with it the zl_get_data() overhead.
 * Defined as one eighth (12.5%) of ZIL_MAX_LOG_DATA.
 */
#define	ZIL_MAX_WASTE_SPACE (ZIL_MAX_LOG_DATA / 8)

/*
 * Maximum amount of write data for WR_COPIED.  Fall back to WR_NEED_COPY
 * as more space efficient if we can't fit at least two log records into
 * a maximum sized log block.  Computed as half of the space left after
 * the zil_chain_t, less one lr_write_t.
 */
#define	ZIL_MAX_COPIED_DATA ((SPA_OLD_MAXBLOCKSIZE - \
    sizeof (zil_chain_t)) / 2 - sizeof (lr_write_t))

#ifdef	__cplusplus
}
#endif

#endif	/* _SYS_ZIL_IMPL_H */
Initial Linux ZFS GIT Repo 2008-11-20 20:01:55 +00:00			`/*`
			`* CDDL HEADER START`
			`*`
			`* The contents of this file are subject to the terms of the`
			`* Common Development and Distribution License (the "License").`
			`* You may not use this file except in compliance with the License.`
			`*`
			`* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE`
			`* or http://www.opensolaris.org/os/licensing.`
			`* See the License for the specific language governing permissions`
			`* and limitations under the License.`
			`*`
			`* When distributing Covered Code, include this CDDL HEADER in each`
			`* file and include the License file at usr/src/OPENSOLARIS.LICENSE.`
			`* If applicable, add the following below this CDDL HEADER, with the`
			`* fields enclosed by brackets "[]" replaced with your own identifying`
			`* information: Portions Copyright [yyyy] [name of copyright owner]`
			`*`
			`* CDDL HEADER END`
			`*/`
			`/*`
Update core ZFS code from build 121 to build 141. 2010-05-28 20:45:14 +00:00			`* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.`
Illumos #3086: unnecessarily setting DS_FLAG_INCONSISTENT on async 3086 unnecessarily setting DS_FLAG_INCONSISTENT on async destroyed datasets Reviewed by: Christopher Siden <chris.siden@delphix.com> Approved by: Eric Schrock <Eric.Schrock@delphix.com> References: illumos/illumos-gate@ce636f8b38e8c9ff484e880d9abb27251a882860 illumos changeset: 13776:cd512c80fd75 https://www.illumos.org/issues/3086 Ported-by: Brian Behlendorf <behlendorf1@llnl.gov> 2012-12-15 00:13:40 +00:00			`* Copyright (c) 2012 by Delphix. All rights reserved.`
Initial Linux ZFS GIT Repo 2008-11-20 20:01:55 +00:00			`*/`

Update core ZFS code from build 121 to build 141. 2010-05-28 20:45:14 +00:00			`/* Portions Copyright 2010 Robert Milkowski */`

Initial Linux ZFS GIT Repo 2008-11-20 20:01:55 +00:00			`#ifndef _SYS_ZIL_IMPL_H`
			`#define _SYS_ZIL_IMPL_H`

			`#include <sys/zil.h>`
			`#include <sys/dmu_objset.h>`

			`#ifdef __cplusplus`
			`extern "C" {`
			`#endif`

			`/*`
			`* Log write buffer.`
			`*/`
			`typedef struct lwb {`
			`zilog_t *lwb_zilog; /* back pointer to log struct */`
			`blkptr_t lwb_blk; /* on disk address of this log blk */`
cstyle: Resolve C style issues The vast majority of these changes are in Linux specific code. They are the result of not having an automated style checker to validate the code when it was originally written. Others were caused when the common code was slightly adjusted for Linux. This patch contains no functional changes. It only refreshes the code to conform to style guide. Everyone submitting patches for inclusion upstream should now run 'make checkstyle' and resolve any warning prior to opening a pull request. The automated builders have been updated to fail a build if when 'make checkstyle' detects an issue. Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Closes #1821 2013-11-01 19:26:11 +00:00			`boolean_t lwb_fastwrite; /* is blk marked for fastwrite? */`
OpenZFS 7578 - Fix/improve some aspects of ZIL writing - After some ZIL changes 6 years ago zil_slog_limit got partially broken due to zl_itx_list_sz not updated when async itx'es upgraded to sync. Actually because of other changes about that time zl_itx_list_sz is not really required to implement the functionality, so this patch removes some unneeded broken code and variables. - Original idea of zil_slog_limit was to reduce chance of SLOG abuse by single heavy logger, that increased latency for other (more latency critical) loggers, by pushing heavy log out into the main pool instead of SLOG. Beside huge latency increase for heavy writers, this implementation caused double write of all data, since the log records were explicitly prepared for SLOG. Since we now have I/O scheduler, I've found it can be much more efficient to reduce priority of heavy logger SLOG writes from ZIO_PRIORITY_SYNC_WRITE to ZIO_PRIORITY_ASYNC_WRITE, while still leave them on SLOG. - Existing ZIL implementation had problem with space efficiency when it has to write large chunks of data into log blocks of limited size. In some cases efficiency stopped to almost as low as 50%. In case of ZIL stored on spinning rust, that also reduced log write speed in half, since head had to uselessly fly over allocated but not written areas. This change improves the situation by offloading problematic operations from z_log_write() to zil_lwb_commit(), which knows real situation of log blocks allocation and can split large requests into pieces much more efficiently. Also as side effect it removes one of two data copy operations done by ZIL code WR_COPIED case. - While there, untangle and unify code of z_log_write() functions. Also zfs_log_write() alike to zvol_log_write() can now handle writes crossing block boundary, that may also improve efficiency if ZPL is made to do that. Sponsored by: iXsystems, Inc. 
Authored by: Alexander Motin <mav@FreeBSD.org> Reviewed by: Matthew Ahrens <mahrens@delphix.com> Reviewed by: Prakash Surya <prakash.surya@delphix.com> Reviewed by: Andriy Gapon <avg@FreeBSD.org> Reviewed by: Steven Hartland <steven.hartland@multiplay.co.uk> Reviewed by: Brad Lewis <brad.lewis@delphix.com> Reviewed by: Richard Elling <Richard.Elling@RichardElling.com> Approved by: Robert Mustacchi <rm@joyent.com> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: Richard Yao <ryao@gentoo.org> Ported-by: Giuseppe Di Natale <dinatale2@llnl.gov> OpenZFS-issue: https://www.illumos.org/issues/7578 OpenZFS-commit: https://github.com/openzfs/openzfs/commit/aeb13ac Closes #6191 2017-06-09 16:15:37 +00:00			`boolean_t lwb_slog; /* lwb_blk is on SLOG device */`
Initial Linux ZFS GIT Repo 2008-11-20 20:01:55 +00:00			`int lwb_nused; /* # used bytes in buffer */`
			`int lwb_sz; /* size of block and buffer */`
			`char *lwb_buf; /* log write buffer */`
			`zio_t *lwb_zio; /* zio for this buffer */`
Update core ZFS code from build 121 to build 141. 2010-05-28 20:45:14 +00:00			`dmu_tx_t *lwb_tx; /* tx for log block allocation */`
Initial Linux ZFS GIT Repo 2008-11-20 20:01:55 +00:00			`uint64_t lwb_max_txg; /* highest txg in this lwb */`
			`list_node_t lwb_node; /* zilog->zl_lwb_list linkage */`
			`} lwb_t;`

Update to onnv_147 This is the last official OpenSolaris tag before the public development tree was closed. 2010-08-26 21:24:34 +00:00			`/*`
			`* Intent log transaction lists`
			`*/`
			`typedef struct itxs {`
			`list_t i_sync_list; /* list of synchronous itxs */`
			`avl_tree_t i_async_tree; /* tree of foids for async itxs */`
			`} itxs_t;`

			`typedef struct itxg {`
			`kmutex_t itxg_lock; /* lock for this structure */`
			`uint64_t itxg_txg; /* txg for this chain */`
			`itxs_t *itxg_itxs; /* sync and async itxs */`
			`} itxg_t;`

			`/* for async nodes we build up an AVL tree of lists of async itxs per file */`
			`typedef struct itx_async_node {`
			`uint64_t ia_foid; /* file object id */`
			`list_t ia_list; /* list of async itxs for this foid */`
			`avl_node_t ia_node; /* AVL tree linkage */`
			`} itx_async_node_t;`

Initial Linux ZFS GIT Repo 2008-11-20 20:01:55 +00:00			`/*`
			`* Vdev flushing: during a zil_commit(), we build up an AVL tree of the vdevs`
			`* we've touched so we know which ones need a write cache flush at the end.`
			`*/`
			`typedef struct zil_vdev_node {`
			`uint64_t zv_vdev; /* vdev to be flushed */`
			`avl_node_t zv_node; /* AVL tree linkage */`
			`} zil_vdev_node_t;`

Update core ZFS code from build 121 to build 141. 2010-05-28 20:45:14 +00:00			`#define ZIL_PREV_BLKS 16`

Initial Linux ZFS GIT Repo 2008-11-20 20:01:55 +00:00			`/*`
			`* Stable storage intent log management structure. One per dataset.`
			`*/`
			`struct zilog {`
			`kmutex_t zl_lock; /* protects most zilog_t fields */`
			`struct dsl_pool *zl_dmu_pool; /* DSL pool */`
			`spa_t *zl_spa; /* handle for read/write log */`
			`const zil_header_t *zl_header; /* log header buffer */`
			`objset_t *zl_os; /* object set we're logging */`
			`zil_get_data_t *zl_get_data; /* callback to get object content */`
			`zio_t *zl_root_zio; /* log writer root zio */`
Update core ZFS code from build 121 to build 141. 2010-05-28 20:45:14 +00:00			`uint64_t zl_lr_seq; /* on-disk log record sequence number */`
			`uint64_t zl_commit_lr_seq; /* last committed on-disk lr seq */`
Initial Linux ZFS GIT Repo 2008-11-20 20:01:55 +00:00			`uint64_t zl_destroy_txg; /* txg of last zil_destroy() */`
Rebase master to b105 2009-01-15 21:59:39 +00:00			`uint64_t zl_replayed_seq[TXG_SIZE]; /* last replayed rec seq */`
			`uint64_t zl_replaying_seq; /* current replay seq number */`
Initial Linux ZFS GIT Repo 2008-11-20 20:01:55 +00:00			`uint32_t zl_suspend; /* log suspend count */`
			`kcondvar_t zl_cv_writer; /* log writer thread completion */`
			`kcondvar_t zl_cv_suspend; /* log suspend completion */`
			`uint8_t zl_suspending; /* log is currently suspending */`
			`uint8_t zl_keep_first; /* keep first log block in destroy */`
Rebase master to b105 2009-01-15 21:59:39 +00:00			`uint8_t zl_replay; /* replaying records while set */`
Initial Linux ZFS GIT Repo 2008-11-20 20:01:55 +00:00			`uint8_t zl_stop_sync; /* for debugging */`
			`uint8_t zl_writer; /* boolean: write setup in progress */`
Update core ZFS code from build 121 to build 141. 2010-05-28 20:45:14 +00:00			`uint8_t zl_logbias; /* latency or throughput */`
			`uint8_t zl_sync; /* synchronous or asynchronous */`
			`int zl_parse_error; /* last zil_parse() error */`
			`uint64_t zl_parse_blk_seq; /* highest blk seq on last parse */`
			`uint64_t zl_parse_lr_seq; /* highest lr seq on last parse */`
			`uint64_t zl_parse_blk_count; /* number of blocks parsed */`
			`uint64_t zl_parse_lr_count; /* number of log records parsed */`
Update to onnv_147 This is the last official OpenSolaris tag before the public development tree was closed. 2010-08-26 21:24:34 +00:00			`uint64_t zl_next_batch; /* next batch number */`
			`uint64_t zl_com_batch; /* committed batch number */`
			`kcondvar_t zl_cv_batch[2]; /* batch condition variables */`
			`itxg_t zl_itxg[TXG_SIZE]; /* intent log txg chains */`
			`list_t zl_itx_commit_list; /* itx list to be committed */`
Initial Linux ZFS GIT Repo 2008-11-20 20:01:55 +00:00			`uint64_t zl_cur_used; /* current commit log size used */`
			`list_t zl_lwb_list; /* in-flight log write list */`
			`kmutex_t zl_vdev_lock; /* protects zl_vdev_tree */`
			`avl_tree_t zl_vdev_tree; /* vdevs to flush in zil_commit() */`
Update core ZFS code from build 121 to build 141. 2010-05-28 20:45:14 +00:00			`avl_tree_t zl_bp_tree; /* track bps during log parse */`
Initial Linux ZFS GIT Repo 2008-11-20 20:01:55 +00:00			`clock_t zl_replay_time; /* lbolt of when replay started */`
			`uint64_t zl_replay_blks; /* number of log blocks replayed */`
Update core ZFS code from build 121 to build 141. 2010-05-28 20:45:14 +00:00			`zil_header_t zl_old_header; /* debugging aid */`
			`uint_t zl_prev_blks[ZIL_PREV_BLKS]; /* size - sector rounded */`
			`uint_t zl_prev_rotor; /* rotor for zl_prev[] */`
Illumos #3086: unnecessarily setting DS_FLAG_INCONSISTENT on async 3086 unnecessarily setting DS_FLAG_INCONSISTENT on async destroyed datasets Reviewed by: Christopher Siden <chris.siden@delphix.com> Approved by: Eric Schrock <Eric.Schrock@delphix.com> References: illumos/illumos-gate@ce636f8b38e8c9ff484e880d9abb27251a882860 illumos changeset: 13776:cd512c80fd75 https://www.illumos.org/issues/3086 Ported-by: Brian Behlendorf <behlendorf1@llnl.gov> 2012-12-15 00:13:40 +00:00			`txg_node_t zl_dirty_link; /* protected by dp_dirty_zilogs list */`
Initial Linux ZFS GIT Repo 2008-11-20 20:01:55 +00:00			`};`

Update core ZFS code from build 121 to build 141. 2010-05-28 20:45:14 +00:00			`typedef struct zil_bp_node {`
Initial Linux ZFS GIT Repo 2008-11-20 20:01:55 +00:00			`dva_t zn_dva;`
			`avl_node_t zn_node;`
Update core ZFS code from build 121 to build 141. 2010-05-28 20:45:14 +00:00			`} zil_bp_node_t;`
Initial Linux ZFS GIT Repo 2008-11-20 20:01:55 +00:00
OpenZFS 7578 - Fix/improve some aspects of ZIL writing - After some ZIL changes 6 years ago zil_slog_limit got partially broken due to zl_itx_list_sz not updated when async itx'es upgraded to sync. Actually because of other changes about that time zl_itx_list_sz is not really required to implement the functionality, so this patch removes some unneeded broken code and variables. - Original idea of zil_slog_limit was to reduce chance of SLOG abuse by single heavy logger, that increased latency for other (more latency critical) loggers, by pushing heavy log out into the main pool instead of SLOG. Beside huge latency increase for heavy writers, this implementation caused double write of all data, since the log records were explicitly prepared for SLOG. Since we now have I/O scheduler, I've found it can be much more efficient to reduce priority of heavy logger SLOG writes from ZIO_PRIORITY_SYNC_WRITE to ZIO_PRIORITY_ASYNC_WRITE, while still leave them on SLOG. - Existing ZIL implementation had problem with space efficiency when it has to write large chunks of data into log blocks of limited size. In some cases efficiency stopped to almost as low as 50%. In case of ZIL stored on spinning rust, that also reduced log write speed in half, since head had to uselessly fly over allocated but not written areas. This change improves the situation by offloading problematic operations from z_log_write() to zil_lwb_commit(), which knows real situation of log blocks allocation and can split large requests into pieces much more efficiently. Also as side effect it removes one of two data copy operations done by ZIL code WR_COPIED case. - While there, untangle and unify code of z_log_write() functions. Also zfs_log_write() alike to zvol_log_write() can now handle writes crossing block boundary, that may also improve efficiency if ZPL is made to do that. Sponsored by: iXsystems, Inc. 
Authored by: Alexander Motin <mav@FreeBSD.org> Reviewed by: Matthew Ahrens <mahrens@delphix.com> Reviewed by: Prakash Surya <prakash.surya@delphix.com> Reviewed by: Andriy Gapon <avg@FreeBSD.org> Reviewed by: Steven Hartland <steven.hartland@multiplay.co.uk> Reviewed by: Brad Lewis <brad.lewis@delphix.com> Reviewed by: Richard Elling <Richard.Elling@RichardElling.com> Approved by: Robert Mustacchi <rm@joyent.com> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: Richard Yao <ryao@gentoo.org> Ported-by: Giuseppe Di Natale <dinatale2@llnl.gov> OpenZFS-issue: https://www.illumos.org/issues/7578 OpenZFS-commit: https://github.com/openzfs/openzfs/commit/aeb13ac Closes #6191 2017-06-09 16:15:37 +00:00			`/*`
			`* Maximum amount of write data that can be put into single log block.`
			`*/`
Illumos 5027 - zfs large block support 5027 zfs large block support Reviewed by: Alek Pinchuk <pinchuk.alek@gmail.com> Reviewed by: George Wilson <george.wilson@delphix.com> Reviewed by: Josef 'Jeff' Sipek <josef.sipek@nexenta.com> Reviewed by: Richard Elling <richard.elling@richardelling.com> Reviewed by: Saso Kiselkov <skiselkov.ml@gmail.com> Reviewed by: Brian Behlendorf <behlendorf1@llnl.gov> Approved by: Dan McDonald <danmcd@omniti.com> References: https://www.illumos.org/issues/5027 https://github.com/illumos/illumos-gate/commit/b515258 Porting Notes: * Included in this patch is a tiny ISP2() cleanup in zio_init() from Illumos 5255. * Unlike the upstream Illumos commit this patch does not impose an arbitrary 128K block size limit on volumes. Volumes, like filesystems, are limited by the zfs_max_recordsize=1M module option. * By default the maximum record size is limited to 1M by the module option zfs_max_recordsize. This value may be safely increased up to 16M which is the largest block size supported by the on-disk format. At the moment, 1M blocks clearly offer a significant performance improvement but the benefits of going beyond this for the majority of workloads are less clear. * The illumos version of this patch increased DMU_MAX_ACCESS to 32M. This was determined not to be large enough when using 16M blocks because the zfs_make_xattrdir() function will fail (EFBIG) when assigning a TX. This was immediately observed under Linux because all newly created files must have a security xattr created and that was failing. Therefore, we've set DMU_MAX_ACCESS to 64M. * On 32-bit platforms a hard limit of 1M is set for blocks due to the limited virtual address space. We should be able to relax this one the ABD patches are merged. Ported-by: Brian Behlendorf <behlendorf1@llnl.gov> Closes #354 2014-11-03 20:15:08 +00:00			`#define ZIL_MAX_LOG_DATA (SPA_OLD_MAXBLOCKSIZE - sizeof (zil_chain_t) - \`
Rebase master to b117 2009-07-02 22:44:48 +00:00			`sizeof (lr_write_t))`

OpenZFS 7578 - Fix/improve some aspects of ZIL writing - After some ZIL changes 6 years ago zil_slog_limit got partially broken due to zl_itx_list_sz not updated when async itx'es upgraded to sync. Actually because of other changes about that time zl_itx_list_sz is not really required to implement the functionality, so this patch removes some unneeded broken code and variables. - Original idea of zil_slog_limit was to reduce chance of SLOG abuse by single heavy logger, that increased latency for other (more latency critical) loggers, by pushing heavy log out into the main pool instead of SLOG. Beside huge latency increase for heavy writers, this implementation caused double write of all data, since the log records were explicitly prepared for SLOG. Since we now have I/O scheduler, I've found it can be much more efficient to reduce priority of heavy logger SLOG writes from ZIO_PRIORITY_SYNC_WRITE to ZIO_PRIORITY_ASYNC_WRITE, while still leave them on SLOG. - Existing ZIL implementation had problem with space efficiency when it has to write large chunks of data into log blocks of limited size. In some cases efficiency stopped to almost as low as 50%. In case of ZIL stored on spinning rust, that also reduced log write speed in half, since head had to uselessly fly over allocated but not written areas. This change improves the situation by offloading problematic operations from z_log_write() to zil_lwb_commit(), which knows real situation of log blocks allocation and can split large requests into pieces much more efficiently. Also as side effect it removes one of two data copy operations done by ZIL code WR_COPIED case. - While there, untangle and unify code of z_log_write() functions. Also zfs_log_write() alike to zvol_log_write() can now handle writes crossing block boundary, that may also improve efficiency if ZPL is made to do that. Sponsored by: iXsystems, Inc. 
Authored by: Alexander Motin <mav@FreeBSD.org> Reviewed by: Matthew Ahrens <mahrens@delphix.com> Reviewed by: Prakash Surya <prakash.surya@delphix.com> Reviewed by: Andriy Gapon <avg@FreeBSD.org> Reviewed by: Steven Hartland <steven.hartland@multiplay.co.uk> Reviewed by: Brad Lewis <brad.lewis@delphix.com> Reviewed by: Richard Elling <Richard.Elling@RichardElling.com> Approved by: Robert Mustacchi <rm@joyent.com> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: Richard Yao <ryao@gentoo.org> Ported-by: Giuseppe Di Natale <dinatale2@llnl.gov> OpenZFS-issue: https://www.illumos.org/issues/7578 OpenZFS-commit: https://github.com/openzfs/openzfs/commit/aeb13ac Closes #6191 2017-06-09 16:15:37 +00:00			`/*`
			`* Maximum amount of log space we agree to waste to reduce number of`
			`* WR_NEED_COPY chunks to reduce zl_get_data() overhead (~12%).`
			`*/`
			`#define ZIL_MAX_WASTE_SPACE (ZIL_MAX_LOG_DATA / 8)`

			`/*`
			`* Maximum amount of write data for WR_COPIED. Fall back to WR_NEED_COPY`
			`* as more space efficient if we can't fit at least two log records into`
			`* maximum sized log block.`
			`*/`
			`#define ZIL_MAX_COPIED_DATA ((SPA_OLD_MAXBLOCKSIZE - \`
			`sizeof (zil_chain_t)) / 2 - sizeof (lr_write_t))`

Initial Linux ZFS GIT Repo 2008-11-20 20:01:55 +00:00			`#ifdef __cplusplus`
			`}`
			`#endif`

			`#endif /* _SYS_ZIL_IMPL_H */`