Merge branch 'b_tracepoints'
Signed-off-by: Prakash Surya <surya1@llnl.gov> Signed-off-by: Ned Bass <bass6@llnl.gov> Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Closes #2874
This commit is contained in:
commit
bc9f4131a1
|
@ -0,0 +1,59 @@
|
||||||
|
dnl #
|
||||||
|
dnl # Ensure the DECLARE_EVENT_CLASS macro is available to non-GPL modules.
|
||||||
|
dnl #
|
||||||
|
AC_DEFUN([ZFS_AC_KERNEL_DECLARE_EVENT_CLASS], [
|
||||||
|
tmp_flags="$EXTRA_KCFLAGS"
|
||||||
|
EXTRA_KCFLAGS="-I\$(src)"
|
||||||
|
|
||||||
|
AC_MSG_CHECKING([whether DECLARE_EVENT_CLASS() is available])
|
||||||
|
ZFS_LINUX_TRY_COMPILE_HEADER([
|
||||||
|
#include <linux/module.h>
|
||||||
|
MODULE_LICENSE(ZFS_META_LICENSE);
|
||||||
|
|
||||||
|
#define CREATE_TRACE_POINTS
|
||||||
|
#include "conftest.h"
|
||||||
|
],[
|
||||||
|
trace_zfs_autoconf_event_one(1UL);
|
||||||
|
trace_zfs_autoconf_event_two(2UL);
|
||||||
|
],[
|
||||||
|
AC_MSG_RESULT(yes)
|
||||||
|
AC_DEFINE(HAVE_DECLARE_EVENT_CLASS, 1,
|
||||||
|
[DECLARE_EVENT_CLASS() is available])
|
||||||
|
],[
|
||||||
|
AC_MSG_RESULT(no)
|
||||||
|
],[
|
||||||
|
#if !defined(_CONFTEST_H) || defined(TRACE_HEADER_MULTI_READ)
|
||||||
|
#define _CONFTEST_H
|
||||||
|
|
||||||
|
#undef TRACE_SYSTEM
|
||||||
|
#define TRACE_SYSTEM zfs
|
||||||
|
#include <linux/tracepoint.h>
|
||||||
|
|
||||||
|
DECLARE_EVENT_CLASS(zfs_autoconf_event_class,
|
||||||
|
TP_PROTO(unsigned long i),
|
||||||
|
TP_ARGS(i),
|
||||||
|
TP_STRUCT__entry(
|
||||||
|
__field(unsigned long, i)
|
||||||
|
),
|
||||||
|
TP_fast_assign(
|
||||||
|
__entry->i = i;
|
||||||
|
),
|
||||||
|
TP_printk("i = %lu", __entry->i)
|
||||||
|
);
|
||||||
|
|
||||||
|
#define DEFINE_AUTOCONF_EVENT(name) \
|
||||||
|
DEFINE_EVENT(zfs_autoconf_event_class, name, \
|
||||||
|
TP_PROTO(unsigned long i), \
|
||||||
|
TP_ARGS(i))
|
||||||
|
DEFINE_AUTOCONF_EVENT(zfs_autoconf_event_one);
|
||||||
|
DEFINE_AUTOCONF_EVENT(zfs_autoconf_event_two);
|
||||||
|
|
||||||
|
#endif /* _CONFTEST_H */
|
||||||
|
|
||||||
|
#undef TRACE_INCLUDE_PATH
|
||||||
|
#define TRACE_INCLUDE_PATH .
|
||||||
|
#define TRACE_INCLUDE_FILE conftest
|
||||||
|
#include <trace/define_trace.h>
|
||||||
|
])
|
||||||
|
EXTRA_KCFLAGS="$tmp_flags"
|
||||||
|
])
|
|
@ -6,6 +6,7 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [
|
||||||
ZFS_AC_SPL
|
ZFS_AC_SPL
|
||||||
ZFS_AC_TEST_MODULE
|
ZFS_AC_TEST_MODULE
|
||||||
ZFS_AC_KERNEL_CONFIG
|
ZFS_AC_KERNEL_CONFIG
|
||||||
|
ZFS_AC_KERNEL_DECLARE_EVENT_CLASS
|
||||||
ZFS_AC_KERNEL_BDEV_BLOCK_DEVICE_OPERATIONS
|
ZFS_AC_KERNEL_BDEV_BLOCK_DEVICE_OPERATIONS
|
||||||
ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID
|
ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID
|
||||||
ZFS_AC_KERNEL_TYPE_FMODE_T
|
ZFS_AC_KERNEL_TYPE_FMODE_T
|
||||||
|
@ -506,9 +507,18 @@ AC_DEFUN([ZFS_AC_KERNEL_CONFIG_DEBUG_LOCK_ALLOC], [
|
||||||
])
|
])
|
||||||
|
|
||||||
dnl #
|
dnl #
|
||||||
dnl # ZFS_LINUX_CONFTEST
|
dnl # ZFS_LINUX_CONFTEST_H
|
||||||
dnl #
|
dnl #
|
||||||
AC_DEFUN([ZFS_LINUX_CONFTEST], [
|
AC_DEFUN([ZFS_LINUX_CONFTEST_H], [
|
||||||
|
cat - <<_ACEOF >conftest.h
|
||||||
|
$1
|
||||||
|
_ACEOF
|
||||||
|
])
|
||||||
|
|
||||||
|
dnl #
|
||||||
|
dnl # ZFS_LINUX_CONFTEST_C
|
||||||
|
dnl #
|
||||||
|
AC_DEFUN([ZFS_LINUX_CONFTEST_C], [
|
||||||
cat confdefs.h - <<_ACEOF >conftest.c
|
cat confdefs.h - <<_ACEOF >conftest.c
|
||||||
$1
|
$1
|
||||||
_ACEOF
|
_ACEOF
|
||||||
|
@ -534,13 +544,14 @@ dnl #
|
||||||
dnl # ZFS_LINUX_COMPILE_IFELSE / like AC_COMPILE_IFELSE
|
dnl # ZFS_LINUX_COMPILE_IFELSE / like AC_COMPILE_IFELSE
|
||||||
dnl #
|
dnl #
|
||||||
AC_DEFUN([ZFS_LINUX_COMPILE_IFELSE], [
|
AC_DEFUN([ZFS_LINUX_COMPILE_IFELSE], [
|
||||||
m4_ifvaln([$1], [ZFS_LINUX_CONFTEST([$1])])
|
m4_ifvaln([$1], [ZFS_LINUX_CONFTEST_C([$1])])
|
||||||
|
m4_ifvaln([$6], [ZFS_LINUX_CONFTEST_H([$6])], [ZFS_LINUX_CONFTEST_H([])])
|
||||||
rm -Rf build && mkdir -p build && touch build/conftest.mod.c
|
rm -Rf build && mkdir -p build && touch build/conftest.mod.c
|
||||||
echo "obj-m := conftest.o" >build/Makefile
|
echo "obj-m := conftest.o" >build/Makefile
|
||||||
modpost_flag=''
|
modpost_flag=''
|
||||||
test "x$enable_linux_builtin" = xyes && modpost_flag='modpost=true' # fake modpost stage
|
test "x$enable_linux_builtin" = xyes && modpost_flag='modpost=true' # fake modpost stage
|
||||||
AS_IF(
|
AS_IF(
|
||||||
[AC_TRY_COMMAND(cp conftest.c build && make [$2] -C $LINUX_OBJ EXTRA_CFLAGS="-Werror $EXTRA_KCFLAGS" $ARCH_UM M=$PWD/build $modpost_flag) >/dev/null && AC_TRY_COMMAND([$3])],
|
[AC_TRY_COMMAND(cp conftest.c conftest.h build && make [$2] -C $LINUX_OBJ EXTRA_CFLAGS="-Werror $EXTRA_KCFLAGS" $ARCH_UM M=$PWD/build $modpost_flag) >/dev/null && AC_TRY_COMMAND([$3])],
|
||||||
[$4],
|
[$4],
|
||||||
[_AC_MSG_LOG_CONFTEST m4_ifvaln([$5],[$5])]
|
[_AC_MSG_LOG_CONFTEST m4_ifvaln([$5],[$5])]
|
||||||
)
|
)
|
||||||
|
@ -627,3 +638,16 @@ AC_DEFUN([ZFS_LINUX_TRY_COMPILE_SYMBOL], [
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
])
|
])
|
||||||
|
|
||||||
|
dnl #
|
||||||
|
dnl # ZFS_LINUX_TRY_COMPILE_HEADER
|
||||||
|
dnl # like ZFS_LINUX_TRY_COMPILE, except the contents of conftest.h are
|
||||||
|
dnl # provided via the fifth parameter
|
||||||
|
dnl #
|
||||||
|
AC_DEFUN([ZFS_LINUX_TRY_COMPILE_HEADER],
|
||||||
|
[ZFS_LINUX_COMPILE_IFELSE(
|
||||||
|
[AC_LANG_SOURCE([ZFS_LANG_PROGRAM([[$1]], [[$2]])])],
|
||||||
|
[modules],
|
||||||
|
[test -s build/conftest.o],
|
||||||
|
[$3], [$4], [AC_LANG_SOURCE([$5])])
|
||||||
|
])
|
||||||
|
|
|
@ -2,6 +2,7 @@ SUBDIRS = fm fs
|
||||||
|
|
||||||
COMMON_H = \
|
COMMON_H = \
|
||||||
$(top_srcdir)/include/sys/arc.h \
|
$(top_srcdir)/include/sys/arc.h \
|
||||||
|
$(top_srcdir)/include/sys/arc_impl.h \
|
||||||
$(top_srcdir)/include/sys/avl.h \
|
$(top_srcdir)/include/sys/avl.h \
|
||||||
$(top_srcdir)/include/sys/avl_impl.h \
|
$(top_srcdir)/include/sys/avl_impl.h \
|
||||||
$(top_srcdir)/include/sys/blkptr.h \
|
$(top_srcdir)/include/sys/blkptr.h \
|
||||||
|
@ -39,11 +40,13 @@ COMMON_H = \
|
||||||
$(top_srcdir)/include/sys/rrwlock.h \
|
$(top_srcdir)/include/sys/rrwlock.h \
|
||||||
$(top_srcdir)/include/sys/sa.h \
|
$(top_srcdir)/include/sys/sa.h \
|
||||||
$(top_srcdir)/include/sys/sa_impl.h \
|
$(top_srcdir)/include/sys/sa_impl.h \
|
||||||
|
$(top_srcdir)/include/sys/sdt.h \
|
||||||
$(top_srcdir)/include/sys/spa_boot.h \
|
$(top_srcdir)/include/sys/spa_boot.h \
|
||||||
$(top_srcdir)/include/sys/space_map.h \
|
$(top_srcdir)/include/sys/space_map.h \
|
||||||
$(top_srcdir)/include/sys/space_reftree.h \
|
$(top_srcdir)/include/sys/space_reftree.h \
|
||||||
$(top_srcdir)/include/sys/spa.h \
|
$(top_srcdir)/include/sys/spa.h \
|
||||||
$(top_srcdir)/include/sys/spa_impl.h \
|
$(top_srcdir)/include/sys/spa_impl.h \
|
||||||
|
$(top_srcdir)/include/sys/trace.h \
|
||||||
$(top_srcdir)/include/sys/txg.h \
|
$(top_srcdir)/include/sys/txg.h \
|
||||||
$(top_srcdir)/include/sys/txg_impl.h \
|
$(top_srcdir)/include/sys/txg_impl.h \
|
||||||
$(top_srcdir)/include/sys/u8_textprep_data.h \
|
$(top_srcdir)/include/sys/u8_textprep_data.h \
|
||||||
|
|
|
@ -0,0 +1,157 @@
|
||||||
|
/*
|
||||||
|
* CDDL HEADER START
|
||||||
|
*
|
||||||
|
* The contents of this file are subject to the terms of the
|
||||||
|
* Common Development and Distribution License (the "License").
|
||||||
|
* You may not use this file except in compliance with the License.
|
||||||
|
*
|
||||||
|
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||||
|
* or http://www.opensolaris.org/os/licensing.
|
||||||
|
* See the License for the specific language governing permissions
|
||||||
|
* and limitations under the License.
|
||||||
|
*
|
||||||
|
* When distributing Covered Code, include this CDDL HEADER in each
|
||||||
|
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||||
|
* If applicable, add the following below this CDDL HEADER, with the
|
||||||
|
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||||
|
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||||
|
*
|
||||||
|
* CDDL HEADER END
|
||||||
|
*/
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||||
|
* Copyright (c) 2013 by Delphix. All rights reserved.
|
||||||
|
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
|
||||||
|
* Copyright 2013 Nexenta Systems, Inc. All rights reserved.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef _SYS_ARC_IMPL_H
|
||||||
|
#define _SYS_ARC_IMPL_H
|
||||||
|
|
||||||
|
#include <sys/arc.h>
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Note that buffers can be in one of 6 states:
|
||||||
|
* ARC_anon - anonymous (discussed below)
|
||||||
|
* ARC_mru - recently used, currently cached
|
||||||
|
* ARC_mru_ghost - recently used, no longer in cache
|
||||||
|
* ARC_mfu - frequently used, currently cached
|
||||||
|
* ARC_mfu_ghost - frequently used, no longer in cache
|
||||||
|
* ARC_l2c_only - exists in L2ARC but not other states
|
||||||
|
* When there are no active references to the buffer, they are
|
||||||
|
* linked onto a list in one of these arc states. These are
|
||||||
|
* the only buffers that can be evicted or deleted. Within each
|
||||||
|
* state there are multiple lists, one for meta-data and one for
|
||||||
|
* non-meta-data. Meta-data (indirect blocks, blocks of dnodes,
|
||||||
|
* etc.) is tracked separately so that it can be managed more
|
||||||
|
* explicitly: favored over data, limited explicitly.
|
||||||
|
*
|
||||||
|
* Anonymous buffers are buffers that are not associated with
|
||||||
|
* a DVA. These are buffers that hold dirty block copies
|
||||||
|
* before they are written to stable storage. By definition,
|
||||||
|
* they are "ref'd" and are considered part of arc_mru
|
||||||
|
* that cannot be freed. Generally, they will acquire a DVA
|
||||||
|
* as they are written and migrate onto the arc_mru list.
|
||||||
|
*
|
||||||
|
* The ARC_l2c_only state is for buffers that are in the second
|
||||||
|
* level ARC but no longer in any of the ARC_m* lists. The second
|
||||||
|
* level ARC itself may also contain buffers that are in any of
|
||||||
|
* the ARC_m* states - meaning that a buffer can exist in two
|
||||||
|
* places. The reason for the ARC_l2c_only state is to keep the
|
||||||
|
* buffer header in the hash table, so that reads that hit the
|
||||||
|
* second level ARC benefit from these fast lookups.
|
||||||
|
*/
|
||||||
|
|
||||||
|
typedef struct arc_state {
|
||||||
|
list_t arcs_list[ARC_BUFC_NUMTYPES]; /* list of evictable buffers */
|
||||||
|
uint64_t arcs_lsize[ARC_BUFC_NUMTYPES]; /* amount of evictable data */
|
||||||
|
uint64_t arcs_size; /* total amount of data in this state */
|
||||||
|
kmutex_t arcs_mtx;
|
||||||
|
arc_state_type_t arcs_state;
|
||||||
|
} arc_state_t;
|
||||||
|
|
||||||
|
typedef struct l2arc_buf_hdr l2arc_buf_hdr_t;
|
||||||
|
|
||||||
|
typedef struct arc_callback arc_callback_t;
|
||||||
|
|
||||||
|
struct arc_callback {
|
||||||
|
void *acb_private;
|
||||||
|
arc_done_func_t *acb_done;
|
||||||
|
arc_buf_t *acb_buf;
|
||||||
|
zio_t *acb_zio_dummy;
|
||||||
|
arc_callback_t *acb_next;
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef struct arc_write_callback arc_write_callback_t;
|
||||||
|
|
||||||
|
struct arc_write_callback {
|
||||||
|
void *awcb_private;
|
||||||
|
arc_done_func_t *awcb_ready;
|
||||||
|
arc_done_func_t *awcb_physdone;
|
||||||
|
arc_done_func_t *awcb_done;
|
||||||
|
arc_buf_t *awcb_buf;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct arc_buf_hdr {
|
||||||
|
/* protected by hash lock */
|
||||||
|
dva_t b_dva;
|
||||||
|
uint64_t b_birth;
|
||||||
|
uint64_t b_cksum0;
|
||||||
|
|
||||||
|
kmutex_t b_freeze_lock;
|
||||||
|
zio_cksum_t *b_freeze_cksum;
|
||||||
|
|
||||||
|
arc_buf_hdr_t *b_hash_next;
|
||||||
|
arc_buf_t *b_buf;
|
||||||
|
uint32_t b_flags;
|
||||||
|
uint32_t b_datacnt;
|
||||||
|
|
||||||
|
arc_callback_t *b_acb;
|
||||||
|
kcondvar_t b_cv;
|
||||||
|
|
||||||
|
/* immutable */
|
||||||
|
arc_buf_contents_t b_type;
|
||||||
|
uint64_t b_size;
|
||||||
|
uint64_t b_spa;
|
||||||
|
|
||||||
|
/* protected by arc state mutex */
|
||||||
|
arc_state_t *b_state;
|
||||||
|
list_node_t b_arc_node;
|
||||||
|
|
||||||
|
/* updated atomically */
|
||||||
|
clock_t b_arc_access;
|
||||||
|
uint32_t b_mru_hits;
|
||||||
|
uint32_t b_mru_ghost_hits;
|
||||||
|
uint32_t b_mfu_hits;
|
||||||
|
uint32_t b_mfu_ghost_hits;
|
||||||
|
uint32_t b_l2_hits;
|
||||||
|
|
||||||
|
/* self protecting */
|
||||||
|
refcount_t b_refcnt;
|
||||||
|
|
||||||
|
l2arc_buf_hdr_t *b_l2hdr;
|
||||||
|
list_node_t b_l2node;
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef struct l2arc_dev {
|
||||||
|
vdev_t *l2ad_vdev; /* vdev */
|
||||||
|
spa_t *l2ad_spa; /* spa */
|
||||||
|
uint64_t l2ad_hand; /* next write location */
|
||||||
|
uint64_t l2ad_start; /* first addr on device */
|
||||||
|
uint64_t l2ad_end; /* last addr on device */
|
||||||
|
uint64_t l2ad_evict; /* last addr eviction reached */
|
||||||
|
boolean_t l2ad_first; /* first sweep through */
|
||||||
|
boolean_t l2ad_writing; /* currently writing */
|
||||||
|
list_t *l2ad_buflist; /* buffer list */
|
||||||
|
list_node_t l2ad_node; /* device list node */
|
||||||
|
} l2arc_dev_t;
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif /* _SYS_ARC_IMPL_H */
|
|
@ -0,0 +1,70 @@
|
||||||
|
/*
|
||||||
|
* CDDL HEADER START
|
||||||
|
*
|
||||||
|
* The contents of this file are subject to the terms of the
|
||||||
|
* Common Development and Distribution License, Version 1.0 only
|
||||||
|
* (the "License"). You may not use this file except in compliance
|
||||||
|
* with the License.
|
||||||
|
*
|
||||||
|
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||||
|
* or http://www.opensolaris.org/os/licensing.
|
||||||
|
* See the License for the specific language governing permissions
|
||||||
|
* and limitations under the License.
|
||||||
|
*
|
||||||
|
* When distributing Covered Code, include this CDDL HEADER in each
|
||||||
|
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||||
|
* If applicable, add the following below this CDDL HEADER, with the
|
||||||
|
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||||
|
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||||
|
*
|
||||||
|
* CDDL HEADER END
|
||||||
|
*/
|
||||||
|
/*
|
||||||
|
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||||
|
* Use is subject to license terms.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef _SYS_SDT_H
|
||||||
|
#define _SYS_SDT_H
|
||||||
|
|
||||||
|
#ifndef _KERNEL
|
||||||
|
|
||||||
|
#define ZFS_PROBE(a) ((void) 0)
|
||||||
|
#define ZFS_PROBE1(a, c) ((void) 0)
|
||||||
|
#define ZFS_PROBE2(a, c, e) ((void) 0)
|
||||||
|
#define ZFS_PROBE3(a, c, e, g) ((void) 0)
|
||||||
|
#define ZFS_PROBE4(a, c, e, g, i) ((void) 0)
|
||||||
|
#define ZFS_SET_ERROR(err) ((void) 0)
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
#if defined(HAVE_DECLARE_EVENT_CLASS)
|
||||||
|
|
||||||
|
#include <sys/trace.h>
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The set-error SDT probe is extra static, in that we declare its fake
|
||||||
|
* function literally, rather than with the DTRACE_PROBE1() macro. This is
|
||||||
|
* necessary so that SET_ERROR() can evaluate to a value, which wouldn't
|
||||||
|
* be possible if it required multiple statements (to declare the function
|
||||||
|
* and then call it).
|
||||||
|
*
|
||||||
|
* SET_ERROR() uses the comma operator so that it can be used without much
|
||||||
|
* additional code. For example, "return (EINVAL);" becomes
|
||||||
|
* "return (SET_ERROR(EINVAL));". Note that the argument will be evaluated
|
||||||
|
* twice, so it should not have side effects (e.g. something like:
|
||||||
|
* "return (SET_ERROR(log_error(EINVAL, info)));" would log the error twice).
|
||||||
|
*/
|
||||||
|
#define SET_ERROR(err) \
|
||||||
|
(trace_zfs_set__error(__FILE__, __func__, __LINE__, err), err)
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
#undef SET_ERROR
|
||||||
|
#define SET_ERROR(err) (err)
|
||||||
|
|
||||||
|
#endif /* HAVE_DECLARE_EVENT_CLASS */
|
||||||
|
|
||||||
|
#endif /* _KERNEL */
|
||||||
|
|
||||||
|
#endif /* _SYS_SDT_H */
|
File diff suppressed because it is too large
Load Diff
|
@ -66,6 +66,7 @@
|
||||||
#include <sys/sunddi.h>
|
#include <sys/sunddi.h>
|
||||||
#include <sys/ctype.h>
|
#include <sys/ctype.h>
|
||||||
#include <sys/disp.h>
|
#include <sys/disp.h>
|
||||||
|
#include <sys/trace.h>
|
||||||
#include <linux/dcache_compat.h>
|
#include <linux/dcache_compat.h>
|
||||||
#include <linux/utsname_compat.h>
|
#include <linux/utsname_compat.h>
|
||||||
|
|
||||||
|
@ -140,15 +141,12 @@
|
||||||
#define CE_PANIC 3 /* panic */
|
#define CE_PANIC 3 /* panic */
|
||||||
#define CE_IGNORE 4 /* print nothing */
|
#define CE_IGNORE 4 /* print nothing */
|
||||||
|
|
||||||
extern int aok;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* ZFS debugging
|
* ZFS debugging
|
||||||
*/
|
*/
|
||||||
|
|
||||||
extern void dprintf_setup(int *argc, char **argv);
|
extern void dprintf_setup(int *argc, char **argv);
|
||||||
extern void __dprintf(const char *file, const char *func,
|
|
||||||
int line, const char *fmt, ...);
|
|
||||||
extern void cmn_err(int, const char *, ...);
|
extern void cmn_err(int, const char *, ...);
|
||||||
extern void vcmn_err(int, const char *, va_list);
|
extern void vcmn_err(int, const char *, va_list);
|
||||||
extern void panic(const char *, ...);
|
extern void panic(const char *, ...);
|
||||||
|
@ -156,7 +154,8 @@ extern void vpanic(const char *, va_list);
|
||||||
|
|
||||||
#define fm_panic panic
|
#define fm_panic panic
|
||||||
|
|
||||||
#ifdef __sun
|
extern int aok;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* DTrace SDT probes have different signatures in userland than they do in
|
* DTrace SDT probes have different signatures in userland than they do in
|
||||||
* kernel. If they're being used in kernel code, re-define them out of
|
* kernel. If they're being used in kernel code, re-define them out of
|
||||||
|
@ -202,9 +201,6 @@ extern void vpanic(const char *, va_list);
|
||||||
* "return (SET_ERROR(log_error(EINVAL, info)));" would log the error twice).
|
* "return (SET_ERROR(log_error(EINVAL, info)));" would log the error twice).
|
||||||
*/
|
*/
|
||||||
#define SET_ERROR(err) (ZFS_SET_ERROR(err), err)
|
#define SET_ERROR(err) (ZFS_SET_ERROR(err), err)
|
||||||
#else
|
|
||||||
#define SET_ERROR(err) (err)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Threads. TS_STACK_MIN is dictated by the minimum allowed pthread stack
|
* Threads. TS_STACK_MIN is dictated by the minimum allowed pthread stack
|
||||||
|
|
|
@ -38,14 +38,6 @@ extern "C" {
|
||||||
#define FALSE 0
|
#define FALSE 0
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/*
|
|
||||||
* ZFS debugging - Always enabled for user space builds.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#if !defined(ZFS_DEBUG) && !defined(_KERNEL)
|
|
||||||
#define ZFS_DEBUG
|
|
||||||
#endif
|
|
||||||
|
|
||||||
extern int zfs_flags;
|
extern int zfs_flags;
|
||||||
extern int zfs_recover;
|
extern int zfs_recover;
|
||||||
extern int zfs_free_leak_on_eio;
|
extern int zfs_free_leak_on_eio;
|
||||||
|
@ -59,29 +51,15 @@ extern int zfs_free_leak_on_eio;
|
||||||
#define ZFS_DEBUG_ZIO_FREE (1<<6)
|
#define ZFS_DEBUG_ZIO_FREE (1<<6)
|
||||||
#define ZFS_DEBUG_HISTOGRAM_VERIFY (1<<7)
|
#define ZFS_DEBUG_HISTOGRAM_VERIFY (1<<7)
|
||||||
|
|
||||||
/*
|
#if defined(HAVE_DECLARE_EVENT_CLASS) || !defined(_KERNEL)
|
||||||
* Always log zfs debug messages to the spl debug subsystem as SS_USER1.
|
extern void __dprintf(const char *file, const char *func,
|
||||||
* When the SPL is configured with debugging enabled these messages will
|
int line, const char *fmt, ...);
|
||||||
* appear in the internal spl debug log, otherwise they are a no-op.
|
#define dprintf(...) \
|
||||||
*/
|
if (zfs_flags & ZFS_DEBUG_DPRINTF) \
|
||||||
#if defined(_KERNEL)
|
|
||||||
|
|
||||||
#include <spl-debug.h>
|
|
||||||
#define dprintf(...) \
|
|
||||||
if (zfs_flags & ZFS_DEBUG_DPRINTF) \
|
|
||||||
__SDEBUG(NULL, SS_USER1, SD_DPRINTF, __VA_ARGS__)
|
|
||||||
|
|
||||||
/*
|
|
||||||
* When zfs is running is user space the debugging is always enabled.
|
|
||||||
* The messages will be printed using the __dprintf() function and
|
|
||||||
* filtered based on the zfs_flags variable.
|
|
||||||
*/
|
|
||||||
#else
|
|
||||||
#define dprintf(...) \
|
|
||||||
if (zfs_flags & ZFS_DEBUG_DPRINTF) \
|
|
||||||
__dprintf(__FILE__, __func__, __LINE__, __VA_ARGS__)
|
__dprintf(__FILE__, __func__, __LINE__, __VA_ARGS__)
|
||||||
|
#else
|
||||||
#endif /* _KERNEL */
|
#define dprintf(...) ((void)0)
|
||||||
|
#endif /* HAVE_DECLARE_EVENT_CLASS || !_KERNEL */
|
||||||
|
|
||||||
extern void zfs_panic_recover(const char *fmt, ...);
|
extern void zfs_panic_recover(const char *fmt, ...);
|
||||||
|
|
||||||
|
@ -93,12 +71,8 @@ typedef struct zfs_dbgmsg {
|
||||||
|
|
||||||
extern void zfs_dbgmsg_init(void);
|
extern void zfs_dbgmsg_init(void);
|
||||||
extern void zfs_dbgmsg_fini(void);
|
extern void zfs_dbgmsg_fini(void);
|
||||||
#if defined(_KERNEL) && defined(__linux__)
|
|
||||||
#define zfs_dbgmsg(...) dprintf(__VA_ARGS__)
|
|
||||||
#else
|
|
||||||
extern void zfs_dbgmsg(const char *fmt, ...);
|
extern void zfs_dbgmsg(const char *fmt, ...);
|
||||||
extern void zfs_dbgmsg_print(const char *tag);
|
extern void zfs_dbgmsg_print(const char *tag);
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef _KERNEL
|
#ifndef _KERNEL
|
||||||
extern int dprintf_find_string(const char *string);
|
extern int dprintf_find_string(const char *string);
|
||||||
|
|
|
@ -33,7 +33,6 @@ libspl_HEADERS = \
|
||||||
$(top_srcdir)/lib/libspl/include/sys/param.h \
|
$(top_srcdir)/lib/libspl/include/sys/param.h \
|
||||||
$(top_srcdir)/lib/libspl/include/sys/priv.h \
|
$(top_srcdir)/lib/libspl/include/sys/priv.h \
|
||||||
$(top_srcdir)/lib/libspl/include/sys/processor.h \
|
$(top_srcdir)/lib/libspl/include/sys/processor.h \
|
||||||
$(top_srcdir)/lib/libspl/include/sys/sdt.h \
|
|
||||||
$(top_srcdir)/lib/libspl/include/sys/stack.h \
|
$(top_srcdir)/lib/libspl/include/sys/stack.h \
|
||||||
$(top_srcdir)/lib/libspl/include/sys/stat.h \
|
$(top_srcdir)/lib/libspl/include/sys/stat.h \
|
||||||
$(top_srcdir)/lib/libspl/include/sys/stropts.h \
|
$(top_srcdir)/lib/libspl/include/sys/stropts.h \
|
||||||
|
|
|
@ -70,6 +70,7 @@ libzpool_la_SOURCES = \
|
||||||
$(top_srcdir)/module/zfs/space_map.c \
|
$(top_srcdir)/module/zfs/space_map.c \
|
||||||
$(top_srcdir)/module/zfs/space_reftree.c \
|
$(top_srcdir)/module/zfs/space_reftree.c \
|
||||||
$(top_srcdir)/module/zfs/txg.c \
|
$(top_srcdir)/module/zfs/txg.c \
|
||||||
|
$(top_srcdir)/module/zfs/trace.c \
|
||||||
$(top_srcdir)/module/zfs/uberblock.c \
|
$(top_srcdir)/module/zfs/uberblock.c \
|
||||||
$(top_srcdir)/module/zfs/unique.c \
|
$(top_srcdir)/module/zfs/unique.c \
|
||||||
$(top_srcdir)/module/zfs/vdev.c \
|
$(top_srcdir)/module/zfs/vdev.c \
|
||||||
|
|
|
@ -52,6 +52,7 @@ $(MODULE)-objs += @top_srcdir@/module/zfs/spa_stats.o
|
||||||
$(MODULE)-objs += @top_srcdir@/module/zfs/space_map.o
|
$(MODULE)-objs += @top_srcdir@/module/zfs/space_map.o
|
||||||
$(MODULE)-objs += @top_srcdir@/module/zfs/space_reftree.o
|
$(MODULE)-objs += @top_srcdir@/module/zfs/space_reftree.o
|
||||||
$(MODULE)-objs += @top_srcdir@/module/zfs/txg.o
|
$(MODULE)-objs += @top_srcdir@/module/zfs/txg.o
|
||||||
|
$(MODULE)-objs += @top_srcdir@/module/zfs/trace.o
|
||||||
$(MODULE)-objs += @top_srcdir@/module/zfs/uberblock.o
|
$(MODULE)-objs += @top_srcdir@/module/zfs/uberblock.o
|
||||||
$(MODULE)-objs += @top_srcdir@/module/zfs/unique.o
|
$(MODULE)-objs += @top_srcdir@/module/zfs/unique.o
|
||||||
$(MODULE)-objs += @top_srcdir@/module/zfs/vdev.o
|
$(MODULE)-objs += @top_srcdir@/module/zfs/vdev.o
|
||||||
|
|
119
module/zfs/arc.c
119
module/zfs/arc.c
|
@ -145,6 +145,7 @@
|
||||||
#include <sys/kstat.h>
|
#include <sys/kstat.h>
|
||||||
#include <sys/dmu_tx.h>
|
#include <sys/dmu_tx.h>
|
||||||
#include <zfs_fletcher.h>
|
#include <zfs_fletcher.h>
|
||||||
|
#include <sys/arc_impl.h>
|
||||||
|
|
||||||
#ifndef _KERNEL
|
#ifndef _KERNEL
|
||||||
/* set with ZFS_DEBUG=watch, to enable watchpoints on frozen buffers */
|
/* set with ZFS_DEBUG=watch, to enable watchpoints on frozen buffers */
|
||||||
|
@ -218,46 +219,6 @@ unsigned long zfs_arc_max = 0;
|
||||||
unsigned long zfs_arc_min = 0;
|
unsigned long zfs_arc_min = 0;
|
||||||
unsigned long zfs_arc_meta_limit = 0;
|
unsigned long zfs_arc_meta_limit = 0;
|
||||||
|
|
||||||
/*
|
|
||||||
* Note that buffers can be in one of 6 states:
|
|
||||||
* ARC_anon - anonymous (discussed below)
|
|
||||||
* ARC_mru - recently used, currently cached
|
|
||||||
* ARC_mru_ghost - recentely used, no longer in cache
|
|
||||||
* ARC_mfu - frequently used, currently cached
|
|
||||||
* ARC_mfu_ghost - frequently used, no longer in cache
|
|
||||||
* ARC_l2c_only - exists in L2ARC but not other states
|
|
||||||
* When there are no active references to the buffer, they are
|
|
||||||
* are linked onto a list in one of these arc states. These are
|
|
||||||
* the only buffers that can be evicted or deleted. Within each
|
|
||||||
* state there are multiple lists, one for meta-data and one for
|
|
||||||
* non-meta-data. Meta-data (indirect blocks, blocks of dnodes,
|
|
||||||
* etc.) is tracked separately so that it can be managed more
|
|
||||||
* explicitly: favored over data, limited explicitly.
|
|
||||||
*
|
|
||||||
* Anonymous buffers are buffers that are not associated with
|
|
||||||
* a DVA. These are buffers that hold dirty block copies
|
|
||||||
* before they are written to stable storage. By definition,
|
|
||||||
* they are "ref'd" and are considered part of arc_mru
|
|
||||||
* that cannot be freed. Generally, they will aquire a DVA
|
|
||||||
* as they are written and migrate onto the arc_mru list.
|
|
||||||
*
|
|
||||||
* The ARC_l2c_only state is for buffers that are in the second
|
|
||||||
* level ARC but no longer in any of the ARC_m* lists. The second
|
|
||||||
* level ARC itself may also contain buffers that are in any of
|
|
||||||
* the ARC_m* states - meaning that a buffer can exist in two
|
|
||||||
* places. The reason for the ARC_l2c_only state is to keep the
|
|
||||||
* buffer header in the hash table, so that reads that hit the
|
|
||||||
* second level ARC benefit from these fast lookups.
|
|
||||||
*/
|
|
||||||
|
|
||||||
typedef struct arc_state {
|
|
||||||
list_t arcs_list[ARC_BUFC_NUMTYPES]; /* list of evictable buffers */
|
|
||||||
uint64_t arcs_lsize[ARC_BUFC_NUMTYPES]; /* amount of evictable data */
|
|
||||||
uint64_t arcs_size; /* total amount of data in this state */
|
|
||||||
kmutex_t arcs_mtx;
|
|
||||||
arc_state_type_t arcs_state;
|
|
||||||
} arc_state_t;
|
|
||||||
|
|
||||||
/* The 6 states: */
|
/* The 6 states: */
|
||||||
static arc_state_t ARC_anon;
|
static arc_state_t ARC_anon;
|
||||||
static arc_state_t ARC_mru;
|
static arc_state_t ARC_mru;
|
||||||
|
@ -522,69 +483,6 @@ static arc_state_t *arc_l2c_only;
|
||||||
#define L2ARC_IS_VALID_COMPRESS(_c_) \
|
#define L2ARC_IS_VALID_COMPRESS(_c_) \
|
||||||
((_c_) == ZIO_COMPRESS_LZ4 || (_c_) == ZIO_COMPRESS_EMPTY)
|
((_c_) == ZIO_COMPRESS_LZ4 || (_c_) == ZIO_COMPRESS_EMPTY)
|
||||||
|
|
||||||
typedef struct l2arc_buf_hdr l2arc_buf_hdr_t;
|
|
||||||
|
|
||||||
typedef struct arc_callback arc_callback_t;
|
|
||||||
|
|
||||||
struct arc_callback {
|
|
||||||
void *acb_private;
|
|
||||||
arc_done_func_t *acb_done;
|
|
||||||
arc_buf_t *acb_buf;
|
|
||||||
zio_t *acb_zio_dummy;
|
|
||||||
arc_callback_t *acb_next;
|
|
||||||
};
|
|
||||||
|
|
||||||
typedef struct arc_write_callback arc_write_callback_t;
|
|
||||||
|
|
||||||
struct arc_write_callback {
|
|
||||||
void *awcb_private;
|
|
||||||
arc_done_func_t *awcb_ready;
|
|
||||||
arc_done_func_t *awcb_physdone;
|
|
||||||
arc_done_func_t *awcb_done;
|
|
||||||
arc_buf_t *awcb_buf;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct arc_buf_hdr {
|
|
||||||
/* protected by hash lock */
|
|
||||||
dva_t b_dva;
|
|
||||||
uint64_t b_birth;
|
|
||||||
uint64_t b_cksum0;
|
|
||||||
|
|
||||||
kmutex_t b_freeze_lock;
|
|
||||||
zio_cksum_t *b_freeze_cksum;
|
|
||||||
|
|
||||||
arc_buf_hdr_t *b_hash_next;
|
|
||||||
arc_buf_t *b_buf;
|
|
||||||
uint32_t b_flags;
|
|
||||||
uint32_t b_datacnt;
|
|
||||||
|
|
||||||
arc_callback_t *b_acb;
|
|
||||||
kcondvar_t b_cv;
|
|
||||||
|
|
||||||
/* immutable */
|
|
||||||
arc_buf_contents_t b_type;
|
|
||||||
uint64_t b_size;
|
|
||||||
uint64_t b_spa;
|
|
||||||
|
|
||||||
/* protected by arc state mutex */
|
|
||||||
arc_state_t *b_state;
|
|
||||||
list_node_t b_arc_node;
|
|
||||||
|
|
||||||
/* updated atomically */
|
|
||||||
clock_t b_arc_access;
|
|
||||||
uint32_t b_mru_hits;
|
|
||||||
uint32_t b_mru_ghost_hits;
|
|
||||||
uint32_t b_mfu_hits;
|
|
||||||
uint32_t b_mfu_ghost_hits;
|
|
||||||
uint32_t b_l2_hits;
|
|
||||||
|
|
||||||
/* self protecting */
|
|
||||||
refcount_t b_refcnt;
|
|
||||||
|
|
||||||
l2arc_buf_hdr_t *b_l2hdr;
|
|
||||||
list_node_t b_l2node;
|
|
||||||
};
|
|
||||||
|
|
||||||
static list_t arc_prune_list;
|
static list_t arc_prune_list;
|
||||||
static kmutex_t arc_prune_mtx;
|
static kmutex_t arc_prune_mtx;
|
||||||
static arc_buf_t *arc_eviction_list;
|
static arc_buf_t *arc_eviction_list;
|
||||||
|
@ -707,19 +605,6 @@ int l2arc_norw = B_FALSE; /* no reads during writes */
|
||||||
/*
|
/*
|
||||||
* L2ARC Internals
|
* L2ARC Internals
|
||||||
*/
|
*/
|
||||||
typedef struct l2arc_dev {
|
|
||||||
vdev_t *l2ad_vdev; /* vdev */
|
|
||||||
spa_t *l2ad_spa; /* spa */
|
|
||||||
uint64_t l2ad_hand; /* next write location */
|
|
||||||
uint64_t l2ad_start; /* first addr on device */
|
|
||||||
uint64_t l2ad_end; /* last addr on device */
|
|
||||||
uint64_t l2ad_evict; /* last addr eviction reached */
|
|
||||||
boolean_t l2ad_first; /* first sweep through */
|
|
||||||
boolean_t l2ad_writing; /* currently writing */
|
|
||||||
list_t *l2ad_buflist; /* buffer list */
|
|
||||||
list_node_t l2ad_node; /* device list node */
|
|
||||||
} l2arc_dev_t;
|
|
||||||
|
|
||||||
static list_t L2ARC_dev_list; /* device list */
|
static list_t L2ARC_dev_list; /* device list */
|
||||||
static list_t *l2arc_dev_list; /* device list pointer */
|
static list_t *l2arc_dev_list; /* device list pointer */
|
||||||
static kmutex_t l2arc_dev_mtx; /* device list mutex */
|
static kmutex_t l2arc_dev_mtx; /* device list mutex */
|
||||||
|
@ -2043,7 +1928,7 @@ top:
|
||||||
|
|
||||||
if (bytes_evicted < bytes)
|
if (bytes_evicted < bytes)
|
||||||
dprintf("only evicted %lld bytes from %x\n",
|
dprintf("only evicted %lld bytes from %x\n",
|
||||||
(longlong_t)bytes_evicted, state);
|
(longlong_t)bytes_evicted, state->arcs_state);
|
||||||
|
|
||||||
if (skipped)
|
if (skipped)
|
||||||
ARCSTAT_INCR(arcstat_evict_skip, skipped);
|
ARCSTAT_INCR(arcstat_evict_skip, skipped);
|
||||||
|
|
|
@ -221,7 +221,7 @@ bptree_iterate(objset_t *os, uint64_t obj, boolean_t free, bptree_itor_t func,
|
||||||
|
|
||||||
if (zfs_free_leak_on_eio)
|
if (zfs_free_leak_on_eio)
|
||||||
flags |= TRAVERSE_HARD;
|
flags |= TRAVERSE_HARD;
|
||||||
zfs_dbgmsg("bptree index %d: traversing from min_txg=%lld "
|
zfs_dbgmsg("bptree index %lld: traversing from min_txg=%lld "
|
||||||
"bookmark %lld/%lld/%lld/%lld",
|
"bookmark %lld/%lld/%lld/%lld",
|
||||||
i, (longlong_t)bte.be_birth_txg,
|
i, (longlong_t)bte.be_birth_txg,
|
||||||
(longlong_t)bte.be_zb.zb_objset,
|
(longlong_t)bte.be_zb.zb_objset,
|
||||||
|
|
|
@ -238,6 +238,53 @@ static avl_tree_t spa_l2cache_avl;
|
||||||
kmem_cache_t *spa_buffer_pool;
|
kmem_cache_t *spa_buffer_pool;
|
||||||
int spa_mode_global;
|
int spa_mode_global;
|
||||||
|
|
||||||
|
#ifdef ZFS_DEBUG
|
||||||
|
/* Everything except dprintf and spa is on by default in debug builds */
|
||||||
|
int zfs_flags = ~(ZFS_DEBUG_DPRINTF | ZFS_DEBUG_SPA);
|
||||||
|
#else
|
||||||
|
int zfs_flags = 0;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* zfs_recover can be set to nonzero to attempt to recover from
|
||||||
|
* otherwise-fatal errors, typically caused by on-disk corruption. When
|
||||||
|
* set, calls to zfs_panic_recover() will turn into warning messages.
|
||||||
|
* This should only be used as a last resort, as it typically results
|
||||||
|
* in leaked space, or worse.
|
||||||
|
*/
|
||||||
|
int zfs_recover = B_FALSE;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If destroy encounters an EIO while reading metadata (e.g. indirect
|
||||||
|
* blocks), space referenced by the missing metadata can not be freed.
|
||||||
|
* Normally this causes the background destroy to become "stalled", as
|
||||||
|
* it is unable to make forward progress. While in this stalled state,
|
||||||
|
* all remaining space to free from the error-encountering filesystem is
|
||||||
|
* "temporarily leaked". Set this flag to cause it to ignore the EIO,
|
||||||
|
* permanently leak the space from indirect blocks that can not be read,
|
||||||
|
* and continue to free everything else that it can.
|
||||||
|
*
|
||||||
|
* The default, "stalling" behavior is useful if the storage partially
|
||||||
|
* fails (i.e. some but not all i/os fail), and then later recovers. In
|
||||||
|
* this case, we will be able to continue pool operations while it is
|
||||||
|
* partially failed, and when it recovers, we can continue to free the
|
||||||
|
* space, with no leaks. However, note that this case is actually
|
||||||
|
* fairly rare.
|
||||||
|
*
|
||||||
|
* Typically pools either (a) fail completely (but perhaps temporarily,
|
||||||
|
* e.g. a top-level vdev going offline), or (b) have localized,
|
||||||
|
* permanent errors (e.g. disk returns the wrong data due to bit flip or
|
||||||
|
* firmware bug). In case (a), this setting does not matter because the
|
||||||
|
* pool will be suspended and the sync thread will not be able to make
|
||||||
|
* forward progress regardless. In case (b), because the error is
|
||||||
|
* permanent, the best we can do is leak the minimum amount of space,
|
||||||
|
* which is what setting this flag will do. Therefore, it is reasonable
|
||||||
|
* for this flag to normally be set, but we chose the more conservative
|
||||||
|
* approach of not setting it, so that there is no possibility of
|
||||||
|
* leaking space in the "partial temporary" failure case.
|
||||||
|
*/
|
||||||
|
int zfs_free_leak_on_eio = B_FALSE;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Expiration time in milliseconds. This value has two meanings. First it is
|
* Expiration time in milliseconds. This value has two meanings. First it is
|
||||||
* used to determine when the spa_deadman() logic should fire. By default the
|
* used to determine when the spa_deadman() logic should fire. By default the
|
||||||
|
@ -1319,6 +1366,16 @@ spa_freeze(spa_t *spa)
|
||||||
txg_wait_synced(spa_get_dsl(spa), freeze_txg);
|
txg_wait_synced(spa_get_dsl(spa), freeze_txg);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
zfs_panic_recover(const char *fmt, ...)
|
||||||
|
{
|
||||||
|
va_list adx;
|
||||||
|
|
||||||
|
va_start(adx, fmt);
|
||||||
|
vcmn_err(zfs_recover ? CE_WARN : CE_PANIC, fmt, adx);
|
||||||
|
va_end(adx);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This is a stripped-down version of strtoull, suitable only for converting
|
* This is a stripped-down version of strtoull, suitable only for converting
|
||||||
* lowercase hexadecimal numbers that don't overflow.
|
* lowercase hexadecimal numbers that don't overflow.
|
||||||
|
@ -1923,6 +1980,16 @@ EXPORT_SYMBOL(spa_mode);
|
||||||
|
|
||||||
EXPORT_SYMBOL(spa_namespace_lock);
|
EXPORT_SYMBOL(spa_namespace_lock);
|
||||||
|
|
||||||
|
module_param(zfs_flags, int, 0644);
|
||||||
|
MODULE_PARM_DESC(zfs_flags, "Set additional debugging flags");
|
||||||
|
|
||||||
|
module_param(zfs_recover, int, 0644);
|
||||||
|
MODULE_PARM_DESC(zfs_recover, "Set to attempt to recover from fatal errors");
|
||||||
|
|
||||||
|
module_param(zfs_free_leak_on_eio, int, 0644);
|
||||||
|
MODULE_PARM_DESC(zfs_free_leak_on_eio,
|
||||||
|
"Set to ignore IO errors during free and permanently leak the space");
|
||||||
|
|
||||||
module_param(zfs_deadman_synctime_ms, ulong, 0644);
|
module_param(zfs_deadman_synctime_ms, ulong, 0644);
|
||||||
MODULE_PARM_DESC(zfs_deadman_synctime_ms, "Expiration time in milliseconds");
|
MODULE_PARM_DESC(zfs_deadman_synctime_ms, "Expiration time in milliseconds");
|
||||||
|
|
||||||
|
|
|
@ -427,7 +427,7 @@ space_map_truncate(space_map_t *sm, dmu_tx_t *tx)
|
||||||
doi.doi_bonus_size != sizeof (space_map_phys_t)) ||
|
doi.doi_bonus_size != sizeof (space_map_phys_t)) ||
|
||||||
doi.doi_data_block_size != space_map_blksz) {
|
doi.doi_data_block_size != space_map_blksz) {
|
||||||
zfs_dbgmsg("txg %llu, spa %s, reallocating: "
|
zfs_dbgmsg("txg %llu, spa %s, reallocating: "
|
||||||
"old bonus %u, old blocksz %u", dmu_tx_get_txg(tx),
|
"old bonus %llu, old blocksz %u", dmu_tx_get_txg(tx),
|
||||||
spa_name(spa), doi.doi_bonus_size, doi.doi_data_block_size);
|
spa_name(spa), doi.doi_bonus_size, doi.doi_data_block_size);
|
||||||
|
|
||||||
space_map_free(sm, tx);
|
space_map_free(sm, tx);
|
||||||
|
|
|
@ -2,9 +2,8 @@
|
||||||
* CDDL HEADER START
|
* CDDL HEADER START
|
||||||
*
|
*
|
||||||
* The contents of this file are subject to the terms of the
|
* The contents of this file are subject to the terms of the
|
||||||
* Common Development and Distribution License, Version 1.0 only
|
* Common Development and Distribution License (the "License").
|
||||||
* (the "License"). You may not use this file except in compliance
|
* You may not use this file except in compliance with the License.
|
||||||
* with the License.
|
|
||||||
*
|
*
|
||||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||||
* or http://www.opensolaris.org/os/licensing.
|
* or http://www.opensolaris.org/os/licensing.
|
||||||
|
@ -20,17 +19,21 @@
|
||||||
* CDDL HEADER END
|
* CDDL HEADER END
|
||||||
*/
|
*/
|
||||||
/*
|
/*
|
||||||
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
* Each Linux tracepoints subsystem must define CREATE_TRACE_POINTS in one
|
||||||
* Use is subject to license terms.
|
* (and only one) C file, so this dummy file exists for that purpose.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef _LIBSPL_SYS_SDT_H
|
#include <sys/arc_impl.h>
|
||||||
#define _LIBSPL_SYS_SDT_H
|
#include <sys/vdev_impl.h>
|
||||||
|
#include <sys/zio.h>
|
||||||
|
#include <sys/dbuf.h>
|
||||||
|
#include <sys/dmu_objset.h>
|
||||||
|
#include <sys/dsl_dataset.h>
|
||||||
|
#include <sys/dmu_tx.h>
|
||||||
|
#include <sys/dnode.h>
|
||||||
|
#include <sys/zfs_znode.h>
|
||||||
|
#include <sys/zil_impl.h>
|
||||||
|
#include <sys/zrlock.h>
|
||||||
|
|
||||||
#define DTRACE_PROBE(a) ((void) 0)
|
#define CREATE_TRACE_POINTS
|
||||||
#define DTRACE_PROBE1(a, b, c) ((void) 0)
|
#include <sys/trace.h>
|
||||||
#define DTRACE_PROBE2(a, b, c, d, e) ((void) 0)
|
|
||||||
#define DTRACE_PROBE3(a, b, c, d, e, f, g) ((void) 0)
|
|
||||||
#define DTRACE_PROBE4(a, b, c, d, e, f, g, h, i) ((void) 0)
|
|
||||||
|
|
||||||
#endif
|
|
|
@ -209,7 +209,7 @@ zap_table_grow(zap_t *zap, zap_table_phys_t *tbl,
|
||||||
tbl->zt_nextblk = 0;
|
tbl->zt_nextblk = 0;
|
||||||
tbl->zt_blks_copied = 0;
|
tbl->zt_blks_copied = 0;
|
||||||
|
|
||||||
dprintf("finished; numblocks now %llu (%lluk entries)\n",
|
dprintf("finished; numblocks now %llu (%uk entries)\n",
|
||||||
tbl->zt_numblks, 1<<(tbl->zt_shift-10));
|
tbl->zt_numblks, 1<<(tbl->zt_shift-10));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -25,99 +25,22 @@
|
||||||
|
|
||||||
#include <sys/zfs_context.h>
|
#include <sys/zfs_context.h>
|
||||||
|
|
||||||
#if !defined(_KERNEL) || !defined(__linux__)
|
|
||||||
list_t zfs_dbgmsgs;
|
list_t zfs_dbgmsgs;
|
||||||
int zfs_dbgmsg_size;
|
int zfs_dbgmsg_size;
|
||||||
kmutex_t zfs_dbgmsgs_lock;
|
kmutex_t zfs_dbgmsgs_lock;
|
||||||
int zfs_dbgmsg_maxsize = 4<<20; /* 4MB */
|
int zfs_dbgmsg_maxsize = 4<<20; /* 4MB */
|
||||||
#endif
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Enable various debugging features.
|
|
||||||
*/
|
|
||||||
int zfs_flags = 0;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* zfs_recover can be set to nonzero to attempt to recover from
|
|
||||||
* otherwise-fatal errors, typically caused by on-disk corruption. When
|
|
||||||
* set, calls to zfs_panic_recover() will turn into warning messages.
|
|
||||||
* This should only be used as a last resort, as it typically results
|
|
||||||
* in leaked space, or worse.
|
|
||||||
*/
|
|
||||||
int zfs_recover = B_FALSE;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* If destroy encounters an EIO while reading metadata (e.g. indirect
|
|
||||||
* blocks), space referenced by the missing metadata can not be freed.
|
|
||||||
* Normally this causes the background destroy to become "stalled", as
|
|
||||||
* it is unable to make forward progress. While in this stalled state,
|
|
||||||
* all remaining space to free from the error-encountering filesystem is
|
|
||||||
* "temporarily leaked". Set this flag to cause it to ignore the EIO,
|
|
||||||
* permanently leak the space from indirect blocks that can not be read,
|
|
||||||
* and continue to free everything else that it can.
|
|
||||||
*
|
|
||||||
* The default, "stalling" behavior is useful if the storage partially
|
|
||||||
* fails (i.e. some but not all i/os fail), and then later recovers. In
|
|
||||||
* this case, we will be able to continue pool operations while it is
|
|
||||||
* partially failed, and when it recovers, we can continue to free the
|
|
||||||
* space, with no leaks. However, note that this case is actually
|
|
||||||
* fairly rare.
|
|
||||||
*
|
|
||||||
* Typically pools either (a) fail completely (but perhaps temporarily,
|
|
||||||
* e.g. a top-level vdev going offline), or (b) have localized,
|
|
||||||
* permanent errors (e.g. disk returns the wrong data due to bit flip or
|
|
||||||
* firmware bug). In case (a), this setting does not matter because the
|
|
||||||
* pool will be suspended and the sync thread will not be able to make
|
|
||||||
* forward progress regardless. In case (b), because the error is
|
|
||||||
* permanent, the best we can do is leak the minimum amount of space,
|
|
||||||
* which is what setting this flag will do. Therefore, it is reasonable
|
|
||||||
* for this flag to normally be set, but we chose the more conservative
|
|
||||||
* approach of not setting it, so that there is no possibility of
|
|
||||||
* leaking space in the "partial temporary" failure case.
|
|
||||||
*/
|
|
||||||
int zfs_free_leak_on_eio = B_FALSE;
|
|
||||||
|
|
||||||
|
|
||||||
void
|
|
||||||
zfs_panic_recover(const char *fmt, ...)
|
|
||||||
{
|
|
||||||
va_list adx;
|
|
||||||
|
|
||||||
va_start(adx, fmt);
|
|
||||||
vcmn_err(zfs_recover ? CE_WARN : CE_PANIC, fmt, adx);
|
|
||||||
va_end(adx);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Debug logging is enabled by default for production kernel builds.
|
|
||||||
* The overhead for this is negligible and the logs can be valuable when
|
|
||||||
* debugging. For non-production user space builds all debugging except
|
|
||||||
* logging is enabled since performance is no longer a concern.
|
|
||||||
*/
|
|
||||||
void
|
void
|
||||||
zfs_dbgmsg_init(void)
|
zfs_dbgmsg_init(void)
|
||||||
{
|
{
|
||||||
#if !defined(_KERNEL) || !defined(__linux__)
|
|
||||||
list_create(&zfs_dbgmsgs, sizeof (zfs_dbgmsg_t),
|
list_create(&zfs_dbgmsgs, sizeof (zfs_dbgmsg_t),
|
||||||
offsetof(zfs_dbgmsg_t, zdm_node));
|
offsetof(zfs_dbgmsg_t, zdm_node));
|
||||||
mutex_init(&zfs_dbgmsgs_lock, NULL, MUTEX_DEFAULT, NULL);
|
mutex_init(&zfs_dbgmsgs_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||||
#endif
|
|
||||||
|
|
||||||
if (zfs_flags == 0) {
|
|
||||||
#if defined(_KERNEL)
|
|
||||||
zfs_flags = ZFS_DEBUG_DPRINTF;
|
|
||||||
spl_debug_set_mask(spl_debug_get_mask() | SD_DPRINTF);
|
|
||||||
spl_debug_set_subsys(spl_debug_get_subsys() | SS_USER1);
|
|
||||||
#else
|
|
||||||
zfs_flags = ~ZFS_DEBUG_DPRINTF;
|
|
||||||
#endif /* _KERNEL */
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
zfs_dbgmsg_fini(void)
|
zfs_dbgmsg_fini(void)
|
||||||
{
|
{
|
||||||
#if !defined(_KERNEL) || !defined(__linux__)
|
|
||||||
zfs_dbgmsg_t *zdm;
|
zfs_dbgmsg_t *zdm;
|
||||||
|
|
||||||
while ((zdm = list_remove_head(&zfs_dbgmsgs)) != NULL) {
|
while ((zdm = list_remove_head(&zfs_dbgmsgs)) != NULL) {
|
||||||
|
@ -127,25 +50,24 @@ zfs_dbgmsg_fini(void)
|
||||||
}
|
}
|
||||||
mutex_destroy(&zfs_dbgmsgs_lock);
|
mutex_destroy(&zfs_dbgmsgs_lock);
|
||||||
ASSERT0(zfs_dbgmsg_size);
|
ASSERT0(zfs_dbgmsg_size);
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#if !defined(_KERNEL) || !defined(__linux__)
|
|
||||||
/*
|
/*
|
||||||
* Print these messages by running:
|
* To get this data enable the zfs__dbgmsg tracepoint as shown:
|
||||||
* echo ::zfs_dbgmsg | mdb -k
|
|
||||||
*
|
*
|
||||||
* Monitor these messages by running:
|
* # Enable zfs__dbgmsg tracepoint, clear the tracepoint ring buffer
|
||||||
* dtrace -qn 'zfs-dbgmsg{printf("%s\n", stringof(arg0))}'
|
* $ echo 1 > /sys/kernel/debug/tracing/events/zfs/enable
|
||||||
|
* $ echo 0 > /sys/kernel/debug/tracing/trace
|
||||||
*
|
*
|
||||||
* When used with libzpool, monitor with:
|
* # Dump the ring buffer.
|
||||||
* dtrace -qn 'zfs$pid::zfs_dbgmsg:probe1{printf("%s\n", copyinstr(arg1))}'
|
* $ cat /sys/kernel/debug/tracing/trace
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
zfs_dbgmsg(const char *fmt, ...)
|
zfs_dbgmsg(const char *fmt, ...)
|
||||||
{
|
{
|
||||||
int size;
|
int size;
|
||||||
va_list adx;
|
va_list adx;
|
||||||
|
char *nl;
|
||||||
zfs_dbgmsg_t *zdm;
|
zfs_dbgmsg_t *zdm;
|
||||||
|
|
||||||
va_start(adx, fmt);
|
va_start(adx, fmt);
|
||||||
|
@ -156,13 +78,20 @@ zfs_dbgmsg(const char *fmt, ...)
|
||||||
* There is one byte of string in sizeof (zfs_dbgmsg_t), used
|
* There is one byte of string in sizeof (zfs_dbgmsg_t), used
|
||||||
* for the terminating null.
|
* for the terminating null.
|
||||||
*/
|
*/
|
||||||
zdm = kmem_alloc(sizeof (zfs_dbgmsg_t) + size, KM_SLEEP);
|
zdm = kmem_alloc(sizeof (zfs_dbgmsg_t) + size, KM_PUSHPAGE);
|
||||||
zdm->zdm_timestamp = gethrestime_sec();
|
zdm->zdm_timestamp = gethrestime_sec();
|
||||||
|
|
||||||
va_start(adx, fmt);
|
va_start(adx, fmt);
|
||||||
(void) vsnprintf(zdm->zdm_msg, size + 1, fmt, adx);
|
(void) vsnprintf(zdm->zdm_msg, size + 1, fmt, adx);
|
||||||
va_end(adx);
|
va_end(adx);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Get rid of trailing newline.
|
||||||
|
*/
|
||||||
|
nl = strrchr(zdm->zdm_msg, '\n');
|
||||||
|
if (nl != NULL)
|
||||||
|
*nl = '\0';
|
||||||
|
|
||||||
DTRACE_PROBE1(zfs__dbgmsg, char *, zdm->zdm_msg);
|
DTRACE_PROBE1(zfs__dbgmsg, char *, zdm->zdm_msg);
|
||||||
|
|
||||||
mutex_enter(&zfs_dbgmsgs_lock);
|
mutex_enter(&zfs_dbgmsgs_lock);
|
||||||
|
@ -180,6 +109,7 @@ zfs_dbgmsg(const char *fmt, ...)
|
||||||
void
|
void
|
||||||
zfs_dbgmsg_print(const char *tag)
|
zfs_dbgmsg_print(const char *tag)
|
||||||
{
|
{
|
||||||
|
#if !defined(_KERNEL)
|
||||||
zfs_dbgmsg_t *zdm;
|
zfs_dbgmsg_t *zdm;
|
||||||
|
|
||||||
(void) printf("ZFS_DBGMSG(%s):\n", tag);
|
(void) printf("ZFS_DBGMSG(%s):\n", tag);
|
||||||
|
@ -188,17 +118,5 @@ zfs_dbgmsg_print(const char *tag)
|
||||||
zdm = list_next(&zfs_dbgmsgs, zdm))
|
zdm = list_next(&zfs_dbgmsgs, zdm))
|
||||||
(void) printf("%s\n", zdm->zdm_msg);
|
(void) printf("%s\n", zdm->zdm_msg);
|
||||||
mutex_exit(&zfs_dbgmsgs_lock);
|
mutex_exit(&zfs_dbgmsgs_lock);
|
||||||
|
#endif /* !_KERNEL */
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
#if defined(_KERNEL)
|
|
||||||
module_param(zfs_flags, int, 0644);
|
|
||||||
MODULE_PARM_DESC(zfs_flags, "Set additional debugging flags");
|
|
||||||
|
|
||||||
module_param(zfs_recover, int, 0644);
|
|
||||||
MODULE_PARM_DESC(zfs_recover, "Set to attempt to recover from fatal errors");
|
|
||||||
|
|
||||||
module_param(zfs_free_leak_on_eio, int, 0644);
|
|
||||||
MODULE_PARM_DESC(zfs_free_leak_on_eio,
|
|
||||||
"Set to ignore IO errors during free and permanently leak the space");
|
|
||||||
#endif /* _KERNEL */
|
|
||||||
|
|
|
@ -247,6 +247,55 @@ static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *,
|
||||||
int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t *);
|
int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t *);
|
||||||
static int get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp);
|
static int get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp);
|
||||||
|
|
||||||
|
#if defined(HAVE_DECLARE_EVENT_CLASS)
|
||||||
|
void
|
||||||
|
__dprintf(const char *file, const char *func, int line, const char *fmt, ...)
|
||||||
|
{
|
||||||
|
const char *newfile;
|
||||||
|
size_t size = 4096;
|
||||||
|
char *buf = kmem_alloc(size, KM_PUSHPAGE);
|
||||||
|
char *nl;
|
||||||
|
va_list adx;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Get rid of annoying prefix to filename.
|
||||||
|
*/
|
||||||
|
newfile = strrchr(file, '/');
|
||||||
|
if (newfile != NULL) {
|
||||||
|
newfile = newfile + 1; /* Get rid of leading / */
|
||||||
|
} else {
|
||||||
|
newfile = file;
|
||||||
|
}
|
||||||
|
|
||||||
|
va_start(adx, fmt);
|
||||||
|
(void) vsnprintf(buf, size, fmt, adx);
|
||||||
|
va_end(adx);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Get rid of trailing newline.
|
||||||
|
*/
|
||||||
|
nl = strrchr(buf, '\n');
|
||||||
|
if (nl != NULL)
|
||||||
|
*nl = '\0';
|
||||||
|
|
||||||
|
/*
|
||||||
|
* To get this data enable the zfs__dprintf trace point as shown:
|
||||||
|
*
|
||||||
|
* # Enable zfs__dprintf tracepoint, clear the tracepoint ring buffer
|
||||||
|
* $ echo 1 > /sys/module/zfs/parameters/zfs_flags
|
||||||
|
* $ echo 1 > /sys/kernel/debug/tracing/events/zfs/enable
|
||||||
|
* $ echo 0 > /sys/kernel/debug/tracing/trace
|
||||||
|
*
|
||||||
|
* # Dump the ring buffer.
|
||||||
|
* $ cat /sys/kernel/debug/tracing/trace
|
||||||
|
*/
|
||||||
|
DTRACE_PROBE4(zfs__dprintf,
|
||||||
|
char *, newfile, char *, func, int, line, char *, buf);
|
||||||
|
|
||||||
|
kmem_free(buf, size);
|
||||||
|
}
|
||||||
|
#endif /* HAVE_DECLARE_EVENT_CLASS */
|
||||||
|
|
||||||
static void
|
static void
|
||||||
history_str_free(char *buf)
|
history_str_free(char *buf)
|
||||||
{
|
{
|
||||||
|
|
|
@ -1436,7 +1436,7 @@ zfs_vget(struct super_block *sb, struct inode **ipp, fid_t *fidp)
|
||||||
|
|
||||||
gen_mask = -1ULL >> (64 - 8 * i);
|
gen_mask = -1ULL >> (64 - 8 * i);
|
||||||
|
|
||||||
dprintf("getting %llu [%u mask %llx]\n", object, fid_gen, gen_mask);
|
dprintf("getting %llu [%llu mask %llx]\n", object, fid_gen, gen_mask);
|
||||||
if ((err = zfs_zget(zsb, object, &zp))) {
|
if ((err = zfs_zget(zsb, object, &zp))) {
|
||||||
ZFS_EXIT(zsb);
|
ZFS_EXIT(zsb);
|
||||||
return (err);
|
return (err);
|
||||||
|
@ -1447,7 +1447,8 @@ zfs_vget(struct super_block *sb, struct inode **ipp, fid_t *fidp)
|
||||||
if (zp_gen == 0)
|
if (zp_gen == 0)
|
||||||
zp_gen = 1;
|
zp_gen = 1;
|
||||||
if (zp->z_unlinked || zp_gen != fid_gen) {
|
if (zp->z_unlinked || zp_gen != fid_gen) {
|
||||||
dprintf("znode gen (%u) != fid gen (%u)\n", zp_gen, fid_gen);
|
dprintf("znode gen (%llu) != fid gen (%llu)\n", zp_gen,
|
||||||
|
fid_gen);
|
||||||
iput(ZTOI(zp));
|
iput(ZTOI(zp));
|
||||||
ZFS_EXIT(zsb);
|
ZFS_EXIT(zsb);
|
||||||
return (SET_ERROR(EINVAL));
|
return (SET_ERROR(EINVAL));
|
||||||
|
|
|
@ -597,8 +597,9 @@ line: while (<$filehandle>) {
|
||||||
if (/\(\s/) {
|
if (/\(\s/) {
|
||||||
err("whitespace after left paren");
|
err("whitespace after left paren");
|
||||||
}
|
}
|
||||||
# allow "for" statements to have empty "continue" clauses
|
# Allow "for" statements to have empty "continue" clauses.
|
||||||
if (/\s\)/ && !/^\s*for \([^;]*;[^;]*; \)/) {
|
# Allow right paren on its own line unless we're being picky (-p).
|
||||||
|
if (/\s\)/ && !/^\s*for \([^;]*;[^;]*; \)/ && ($picky || !/^\s*\)/)) {
|
||||||
err("whitespace before right paren");
|
err("whitespace before right paren");
|
||||||
}
|
}
|
||||||
if (/^\s*\(void\)[^ ]/) {
|
if (/^\s*\(void\)[^ ]/) {
|
||||||
|
|
|
@ -33,7 +33,6 @@ MODULE-OPTIONS:
|
||||||
|
|
||||||
$0 zfs="zfs_prefetch_disable=1"
|
$0 zfs="zfs_prefetch_disable=1"
|
||||||
$0 zfs="zfs_prefetch_disable=1 zfs_mdcomp_disable=1"
|
$0 zfs="zfs_prefetch_disable=1 zfs_mdcomp_disable=1"
|
||||||
$0 spl="spl_debug_mask=0"
|
|
||||||
|
|
||||||
EOF
|
EOF
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue