Add a statechange notify zedlet
Now that ZED has internal fault diagnosis and the statechange event is generated for faulted states, we can replace the io-notify and checksum-notify zedlets with one based on statechange. Reviewed-by: Tony Hutter <hutter2@llnl.gov> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Don Brady <don.brady@intel.com> Closes #5383
This commit is contained in:
parent
32dec7bd1a
commit
0df15db98f
|
@ -61,23 +61,21 @@ zedexecdir = $(libexecdir)/zfs/zed.d
|
||||||
dist_zedexec_SCRIPTS = \
|
dist_zedexec_SCRIPTS = \
|
||||||
zed.d/all-debug.sh \
|
zed.d/all-debug.sh \
|
||||||
zed.d/all-syslog.sh \
|
zed.d/all-syslog.sh \
|
||||||
zed.d/checksum-notify.sh \
|
|
||||||
zed.d/data-notify.sh \
|
zed.d/data-notify.sh \
|
||||||
zed.d/generic-notify.sh \
|
zed.d/generic-notify.sh \
|
||||||
zed.d/io-notify.sh \
|
|
||||||
zed.d/resilver_finish-notify.sh \
|
zed.d/resilver_finish-notify.sh \
|
||||||
zed.d/scrub_finish-notify.sh \
|
zed.d/scrub_finish-notify.sh \
|
||||||
zed.d/statechange-led.sh \
|
zed.d/statechange-led.sh \
|
||||||
|
zed.d/statechange-notify.sh \
|
||||||
zed.d/vdev_clear-led.sh
|
zed.d/vdev_clear-led.sh
|
||||||
|
|
||||||
zedconfdefaults = \
|
zedconfdefaults = \
|
||||||
all-syslog.sh \
|
all-syslog.sh \
|
||||||
checksum-notify.sh \
|
|
||||||
data-notify.sh \
|
data-notify.sh \
|
||||||
io-notify.sh \
|
|
||||||
resilver_finish-notify.sh \
|
resilver_finish-notify.sh \
|
||||||
scrub_finish-notify.sh \
|
scrub_finish-notify.sh \
|
||||||
statechange-led.sh \
|
statechange-led.sh \
|
||||||
|
statechange-notify.sh \
|
||||||
vdev_clear-led.sh
|
vdev_clear-led.sh
|
||||||
|
|
||||||
install-data-hook:
|
install-data-hook:
|
||||||
|
|
|
@ -1 +0,0 @@
|
||||||
io-notify.sh
|
|
|
@ -1 +0,0 @@
|
||||||
io-notify.sh
|
|
|
@ -0,0 +1,43 @@
|
||||||
|
#!/bin/sh
#
# Notify the administrator when ZFS reports a DATA error.
#
# At most one notification per ZED_NOTIFY_INTERVAL_SECS is sent for a given
# class/pool/[vdev] combination, so that a burst of events for the same
# pool/[vdev] does not flood the recipient.
#
# Exit codes:
#   0: notification sent
#   1: notification failed
#   2: notification not configured
#   3: notification suppressed
#   9: internal error

# zed.rc is optional; zed-functions.sh is always sourced.
if [ -f "${ZED_ZEDLET_DIR}/zed.rc" ]; then
	. "${ZED_ZEDLET_DIR}/zed.rc"
fi
. "${ZED_ZEDLET_DIR}/zed-functions.sh"

# Both the pool name and the event subclass are needed to build the message.
if [ -z "${ZEVENT_POOL}" ]; then
	exit 9
fi
if [ -z "${ZEVENT_SUBCLASS}" ]; then
	exit 9
fi

# Nothing is sent unless ZED_NOTIFY_DATA is set to a non-empty value.
if [ -z "${ZED_NOTIFY_DATA}" ]; then
	exit 3
fi

# Throttle on pool + vdev guid (0 when the event carries none) + subclass.
throttle_key="${ZEVENT_POOL};${ZEVENT_VDEV_GUID:-0};${ZEVENT_SUBCLASS};notify"
if ! zed_rate_limit "${throttle_key}"; then
	exit 3
fi

# Keep the temporary message file private to the invoking user.
umask 077
alert_subject="ZFS ${ZEVENT_SUBCLASS} error for ${ZEVENT_POOL} on $(hostname)"
alert_file="${TMPDIR:="/tmp"}/$(basename -- "$0").${ZEVENT_EID}.$$"

# Compose the message body.
{
	echo "ZFS has detected a data error:"
	echo
	echo " eid: ${ZEVENT_EID}"
	echo " class: ${ZEVENT_SUBCLASS}"
	echo " host: $(hostname)"
	echo " time: ${ZEVENT_TIME_STRING}"
	echo " error: ${ZEVENT_ZIO_ERR}"
	echo " objid: ${ZEVENT_ZIO_OBJSET}:${ZEVENT_ZIO_OBJECT}"
	echo " pool: ${ZEVENT_POOL}"
} > "${alert_file}"

# Send it, remember the result, and always remove the temporary file.
zed_notify "${alert_subject}" "${alert_file}"
rv=$?
rm -f "${alert_file}"
exit "${rv}"
|
|
@ -1,64 +0,0 @@
|
||||||
#!/bin/sh
#
# Notify the administrator of a CHECKSUM, DATA, or IO error.
#
# At most one notification per ZED_NOTIFY_INTERVAL_SECS is sent for a given
# class/pool/[vdev] combination, so that a burst of events for the same
# pool/[vdev] does not flood the recipient.
#
# Exit codes:
#   0: notification sent
#   1: notification failed
#   2: notification not configured
#   3: notification suppressed
#   9: internal error

# zed.rc is optional; zed-functions.sh is always sourced.
if [ -f "${ZED_ZEDLET_DIR}/zed.rc" ]; then
	. "${ZED_ZEDLET_DIR}/zed.rc"
fi
. "${ZED_ZEDLET_DIR}/zed-functions.sh"

# Both the pool name and the event subclass are needed to build the message.
if [ -z "${ZEVENT_POOL}" ]; then
	exit 9
fi
if [ -z "${ZEVENT_SUBCLASS}" ]; then
	exit 9
fi

# Only the three error subclasses named above are handled here.
case "${ZEVENT_SUBCLASS}" in
	checksum|data|io)
		;;
	*)
		zed_log_err "unsupported event class \"${ZEVENT_SUBCLASS}\""
		exit 9
		;;
esac

# Throttle on pool + vdev guid (0 when the event carries none) + subclass.
throttle_key="${ZEVENT_POOL};${ZEVENT_VDEV_GUID:-0};${ZEVENT_SUBCLASS};notify"
if ! zed_rate_limit "${throttle_key}"; then
	exit 3
fi

# Keep the temporary message file private to the invoking user.
umask 077
alert_subject="ZFS ${ZEVENT_SUBCLASS} error for ${ZEVENT_POOL} on $(hostname)"
alert_file="${TMPDIR:="/tmp"}/$(basename -- "$0").${ZEVENT_EID}.$$"

# Compose the message body; optional vdev fields appear only when set.
{
	# Pick the indefinite article that reads correctly before the subclass.
	if [ "${ZEVENT_SUBCLASS}" = "io" ]; then
		article="an"
	else
		article="a"
	fi

	echo "ZFS has detected ${article} ${ZEVENT_SUBCLASS} error:"
	echo
	echo " eid: ${ZEVENT_EID}"
	echo " class: ${ZEVENT_SUBCLASS}"
	echo " host: $(hostname)"
	echo " time: ${ZEVENT_TIME_STRING}"

	if [ -n "${ZEVENT_VDEV_TYPE}" ]; then
		echo " vtype: ${ZEVENT_VDEV_TYPE}"
	fi
	if [ -n "${ZEVENT_VDEV_PATH}" ]; then
		echo " vpath: ${ZEVENT_VDEV_PATH}"
	fi
	if [ -n "${ZEVENT_VDEV_GUID}" ]; then
		echo " vguid: ${ZEVENT_VDEV_GUID}"
	fi

	if [ -n "${ZEVENT_VDEV_CKSUM_ERRORS}" ]; then
		echo " cksum: ${ZEVENT_VDEV_CKSUM_ERRORS}"
	fi

	if [ -n "${ZEVENT_VDEV_READ_ERRORS}" ]; then
		echo " read: ${ZEVENT_VDEV_READ_ERRORS}"
	fi

	if [ -n "${ZEVENT_VDEV_WRITE_ERRORS}" ]; then
		echo " write: ${ZEVENT_VDEV_WRITE_ERRORS}"
	fi

	echo " pool: ${ZEVENT_POOL}"

} > "${alert_file}"

# Send it, remember the result, and always remove the temporary file.
zed_notify "${alert_subject}" "${alert_file}"
rv=$?
rm -f "${alert_file}"
exit "${rv}"
|
|
|
@ -0,0 +1,74 @@
|
||||||
|
#!/bin/sh
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License Version 1.0 (CDDL-1.0).
# You can obtain a copy of the license from the top-level file
# "OPENSOLARIS.LICENSE" or at <http://opensource.org/licenses/CDDL-1.0>.
# You may not use this file except in compliance with the license.
#
# CDDL HEADER END
#

#
# Send notification in response to a fault-induced statechange.
#
# ZEVENT_SUBCLASS: 'statechange'
# ZEVENT_VDEV_STATE_STR: 'DEGRADED', 'FAULTED' or 'REMOVED'
#
# Exit codes:
#   0: notification sent
#   1: notification failed
#   2: notification not configured
#   3: statechange not relevant
#   4: statechange string missing (unexpected)

[ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc"
. "${ZED_ZEDLET_DIR}/zed-functions.sh"

# Without a state string there is no way to tell what happened to the vdev.
[ -n "${ZEVENT_VDEV_STATE_STR}" ] || exit 4

# Only the FAULTED, DEGRADED and REMOVED states produce a notification.
if [ "${ZEVENT_VDEV_STATE_STR}" != "FAULTED" ] \
    && [ "${ZEVENT_VDEV_STATE_STR}" != "DEGRADED" ] \
    && [ "${ZEVENT_VDEV_STATE_STR}" != "REMOVED" ]; then
	exit 3
fi

# Keep the temporary message file private to the invoking user.
umask 077
note_subject="ZFS device fault for pool ${ZEVENT_POOL_GUID} on $(hostname)"
note_pathname="${TMPDIR:="/tmp"}/$(basename -- "$0").${ZEVENT_EID}.$$"
{
	# This script runs under /bin/sh, so string equality must use the
	# POSIX "=" operator.  "==" is a bash extension that other sh
	# implementations (e.g. dash) reject, which would make every event
	# fall through to the "removed" text below.
	if [ "${ZEVENT_VDEV_STATE_STR}" = "FAULTED" ] ; then
		echo "The number of I/O errors associated with a ZFS device exceeded"
		echo "acceptable levels. ZFS has marked the device as faulted."
	elif [ "${ZEVENT_VDEV_STATE_STR}" = "DEGRADED" ] ; then
		echo "The number of checksum errors associated with a ZFS device"
		echo "exceeded acceptable levels. ZFS has marked the device as"
		echo "degraded."
	else
		# The relevance check above leaves REMOVED as the only other state.
		echo "ZFS has detected that a device was removed."
	fi

	echo
	echo " impact: Fault tolerance of the pool may be compromised."
	echo " eid: ${ZEVENT_EID}"
	echo " class: ${ZEVENT_SUBCLASS}"
	echo " state: ${ZEVENT_VDEV_STATE_STR}"
	echo " host: $(hostname)"
	echo " time: ${ZEVENT_TIME_STRING}"

	# Optional vdev details: each line is printed only when the value is set.
	[ -n "${ZEVENT_VDEV_TYPE}" ] && echo " vtype: ${ZEVENT_VDEV_TYPE}"
	[ -n "${ZEVENT_VDEV_PATH}" ] && echo " vpath: ${ZEVENT_VDEV_PATH}"
	[ -n "${ZEVENT_VDEV_PHYSPATH}" ] && echo " vphys: ${ZEVENT_VDEV_PHYSPATH}"
	[ -n "${ZEVENT_VDEV_GUID}" ] && echo " vguid: ${ZEVENT_VDEV_GUID}"
	[ -n "${ZEVENT_VDEV_DEVID}" ] && echo " devid: ${ZEVENT_VDEV_DEVID}"

	echo " pool: ${ZEVENT_POOL_GUID}"

} > "${note_pathname}"

# Send the message and remember the result so the temporary file can be
# removed before exiting with that status.
zed_notify "${note_subject}" "${note_pathname}"; rv=$?

rm -f "${note_pathname}"
exit "${rv}"
|
|
@ -50,6 +50,12 @@
|
||||||
#
|
#
|
||||||
#ZED_NOTIFY_VERBOSE=0
|
#ZED_NOTIFY_VERBOSE=0
|
||||||
|
|
||||||
|
##
|
||||||
|
# Send notifications for 'ereport.fs.zfs.data' events.
|
||||||
|
# Disabled by default; uncomment to enable.
|
||||||
|
#
|
||||||
|
#ZED_NOTIFY_DATA=1
|
||||||
|
|
||||||
##
|
##
|
||||||
# Pushbullet access token.
|
# Pushbullet access token.
|
||||||
# This grants full access to your account -- protect it accordingly!
|
# This grants full access to your account -- protect it accordingly!
|
||||||
|
@ -73,18 +79,6 @@
|
||||||
#
|
#
|
||||||
#ZED_RUNDIR="/var/run"
|
#ZED_RUNDIR="/var/run"
|
||||||
|
|
||||||
##
|
|
||||||
# Replace a device with a hot spare after N checksum errors are detected.
|
|
||||||
# Disabled by default; uncomment to enable.
|
|
||||||
#
|
|
||||||
#ZED_SPARE_ON_CHECKSUM_ERRORS=10
|
|
||||||
|
|
||||||
##
|
|
||||||
# Replace a device with a hot spare after N I/O errors are detected.
|
|
||||||
# Disabled by default; uncomment to enable.
|
|
||||||
#
|
|
||||||
#ZED_SPARE_ON_IO_ERRORS=1
|
|
||||||
|
|
||||||
##
|
##
|
||||||
# Turn on/off enclosure LEDs when drives get DEGRADED/FAULTED. This works for
|
# Turn on/off enclosure LEDs when drives get DEGRADED/FAULTED. This works for
|
||||||
# device mapper and multipath devices as well. Your enclosure must be
|
# device mapper and multipath devices as well. Your enclosure must be
|
||||||
|
|
Loading…
Reference in New Issue