From db75854cbbab3f0c82146df2d8619a69200710e3 Mon Sep 17 00:00:00 2001 From: Don Brady Date: Mon, 19 Oct 2020 12:01:00 -0600 Subject: [PATCH] zed syslog entries drop important info ZED will log zevents summaries to the syslog, however the log entries tend to drop event details that can be useful for diagnosis. This is especially true for ereport events, like io, checksum, and delay. Update the all-syslog.sh script to log additional event information. Add an optional config option, ZED_SYSLOG_DISPLAY_GUIDS, to zed.rc for choosing GUIDs over names for pool and vdev. Change the default ZED_SYSLOG_SUBCLASS_EXCLUDE to exclude history_event events. These events tend to be frequent, convey no meaningful info, and are already logged in the zpool history. Reviewed-by: John Kennedy Reviewed-by: Pavel Zakharov Reviewed-by: Brian Behlendorf Signed-off-by: Don Brady Closes #10967 --- cmd/zed/zed.d/all-syslog.sh | 44 +++++++++++-- cmd/zed/zed.d/zed.rc | 7 ++- .../functional/events/events_001_pos.ksh | 28 ++++----- .../functional/events/events_002_pos.ksh | 2 +- .../functional/events/events_common.kshlib | 61 ++++++++++++++----- 5 files changed, 107 insertions(+), 35 deletions(-) diff --git a/cmd/zed/zed.d/all-syslog.sh b/cmd/zed/zed.d/all-syslog.sh index cb92865001..270b1bc67e 100755 --- a/cmd/zed/zed.d/all-syslog.sh +++ b/cmd/zed/zed.d/all-syslog.sh @@ -1,14 +1,50 @@ #!/bin/sh +# +# Copyright (C) 2013-2014 Lawrence Livermore National Security, LLC. +# Copyright (c) 2020 by Delphix. All rights reserved. +# + # # Log the zevent via syslog. +# [ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc" . "${ZED_ZEDLET_DIR}/zed-functions.sh" zed_exit_if_ignoring_this_event -zed_log_msg "eid=${ZEVENT_EID}" "class=${ZEVENT_SUBCLASS}" \ - "${ZEVENT_POOL_GUID:+"pool_guid=${ZEVENT_POOL_GUID}"}" \ - "${ZEVENT_VDEV_PATH:+"vdev_path=${ZEVENT_VDEV_PATH}"}" \ - "${ZEVENT_VDEV_STATE_STR:+"vdev_state=${ZEVENT_VDEV_STATE_STR}"}" +# build a string of name=value pairs for this event +msg="eid=${ZEVENT_EID} class=${ZEVENT_SUBCLASS}" + +if [ "${ZED_SYSLOG_DISPLAY_GUIDS}" = "1" ]; then + [ -n "${ZEVENT_POOL_GUID}" ] && msg="${msg} pool_guid=${ZEVENT_POOL_GUID}" + [ -n "${ZEVENT_VDEV_GUID}" ] && msg="${msg} vdev_guid=${ZEVENT_VDEV_GUID}" +else + [ -n "${ZEVENT_POOL}" ] && msg="${msg} pool='${ZEVENT_POOL}'" + [ -n "${ZEVENT_VDEV_PATH}" ] && msg="${msg} vdev=$(basename "${ZEVENT_VDEV_PATH}")" +fi + +# log pool state if state is anything other than 'ACTIVE' +[ -n "${ZEVENT_POOL_STATE_STR}" ] && [ "$ZEVENT_POOL_STATE" -ne 0 ] && \ + msg="${msg} pool_state=${ZEVENT_POOL_STATE_STR}" + +# Log the following payload nvpairs if they are present +[ -n "${ZEVENT_VDEV_STATE_STR}" ] && msg="${msg} vdev_state=${ZEVENT_VDEV_STATE_STR}" +[ -n "${ZEVENT_CKSUM_ALGORITHM}" ] && msg="${msg} algorithm=${ZEVENT_CKSUM_ALGORITHM}" +[ -n "${ZEVENT_ZIO_SIZE}" ] && msg="${msg} size=${ZEVENT_ZIO_SIZE}" +[ -n "${ZEVENT_ZIO_OFFSET}" ] && msg="${msg} offset=${ZEVENT_ZIO_OFFSET}" +[ -n "${ZEVENT_ZIO_PRIORITY}" ] && msg="${msg} priority=${ZEVENT_ZIO_PRIORITY}" +[ -n "${ZEVENT_ZIO_ERR}" ] && msg="${msg} err=${ZEVENT_ZIO_ERR}" +[ -n "${ZEVENT_ZIO_FLAGS}" ] && msg="${msg} flags=$(printf '0x%x' "${ZEVENT_ZIO_FLAGS}")" + +# log delays that are >= 10 milisec +[ -n "${ZEVENT_ZIO_DELAY}" ] && [ "$ZEVENT_ZIO_DELAY" -gt 10000000 ] && \ + msg="${msg} delay=$((ZEVENT_ZIO_DELAY / 1000000))ms" + +# list the bookmark data together +[ -n "${ZEVENT_ZIO_OBJSET}" ] && \ + msg="${msg} bookmark=${ZEVENT_ZIO_OBJSET}:${ZEVENT_ZIO_OBJECT}:${ZEVENT_ZIO_LEVEL}:${ZEVENT_ZIO_BLKID}" + +zed_log_msg "${msg}" + exit 0 diff --git a/cmd/zed/zed.d/zed.rc b/cmd/zed/zed.d/zed.rc index 1b220d28db..df560f921e 100644 --- a/cmd/zed/zed.d/zed.rc +++ b/cmd/zed/zed.d/zed.rc @@ -118,5 +118,10 @@ ZED_USE_ENCLOSURE_LEDS=1 # Otherwise, if ZED_SYSLOG_SUBCLASS_EXCLUDE is set, the # matching subclasses are excluded from logging. #ZED_SYSLOG_SUBCLASS_INCLUDE="checksum|scrub_*|vdev.*" -#ZED_SYSLOG_SUBCLASS_EXCLUDE="statechange|config_*|history_event" +ZED_SYSLOG_SUBCLASS_EXCLUDE="history_event" + +## +# Use GUIDs instead of names when logging pool and vdevs +# Disabled by default, 1 to enable and 0 to disable. +#ZED_SYSLOG_DISPLAY_GUIDS=1 diff --git a/tests/zfs-tests/tests/functional/events/events_001_pos.ksh b/tests/zfs-tests/tests/functional/events/events_001_pos.ksh index 5121f66b78..189cf435e8 100755 --- a/tests/zfs-tests/tests/functional/events/events_001_pos.ksh +++ b/tests/zfs-tests/tests/functional/events/events_001_pos.ksh @@ -94,22 +94,22 @@ run_and_verify -p "$MPOOL"\ -e "resource.fs.zfs.statechange" \ -e "sysevent.fs.zfs.config_sync" \ "zpool offline $MPOOL $VDEV1" -run_and_verify -p "$MPOOL" -d 10 \ +run_and_verify -p "$MPOOL" \ -e "resource.fs.zfs.statechange" \ -e "sysevent.fs.zfs.vdev_online" \ - -e "sysevent.fs.zfs.resilver_start" \ - -e "sysevent.fs.zfs.resilver_finish" \ - -e "sysevent.fs.zfs.history_event" \ -e "sysevent.fs.zfs.config_sync" \ + -e "sysevent.fs.zfs.resilver_start" \ + -e "sysevent.fs.zfs.history_event" \ + -e "sysevent.fs.zfs.resilver_finish" \ "zpool online $MPOOL $VDEV1" # Attach then detach a device from the mirror. -run_and_verify -p "$MPOOL" -d 10 \ +run_and_verify -p "$MPOOL" \ -e "sysevent.fs.zfs.vdev_attach" \ -e "sysevent.fs.zfs.resilver_start" \ - -e "sysevent.fs.zfs.resilver_finish" \ - -e "sysevent.fs.zfs.history_event" \ -e "sysevent.fs.zfs.config_sync" \ + -e "sysevent.fs.zfs.history_event" \ + -e "sysevent.fs.zfs.resilver_finish" \ "zpool attach $MPOOL $VDEV1 $VDEV4" run_and_verify -p "$MPOOL" \ -e "sysevent.fs.zfs.vdev_remove" \ @@ -117,20 +117,20 @@ run_and_verify -p "$MPOOL" \ "zpool detach $MPOOL $VDEV4" # Replace a device -run_and_verify -p "$MPOOL" -d 10 \ +run_and_verify -p "$MPOOL" \ -e "sysevent.fs.zfs.vdev_attach" \ -e "sysevent.fs.zfs.resilver_start" \ + -e "sysevent.fs.zfs.config_sync" \ + -e "sysevent.fs.zfs.history_event" \ -e "sysevent.fs.zfs.resilver_finish" \ -e "sysevent.fs.zfs.vdev_remove" \ - -e "sysevent.fs.zfs.history_event" \ - -e "sysevent.fs.zfs.config_sync" \ "zpool replace -f $MPOOL $VDEV1 $VDEV4" # Scrub a pool. -run_and_verify -p "$MPOOL" -d 10 \ +run_and_verify -p "$MPOOL" \ -e "sysevent.fs.zfs.scrub_start" \ - -e "sysevent.fs.zfs.scrub_finish" \ -e "sysevent.fs.zfs.history_event" \ + -e "sysevent.fs.zfs.scrub_finish" \ "zpool scrub $MPOOL" # Export then import a pool @@ -139,9 +139,9 @@ run_and_verify -p "$MPOOL" \ -e "sysevent.fs.zfs.config_sync" \ "zpool export $MPOOL" run_and_verify -p "$MPOOL" \ - -e "sysevent.fs.zfs.pool_import" \ - -e "sysevent.fs.zfs.history_event" \ -e "sysevent.fs.zfs.config_sync" \ + -e "sysevent.fs.zfs.history_event" \ + -e "sysevent.fs.zfs.pool_import" \ "zpool import -d $TEST_BASE_DIR $MPOOL" # Destroy the pool diff --git a/tests/zfs-tests/tests/functional/events/events_002_pos.ksh b/tests/zfs-tests/tests/functional/events/events_002_pos.ksh index 76ad6237fc..7a78d93a84 100755 --- a/tests/zfs-tests/tests/functional/events/events_002_pos.ksh +++ b/tests/zfs-tests/tests/functional/events/events_002_pos.ksh @@ -92,7 +92,7 @@ done # 5. Start the ZED and verify it only handled the new missed events. log_must zed_start -log_must file_wait $ZED_DEBUG_LOG 15 +log_must file_wait $ZED_DEBUG_LOG 35 log_must cp $ZED_DEBUG_LOG $TMP_EVENTS_ZED log_mustnot grep -q "sysevent.fs.zfs.pool_create" $TMP_EVENTS_ZED diff --git a/tests/zfs-tests/tests/functional/events/events_common.kshlib b/tests/zfs-tests/tests/functional/events/events_common.kshlib index 26afc10917..5f4c629dca 100644 --- a/tests/zfs-tests/tests/functional/events/events_common.kshlib +++ b/tests/zfs-tests/tests/functional/events/events_common.kshlib @@ -23,10 +23,34 @@ # Copyright (c) 2017 by Lawrence Livermore National Security, LLC. # Use is subject to license terms. # +# Copyright (c) 2020 by Delphix. All rights reserved. +# . $STF_SUITE/include/libtest.shlib . $STF_SUITE/tests/functional/events/events.cfg +# +# wait for 'event' to show up in the log 'file' +function file_wait_event # file event timeout +{ + file=$1 + event=$2 + timeout=${3:-120} + + SECONDS=0 + + until grep -q "^ZEVENT_CLASS=$event" $ZED_DEBUG_LOG ; do + if [[ $SECONDS -gt $timeout ]]; then + echo file_wait_event exceeded $SECONDS seconds + return 1 + fi + + sleep 1 + done + + return 0; +} + # # Wait for up to 'timeout' seconds for the 'file' to settle, i.e. # not be updated for a period of 'delay' seconds. @@ -41,6 +65,7 @@ function file_wait # file delay timeout while [ $(( $(date +%s) - $(stat -c %Y $file) )) -lt $delay ]; do if [[ $SECONDS -gt $timeout ]]; then + echo file_wait exceeded $SECONDS seconds return 1 fi @@ -52,30 +77,22 @@ function file_wait # file delay timeout function run_and_verify { - typeset delay event pool zedlog + typeset event pool set -A events - while getopts "d:e:p:z:" opt; do + while getopts "e:p:" opt; do case $opt in - d) - delay=$OPTARG - ;; e) - events[${#events[*]}+1]=$OPTARG + events+=("$OPTARG") ;; p) pool=$OPTARG ;; - z) - zedlog=$OPTARG - ;; esac done shift $(($OPTIND - 1)) pool=${pool:-$TESTPOOL} - delay=${delay:-3} - zedlog=${zedlog:-$ZED_DEBUG_LOG} fullcmd="$1" cmd=$(echo $fullcmd | awk '{print $1}') @@ -87,21 +104,30 @@ function run_and_verify # Remove any previous events from the logs. log_must zpool events -c - log_must truncate -s 0 $zedlog + log_must truncate -s 0 $ZED_DEBUG_LOG # Run the command as provided. log_must eval "$fullcmd" # Collect the new events and verify there are some. log_must zpool sync -f - log_must file_wait $zedlog $delay - log_must cp $zedlog $TMP_EVENTS_ZED log_must eval "zpool events >$TMP_EVENTS 2>/dev/null" log_must eval "zpool events -v > $TMP_EVENTS_FULL 2>/dev/null" log_must test -s $TMP_EVENTS log_must test -s $TMP_EVENTS_FULL - log_must test -s $TMP_EVENTS_ZED + + # If the only event is history then we don't observe zed debug log + if [[ "${events[0]}" != "sysevent.fs.zfs.history_event" ]]; then + # wait for the last event to show up in the debug log + log_must file_wait_event $ZED_DEBUG_LOG ${events[-1]} + + log_must cp $ZED_DEBUG_LOG $TMP_EVENTS_ZED + log_must test -s $TMP_EVENTS_ZED + + log_note "Events logged:" + grep "^ZEVENT_CLASS" $TMP_EVENTS_ZED + fi log_note "Events generated:" cat $TMP_EVENTS @@ -118,6 +144,11 @@ function run_and_verify $TMP_EVENTS_FULL >$TMP_EVENT_FULL log_must grep -q "pool = \"$pool\"" $TMP_EVENT_FULL + # all-debug.sh filters history events (seen in ZED_DEBUG_LOG) + if [[ "$event" == "sysevent.fs.zfs.history_event" ]]; then + continue + fi + # Verify the event was received by the ZED and logged. awk -v event="$event" \ 'BEGIN{FS="\n"; RS=""} $0 ~ event { print $0 }' \