diff --git a/cmd/zpool/Makefile.am b/cmd/zpool/Makefile.am index 3f30eff47d..ec9757bd62 100644 --- a/cmd/zpool/Makefile.am +++ b/cmd/zpool/Makefile.am @@ -7,6 +7,7 @@ DEFAULT_INCLUDES += \ -I${top_srcdir}/lib/libzfs/include \ -I${top_srcdir}/lib/libnvpair/include \ -I${top_srcdir}/module/zcommon/include \ + -I${top_srcdir}/module/zfs/include \ -I${top_srcdir}/module/nvpair/include \ -I${top_srcdir}/module/avl/include \ -I${top_srcdir}/module/unicode/include diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c index ca3f37b900..63b8086594 100644 --- a/cmd/zpool/zpool_main.c +++ b/cmd/zpool/zpool_main.c @@ -42,8 +42,9 @@ #include #include #include - #include +#include +#include #include @@ -76,6 +77,7 @@ static int zpool_do_export(int, char **); static int zpool_do_upgrade(int, char **); static int zpool_do_history(int, char **); +static int zpool_do_events(int, char **); static int zpool_do_get(int, char **); static int zpool_do_set(int, char **); @@ -118,6 +120,7 @@ typedef enum { HELP_SCRUB, HELP_STATUS, HELP_UPGRADE, + HELP_EVENTS, HELP_GET, HELP_SET } zpool_help_t; @@ -164,6 +167,8 @@ static zpool_command_t command_table[] = { { "upgrade", zpool_do_upgrade, HELP_UPGRADE }, { NULL }, { "history", zpool_do_history, HELP_HISTORY }, + { "events", zpool_do_events, HELP_EVENTS }, + { NULL }, { "get", zpool_do_get, HELP_GET }, { "set", zpool_do_set, HELP_SET }, }; @@ -225,6 +230,8 @@ get_usage(zpool_help_t idx) { return (gettext("\tupgrade\n" "\tupgrade -v\n" "\tupgrade [-V version] <-a | pool ...>\n")); + case HELP_EVENTS: + return (gettext("\tevents [-vfc]\n")); case HELP_GET: return (gettext("\tget <\"all\" | property[,...]> " " ...\n")); @@ -3768,6 +3775,331 @@ zpool_do_history(int argc, char **argv) return (ret); } +typedef struct ev_opts { + int verbose; + int follow; + int clear; +} ev_opts_t; + +/* + * Print a one-line summary of an event: the FM_EREPORT_TIME payload + * (seconds, nanoseconds) as 'Mon DD YYYY HH:MM:SS.nnnnnnnnn', then FM_CLASS. + */ +static void +zpool_do_events_short(nvlist_t *nvl) +{ + char ctime_str[26], str[32], *ptr; + int64_t *tv; + time_t sec; + uint_t n; + + verify(nvlist_lookup_int64_array(nvl, 
FM_EREPORT_TIME, &tv, &n) == 0 && n >= 2); + memset(str, ' ', 32); + sec = (time_t)tv[0]; /* ctime_r() takes a time_t, not an int64_t */ + (void) ctime_r(&sec, ctime_str); + (void) strncpy(str, ctime_str+4, 6); /* 'Jun 30' */ + (void) strncpy(str+7, ctime_str+20, 4); /* '1993' */ + (void) strncpy(str+12, ctime_str+11, 8); /* '21:49:08' */ + (void) snprintf(str+20, sizeof (str) - 20, ".%09lld", (longlong_t)tv[1]);/* '.123456789' */ + (void) printf(gettext("%s "), str); + + verify(nvlist_lookup_string(nvl, FM_CLASS, &ptr) == 0); + (void) printf(gettext("%s\n"), ptr); +} + +static void +zpool_do_events_nvprint(nvlist_t *nvl, int depth) +{ + nvpair_t *nvp; + + for (nvp = nvlist_next_nvpair(nvl, NULL); + nvp != NULL; nvp = nvlist_next_nvpair(nvl, nvp)) { + + data_type_t type = nvpair_type(nvp); + const char *name = nvpair_name(nvp); + + boolean_t b; + uint8_t i8; + uint16_t i16; + uint32_t i32; + uint64_t i64; + char *str; + nvlist_t *cnv; + + printf(gettext("%*s%s = "), depth, "", name); + + switch (type) { + case DATA_TYPE_BOOLEAN: + printf(gettext("%s"), "1"); + break; + + case DATA_TYPE_BOOLEAN_VALUE: + (void) nvpair_value_boolean_value(nvp, &b); + printf(gettext("%s"), b ? 
"1" : "0"); + break; + + case DATA_TYPE_BYTE: + (void) nvpair_value_byte(nvp, &i8); + printf(gettext("0x%x"), i8); + break; + + case DATA_TYPE_INT8: + (void) nvpair_value_int8(nvp, (void *)&i8); + printf(gettext("0x%x"), i8); + break; + + case DATA_TYPE_UINT8: + (void) nvpair_value_uint8(nvp, &i8); + printf(gettext("0x%x"), i8); + break; + + case DATA_TYPE_INT16: + (void) nvpair_value_int16(nvp, (void *)&i16); + printf(gettext("0x%x"), i16); + break; + + case DATA_TYPE_UINT16: + (void) nvpair_value_uint16(nvp, &i16); + printf(gettext("0x%x"), i16); + break; + + case DATA_TYPE_INT32: + (void) nvpair_value_int32(nvp, (void *)&i32); + printf(gettext("0x%x"), i32); + break; + + case DATA_TYPE_UINT32: + (void) nvpair_value_uint32(nvp, &i32); + printf(gettext("0x%x"), i32); + break; + + case DATA_TYPE_INT64: + (void) nvpair_value_int64(nvp, (void *)&i64); + printf(gettext("0x%llx"), (u_longlong_t)i64); + break; + + case DATA_TYPE_UINT64: + (void) nvpair_value_uint64(nvp, &i64); + printf(gettext("0x%llx"), (u_longlong_t)i64); + break; + + case DATA_TYPE_HRTIME: + (void) nvpair_value_hrtime(nvp, (void *)&i64); + printf(gettext("0x%llx"), (u_longlong_t)i64); + break; + + case DATA_TYPE_STRING: + (void) nvpair_value_string(nvp, &str); + printf(gettext("\"%s\""), str ? 
str : ""); + break; + + case DATA_TYPE_NVLIST: + printf(gettext("(embedded nvlist)\n")); + (void) nvpair_value_nvlist(nvp, &cnv); + zpool_do_events_nvprint(cnv, depth + 8); + printf(gettext("%*s(end %s)\n"), depth, "", name); + break; + + case DATA_TYPE_NVLIST_ARRAY: { + nvlist_t **val; + uint_t i, nelem; + + (void) nvpair_value_nvlist_array(nvp, &val, &nelem); + printf(gettext("(%d embedded nvlists)\n"), nelem); + for (i = 0; i < nelem; i++) { + printf(gettext("%*s%s[%d] = %s\n"), + depth, "", name, i, "(embedded nvlist)"); + zpool_do_events_nvprint(val[i], depth + 8); + printf(gettext("%*s(end %s[%i])\n"), + depth, "", name, i); + } + printf(gettext("%*s(end %s)\n"), depth, "", name); + } + break; + + case DATA_TYPE_INT8_ARRAY: { + int8_t *val; + uint_t i, nelem; + + (void) nvpair_value_int8_array(nvp, &val, &nelem); + for (i = 0; i < nelem; i++) + printf(gettext("0x%x "), val[i]); + + break; + } + + case DATA_TYPE_UINT8_ARRAY: { + uint8_t *val; + uint_t i, nelem; + + (void) nvpair_value_uint8_array(nvp, &val, &nelem); + for (i = 0; i < nelem; i++) + printf(gettext("0x%x "), val[i]); + + break; + } + + case DATA_TYPE_INT16_ARRAY: { + int16_t *val; + uint_t i, nelem; + + (void) nvpair_value_int16_array(nvp, &val, &nelem); + for (i = 0; i < nelem; i++) + printf(gettext("0x%x "), val[i]); + + break; + } + + case DATA_TYPE_UINT16_ARRAY: { + uint16_t *val; + uint_t i, nelem; + + (void) nvpair_value_uint16_array(nvp, &val, &nelem); + for (i = 0; i < nelem; i++) + printf(gettext("0x%x "), val[i]); + + break; + } + + case DATA_TYPE_INT32_ARRAY: { + int32_t *val; + uint_t i, nelem; + + (void) nvpair_value_int32_array(nvp, &val, &nelem); + for (i = 0; i < nelem; i++) + printf(gettext("0x%x "), val[i]); + + break; + } + + case DATA_TYPE_UINT32_ARRAY: { + uint32_t *val; + uint_t i, nelem; + + (void) nvpair_value_uint32_array(nvp, &val, &nelem); + for (i = 0; i < nelem; i++) + printf(gettext("0x%x "), val[i]); + + break; + } + + case DATA_TYPE_INT64_ARRAY: { + int64_t *val; + 
uint_t i, nelem; + + (void) nvpair_value_int64_array(nvp, &val, &nelem); + for (i = 0; i < nelem; i++) + printf(gettext("0x%llx "), (u_longlong_t)val[i]); + + break; + } + + case DATA_TYPE_UINT64_ARRAY: { + uint64_t *val; + uint_t i, nelem; + + (void) nvpair_value_uint64_array(nvp, &val, &nelem); + for (i = 0; i < nelem; i++) + printf(gettext("0x%llx "), (u_longlong_t)val[i]); + + break; + } + + case DATA_TYPE_STRING_ARRAY: + case DATA_TYPE_BOOLEAN_ARRAY: + case DATA_TYPE_BYTE_ARRAY: + case DATA_TYPE_DOUBLE: + case DATA_TYPE_UNKNOWN: + printf(gettext("")); + break; + } + + printf(gettext("\n")); + } +} + +static int +zpool_do_events_next(ev_opts_t *opts) +{ + nvlist_t *nvl; + int ret, dropped; + + (void) printf(gettext("%-27s %s\n"), "TIME", "CLASS"); + + while (1) { + ret = zpool_events_next(g_zfs, &nvl, &dropped, !!opts->follow); + if (ret || nvl == NULL) + break; + + if (dropped > 0) + (void) printf(gettext("dropped %d events\n"), dropped); + + zpool_do_events_short(nvl); + + if (opts->verbose) { + zpool_do_events_nvprint(nvl, 8); + printf(gettext("\n")); + } + + nvlist_free(nvl); + } + + return (ret); +} + +static int +zpool_do_events_clear(ev_opts_t *opts) +{ + int count, ret; + + ret = zpool_events_clear(g_zfs, &count); + if (!ret) + (void) printf(gettext("cleared %d events\n"), count); + + return (ret); +} + +/* + * zpool events [-vfc] + * + * Displays events logs by ZFS. 
+ */ +int +zpool_do_events(int argc, char **argv) +{ + ev_opts_t opts = { 0 }; + int ret; + int c; + + /* check options */ + while ((c = getopt(argc, argv, "vfc")) != -1) { + switch (c) { + case 'v': + opts.verbose = 1; + break; + case 'f': + opts.follow = 1; + break; + case 'c': + opts.clear = 1; + break; + case '?': + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); + usage(B_FALSE); + } + } + argc -= optind; + argv += optind; + + if (opts.clear) + ret = zpool_do_events_clear(&opts); + else + ret = zpool_do_events_next(&opts); + + return ret; +} + static int get_callback(zpool_handle_t *zhp, void *data) { diff --git a/lib/libzfs/include/libzfs.h b/lib/libzfs/include/libzfs.h index 598403eed4..e50303f104 100644 --- a/lib/libzfs/include/libzfs.h +++ b/lib/libzfs/include/libzfs.h @@ -360,6 +360,8 @@ extern int zpool_get_history(zpool_handle_t *, nvlist_t **); extern void zpool_set_history_str(const char *subcommand, int argc, char **argv, char *history_str); extern int zpool_stage_history(libzfs_handle_t *, const char *); +extern int zpool_events_next(libzfs_handle_t *, nvlist_t **, int *, int); +extern int zpool_events_clear(libzfs_handle_t *, int *); extern void zpool_obj_to_path(zpool_handle_t *, uint64_t, uint64_t, char *, size_t len); extern int zfs_ioctl(libzfs_handle_t *, int, struct zfs_cmd *); diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c index e8c0e7e273..744d2d2423 100644 --- a/lib/libzfs/libzfs_pool.c +++ b/lib/libzfs/libzfs_pool.c @@ -2989,6 +2989,92 @@ zpool_get_history(zpool_handle_t *zhp, nvlist_t **nvhisp) return (err); } +/* + * Retrieve the next event. If there is a new event available 'nvp' will + * contain a newly allocated nvlist and 'dropped' will be set to the number + * of missed events since the last call to this function. When 'nvp' is + * set to NULL it indicates no new events are available. In either case + * the function returns 0 and it is up to the caller to free 'nvp'. 
In + * the case of a fatal error the function will return a non-zero value. + * When the function is called in blocking mode it will not return until + * a new event is available. + */ +int +zpool_events_next(libzfs_handle_t *hdl, nvlist_t **nvp, int *dropped, int block) +{ + zfs_cmd_t zc = { "\0", "\0", "\0", 0 }; + int error = 0; + + *nvp = NULL; + *dropped = 0; + + if (!block) + zc.zc_guid = ZEVENT_NONBLOCK; + + if (zcmd_alloc_dst_nvlist(hdl, &zc, ZEVENT_SIZE) != 0) + return (-1); + +retry: + if (zfs_ioctl(hdl, ZFS_IOC_EVENTS_NEXT, &zc) != 0) { + switch (errno) { + case ESHUTDOWN: + error = zfs_error_fmt(hdl, EZFS_POOLUNAVAIL, + dgettext(TEXT_DOMAIN, "zfs shutdown")); + goto out; + case ENOENT: + /* Blocking error case should not occur */ + if (block) + error = zpool_standard_error_fmt(hdl, errno, + dgettext(TEXT_DOMAIN, "cannot get event")); + + goto out; + case ENOMEM: + if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) { + error = zfs_error_fmt(hdl, EZFS_NOMEM, + dgettext(TEXT_DOMAIN, "cannot get event")); + goto out; + } else { + goto retry; + } + default: + error = zpool_standard_error_fmt(hdl, errno, + dgettext(TEXT_DOMAIN, "cannot get event")); + goto out; + } + } + + error = zcmd_read_dst_nvlist(hdl, &zc, nvp); + if (error != 0) + goto out; + + *dropped = (int)zc.zc_cookie; +out: + zcmd_free_nvlists(&zc); + + return (error); +} + +/* + * Clear all events. 
+ */ +int +zpool_events_clear(libzfs_handle_t *hdl, int *count) +{ + zfs_cmd_t zc = { "\0", "\0", "\0", 0 }; + char msg[1024]; + + (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, + "cannot clear events")); + + if (zfs_ioctl(hdl, ZFS_IOC_EVENTS_CLEAR, &zc) != 0) + return (zpool_standard_error_fmt(hdl, errno, msg)); + + if (count != NULL) + *count = (int)zc.zc_cookie; /* # of events cleared */ + + return (0); +} + void zpool_obj_to_path(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj, char *pathname, size_t len) diff --git a/lib/libzpool/include/sys/zfs_context.h b/lib/libzpool/include/sys/zfs_context.h index 6d0b65b689..ddeb1532a9 100644 --- a/lib/libzpool/include/sys/zfs_context.h +++ b/lib/libzpool/include/sys/zfs_context.h @@ -58,7 +58,6 @@ extern "C" { #include #include #include -#include #include #include #include @@ -72,8 +71,7 @@ extern "C" { #include #include #include -#include -#include +#include /* * Stack diff --git a/module/zcommon/include/sys/fs/zfs.h b/module/zcommon/include/sys/fs/zfs.h index ef38ea336f..1c10cbf1aa 100644 --- a/module/zcommon/include/sys/fs/zfs.h +++ b/module/zcommon/include/sys/fs/zfs.h @@ -603,7 +603,9 @@ typedef enum zfs_ioc { ZFS_IOC_USERSPACE_UPGRADE, ZFS_IOC_HOLD, ZFS_IOC_RELEASE, - ZFS_IOC_GET_HOLDS + ZFS_IOC_GET_HOLDS, + ZFS_IOC_EVENTS_NEXT, + ZFS_IOC_EVENTS_CLEAR, } zfs_ioc_t; /* diff --git a/module/zfs/dsl_scrub.c b/module/zfs/dsl_scrub.c index 3d59be3f96..0f1eddb136 100644 --- a/module/zfs/dsl_scrub.c +++ b/module/zfs/dsl_scrub.c @@ -94,12 +94,12 @@ dsl_pool_scrub_setup_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) if (vdev_resilver_needed(rvd, &dp->dp_scrub_min_txg, &dp->dp_scrub_max_txg)) { spa_event_notify(dp->dp_spa, NULL, - ESC_ZFS_RESILVER_START); + FM_EREPORT_ZFS_RESILVER_START); dp->dp_scrub_max_txg = MIN(dp->dp_scrub_max_txg, tx->tx_txg); } else { spa_event_notify(dp->dp_spa, NULL, - ESC_ZFS_SCRUB_START); + FM_EREPORT_ZFS_SCRUB_START); } /* zero out the scrub stats in all vdev_stat_t's */ @@ 
-219,7 +219,8 @@ dsl_pool_scrub_cancel_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) *completep ? dp->dp_scrub_max_txg : 0, B_TRUE); if (*completep) spa_event_notify(dp->dp_spa, NULL, dp->dp_scrub_min_txg ? - ESC_ZFS_RESILVER_FINISH : ESC_ZFS_SCRUB_FINISH); + FM_EREPORT_ZFS_RESILVER_FINISH : + FM_EREPORT_ZFS_SCRUB_FINISH); spa_errlog_rotate(dp->dp_spa); /* diff --git a/module/zfs/fm.c b/module/zfs/fm.c index 3cc979d41b..4c6aee536e 100644 --- a/module/zfs/fm.c +++ b/module/zfs/fm.c @@ -54,49 +54,43 @@ #include #include -#include -#include +#include #include #include -#include #include -#include -#include -#include -#include -#include #include -#include -#include -#include -#include #include #include -#include #include #include +#include +#include +#ifdef _KERNEL +#include +#include +#include +#include +#include +#include +#include +#include +#include + +int zevent_len_max = 0; +int zevent_cols = 80; +int zevent_console = 0; + +static int zevent_len_cur = 0; +static int zevent_waiters = 0; +static int zevent_flags = 0; + +static kmutex_t zevent_lock; +static list_t zevent_list; +static kcondvar_t zevent_cv; +#endif /* _KERNEL */ /* - * URL and SUNW-MSG-ID value to display for fm_panic(), defined below. These - * values must be kept in sync with the FMA source code in usr/src/cmd/fm. 
- */ -static const char *fm_url = "http://www.sun.com/msg"; -static const char *fm_msgid = "SUNOS-8000-0G"; -static char *volatile fm_panicstr = NULL; - -errorq_t *ereport_errorq; -void *ereport_dumpbuf; -size_t ereport_dumplen; - -static uint_t ereport_chanlen = ERPT_EVCH_MAX; -static evchan_t *ereport_chan = NULL; -static ulong_t ereport_qlen = 0; -static size_t ereport_size = 0; -static int ereport_cols = 80; - -/* - * Common fault management kstats to record ereport generation - * failures + * Common fault management kstats to record event generation failures */ struct erpt_kstat { @@ -113,57 +107,9 @@ static struct erpt_kstat erpt_kstat_data = { { "payload-set-failed", KSTAT_DATA_UINT64 } }; -/*ARGSUSED*/ -static void -fm_drain(void *private, void *data, errorq_elem_t *eep) -{ - nvlist_t *nvl = errorq_elem_nvl(ereport_errorq, eep); +kstat_t *fm_ksp; - if (!panicstr) - (void) fm_ereport_post(nvl, EVCH_TRYHARD); - else - fm_nvprint(nvl); -} - -void -fm_init(void) -{ - kstat_t *ksp; - - (void) sysevent_evc_bind(FM_ERROR_CHAN, - &ereport_chan, EVCH_CREAT | EVCH_HOLD_PEND); - - (void) sysevent_evc_control(ereport_chan, - EVCH_SET_CHAN_LEN, &ereport_chanlen); - - if (ereport_qlen == 0) - ereport_qlen = ERPT_MAX_ERRS * MAX(max_ncpus, 4); - - if (ereport_size == 0) - ereport_size = ERPT_DATA_SZ; - - ereport_errorq = errorq_nvcreate("fm_ereport_queue", - (errorq_func_t)fm_drain, NULL, ereport_qlen, ereport_size, - FM_ERR_PIL, ERRORQ_VITAL); - if (ereport_errorq == NULL) - panic("failed to create required ereport error queue"); - - ereport_dumpbuf = kmem_alloc(ereport_size, KM_SLEEP); - ereport_dumplen = ereport_size; - - /* Initialize ereport allocation and generation kstats */ - ksp = kstat_create("unix", 0, "fm", "misc", KSTAT_TYPE_NAMED, - sizeof (struct erpt_kstat) / sizeof (kstat_named_t), - KSTAT_FLAG_VIRTUAL); - - if (ksp != NULL) { - ksp->ks_data = &erpt_kstat_data; - kstat_install(ksp); - } else { - cmn_err(CE_NOTE, "failed to create fm/misc kstat\n"); - - } 
-} +#ifdef _KERNEL /* * Formatting utility function for fm_nvprintr. We attempt to wrap chunks of @@ -182,7 +128,7 @@ fm_printf(int depth, int c, int cols, const char *format, ...) va_end(ap); if (c + width >= cols) { - console_printf("\n\r"); + console_printf("\n"); c = 0; if (format[0] != ' ' && depth > 0) { console_printf(" "); @@ -244,54 +190,54 @@ fm_nvprintr(nvlist_t *nvl, int d, int c, int cols) case DATA_TYPE_BYTE: (void) nvpair_value_byte(nvp, &i8); - c = fm_printf(d + 1, c, cols, "%x", i8); + c = fm_printf(d + 1, c, cols, "0x%x", i8); break; case DATA_TYPE_INT8: (void) nvpair_value_int8(nvp, (void *)&i8); - c = fm_printf(d + 1, c, cols, "%x", i8); + c = fm_printf(d + 1, c, cols, "0x%x", i8); break; case DATA_TYPE_UINT8: (void) nvpair_value_uint8(nvp, &i8); - c = fm_printf(d + 1, c, cols, "%x", i8); + c = fm_printf(d + 1, c, cols, "0x%x", i8); break; case DATA_TYPE_INT16: (void) nvpair_value_int16(nvp, (void *)&i16); - c = fm_printf(d + 1, c, cols, "%x", i16); + c = fm_printf(d + 1, c, cols, "0x%x", i16); break; case DATA_TYPE_UINT16: (void) nvpair_value_uint16(nvp, &i16); - c = fm_printf(d + 1, c, cols, "%x", i16); + c = fm_printf(d + 1, c, cols, "0x%x", i16); break; case DATA_TYPE_INT32: (void) nvpair_value_int32(nvp, (void *)&i32); - c = fm_printf(d + 1, c, cols, "%x", i32); + c = fm_printf(d + 1, c, cols, "0x%x", i32); break; case DATA_TYPE_UINT32: (void) nvpair_value_uint32(nvp, &i32); - c = fm_printf(d + 1, c, cols, "%x", i32); + c = fm_printf(d + 1, c, cols, "0x%x", i32); break; case DATA_TYPE_INT64: (void) nvpair_value_int64(nvp, (void *)&i64); - c = fm_printf(d + 1, c, cols, "%llx", + c = fm_printf(d + 1, c, cols, "0x%llx", (u_longlong_t)i64); break; case DATA_TYPE_UINT64: (void) nvpair_value_uint64(nvp, &i64); - c = fm_printf(d + 1, c, cols, "%llx", + c = fm_printf(d + 1, c, cols, "0x%llx", (u_longlong_t)i64); break; case DATA_TYPE_HRTIME: (void) nvpair_value_hrtime(nvp, (void *)&i64); - c = fm_printf(d + 1, c, cols, "%llx", + c = fm_printf(d + 
1, c, cols, "0x%llx", (u_longlong_t)i64); break; @@ -321,19 +267,124 @@ fm_nvprintr(nvlist_t *nvl, int d, int c, int cols) } break; + case DATA_TYPE_INT8_ARRAY: { + int8_t *val; + uint_t i, nelem; + + c = fm_printf(d + 1, c, cols, "[ "); + (void) nvpair_value_int8_array(nvp, &val, &nelem); + for (i = 0; i < nelem; i++) + c = fm_printf(d + 1, c, cols, "0x%llx ", + (u_longlong_t)val[i]); + + c = fm_printf(d + 1, c, cols, "]"); + break; + } + + case DATA_TYPE_UINT8_ARRAY: { + uint8_t *val; + uint_t i, nelem; + + c = fm_printf(d + 1, c, cols, "[ "); + (void) nvpair_value_uint8_array(nvp, &val, &nelem); + for (i = 0; i < nelem; i++) + c = fm_printf(d + 1, c, cols, "0x%llx ", + (u_longlong_t)val[i]); + + c = fm_printf(d + 1, c, cols, "]"); + break; + } + + case DATA_TYPE_INT16_ARRAY: { + int16_t *val; + uint_t i, nelem; + + c = fm_printf(d + 1, c, cols, "[ "); + (void) nvpair_value_int16_array(nvp, &val, &nelem); + for (i = 0; i < nelem; i++) + c = fm_printf(d + 1, c, cols, "0x%llx ", + (u_longlong_t)val[i]); + + c = fm_printf(d + 1, c, cols, "]"); + break; + } + + case DATA_TYPE_UINT16_ARRAY: { + uint16_t *val; + uint_t i, nelem; + + c = fm_printf(d + 1, c, cols, "[ "); + (void) nvpair_value_uint16_array(nvp, &val, &nelem); + for (i = 0; i < nelem; i++) + c = fm_printf(d + 1, c, cols, "0x%llx ", + (u_longlong_t)val[i]); + + c = fm_printf(d + 1, c, cols, "]"); + break; + } + + case DATA_TYPE_INT32_ARRAY: { + int32_t *val; + uint_t i, nelem; + + c = fm_printf(d + 1, c, cols, "[ "); + (void) nvpair_value_int32_array(nvp, &val, &nelem); + for (i = 0; i < nelem; i++) + c = fm_printf(d + 1, c, cols, "0x%llx ", + (u_longlong_t)val[i]); + + c = fm_printf(d + 1, c, cols, "]"); + break; + } + + case DATA_TYPE_UINT32_ARRAY: { + uint32_t *val; + uint_t i, nelem; + + c = fm_printf(d + 1, c, cols, "[ "); + (void) nvpair_value_uint32_array(nvp, &val, &nelem); + for (i = 0; i < nelem; i++) + c = fm_printf(d + 1, c, cols, "0x%llx ", + (u_longlong_t)val[i]); + + c = fm_printf(d + 1, c, 
cols, "]"); + break; + } + + case DATA_TYPE_INT64_ARRAY: { + int64_t *val; + uint_t i, nelem; + + c = fm_printf(d + 1, c, cols, "[ "); + (void) nvpair_value_int64_array(nvp, &val, &nelem); + for (i = 0; i < nelem; i++) + c = fm_printf(d + 1, c, cols, "0x%llx ", + (u_longlong_t)val[i]); + + c = fm_printf(d + 1, c, cols, "]"); + break; + } + + case DATA_TYPE_UINT64_ARRAY: { + uint64_t *val; + uint_t i, nelem; + + c = fm_printf(d + 1, c, cols, "[ "); + (void) nvpair_value_uint64_array(nvp, &val, &nelem); + for (i = 0; i < nelem; i++) + c = fm_printf(d + 1, c, cols, "0x%llx ", + (u_longlong_t)val[i]); + + c = fm_printf(d + 1, c, cols, "]"); + break; + } + + case DATA_TYPE_STRING_ARRAY: case DATA_TYPE_BOOLEAN_ARRAY: case DATA_TYPE_BYTE_ARRAY: - case DATA_TYPE_INT8_ARRAY: - case DATA_TYPE_UINT8_ARRAY: - case DATA_TYPE_INT16_ARRAY: - case DATA_TYPE_UINT16_ARRAY: - case DATA_TYPE_INT32_ARRAY: - case DATA_TYPE_UINT32_ARRAY: - case DATA_TYPE_INT64_ARRAY: - case DATA_TYPE_UINT64_ARRAY: - case DATA_TYPE_STRING_ARRAY: c = fm_printf(d + 1, c, cols, "[...]"); break; + case DATA_TYPE_UNKNOWN: c = fm_printf(d + 1, c, cols, ""); break; @@ -349,175 +400,211 @@ fm_nvprint(nvlist_t *nvl) char *class; int c = 0; - console_printf("\r"); + console_printf("\n"); if (nvlist_lookup_string(nvl, FM_CLASS, &class) == 0) - c = fm_printf(0, c, ereport_cols, "%s", class); + c = fm_printf(0, c, zevent_cols, "%s", class); - if (fm_nvprintr(nvl, 0, c, ereport_cols) != 0) + if (fm_nvprintr(nvl, 0, c, zevent_cols) != 0) console_printf("\n"); console_printf("\n"); } -/* - * Wrapper for panic() that first produces an FMA-style message for admins. - * Normally such messages are generated by fmd(1M)'s syslog-msgs agent: this - * is the one exception to that rule and the only error that gets messaged. - * This function is intended for use by subsystems that have detected a fatal - * error and enqueued appropriate ereports and wish to then force a panic. 
- */ -/*PRINTFLIKE1*/ -void -fm_panic(const char *format, ...) +static zevent_t * +fm_event_alloc(void) { - va_list ap; + zevent_t *ev; - (void) casptr((void *)&fm_panicstr, NULL, (void *)format); - va_start(ap, format); - vpanic(format, ap); - va_end(ap); + ev = kmem_zalloc(sizeof(zevent_t), KM_SLEEP); + if (ev == NULL) + return NULL; + + list_create(&ev->ev_zpd_list, sizeof(zfs_private_data_t), + offsetof(zfs_private_data_t, zpd_node)); + list_link_init(&ev->ev_node); + + return ev; +} + +static void +fm_event_free(zevent_t *ev) +{ + /* Run provided cleanup callback */ + ev->ev_cb(ev->ev_nvl); + + list_destroy(&ev->ev_zpd_list); + kmem_free(ev, sizeof(zevent_t)); +} + +static void +fm_zevent_drain(zevent_t *ev) +{ + zfs_private_data_t *zpd; + + ASSERT(MUTEX_HELD(&zevent_lock)); + list_remove(&zevent_list, ev); + + /* Remove references to this event in all private file data */ + while ((zpd = list_head(&ev->ev_zpd_list)) != NULL) { + list_remove(&ev->ev_zpd_list, zpd); + zpd->zpd_zevent = NULL; + zpd->zpd_dropped++; + } + + fm_event_free(ev); +} + +void +fm_zevent_drain_all(int *count) +{ + zevent_t *ev; + + mutex_enter(&zevent_lock); + while ((ev = list_head(&zevent_list)) != NULL) + fm_zevent_drain(ev); + + *count = zevent_len_cur; + zevent_len_cur = 0; + mutex_exit(&zevent_lock); } /* - * Print any appropriate FMA banner message before the panic message. This - * function is called by panicsys() and prints the message for fm_panic(). - * We print the message here so that it comes after the system is quiesced. - * A one-line summary is recorded in the log only (cmn_err(9F) with "!" prefix). - * The rest of the message is for the console only and not needed in the log, - * so it is printed using console_printf(). We break it up into multiple - * chunks so as to avoid overflowing any small legacy prom_printf() buffers. + * New zevents are inserted at the head. If the maximum queue + * length is exceeded a zevent will be drained from the tail. 
+ * As part of this any user space processes which currently have + * a reference to this zevent_t in their private data will have + * this reference set to NULL. */ -void -fm_banner(void) +static void +fm_zevent_insert(zevent_t *ev) { - timespec_t tod; - hrtime_t now; + mutex_enter(&zevent_lock); + list_insert_head(&zevent_list, ev); + if (zevent_len_cur >= zevent_len_max) + fm_zevent_drain(list_tail(&zevent_list)); + else + zevent_len_cur++; - if (!fm_panicstr) - return; /* panic was not initiated by fm_panic(); do nothing */ - - if (panicstr) { - tod = panic_hrestime; - now = panic_hrtime; - } else { - gethrestime(&tod); - now = gethrtime_waitfree(); - } - - cmn_err(CE_NOTE, "!SUNW-MSG-ID: %s, " - "TYPE: Error, VER: 1, SEVERITY: Major\n", fm_msgid); - - console_printf( -"\n\rSUNW-MSG-ID: %s, TYPE: Error, VER: 1, SEVERITY: Major\n" -"EVENT-TIME: 0x%lx.0x%lx (0x%llx)\n", - fm_msgid, tod.tv_sec, tod.tv_nsec, (u_longlong_t)now); - - console_printf( -"PLATFORM: %s, CSN: -, HOSTNAME: %s\n" -"SOURCE: %s, REV: %s %s\n", - platform, utsname.nodename, utsname.sysname, - utsname.release, utsname.version); - - console_printf( -"DESC: Errors have been detected that require a reboot to ensure system\n" -"integrity. See %s/%s for more information.\n", - fm_url, fm_msgid); - - console_printf( -"AUTO-RESPONSE: Solaris will attempt to save and diagnose the error telemetry\n" -"IMPACT: The system will sync files, save a crash dump if needed, and reboot\n" -"REC-ACTION: Save the error summary below in case telemetry cannot be saved\n"); - - console_printf("\n"); + mutex_exit(&zevent_lock); } /* - * Utility function to write all of the pending ereports to the dump device. - * This function is called at either normal reboot or panic time, and simply - * iterates over the in-transit messages in the ereport sysevent channel. 
+ * Post a zevent */ void -fm_ereport_dump(void) -{ - evchanq_t *chq; - sysevent_t *sep; - erpt_dump_t ed; - - timespec_t tod; - hrtime_t now; - char *buf; - size_t len; - - if (panicstr) { - tod = panic_hrestime; - now = panic_hrtime; - } else { - if (ereport_errorq != NULL) - errorq_drain(ereport_errorq); - gethrestime(&tod); - now = gethrtime_waitfree(); - } - - /* - * In the panic case, sysevent_evc_walk_init() will return NULL. - */ - if ((chq = sysevent_evc_walk_init(ereport_chan, NULL)) == NULL && - !panicstr) - return; /* event channel isn't initialized yet */ - - while ((sep = sysevent_evc_walk_step(chq)) != NULL) { - if ((buf = sysevent_evc_event_attr(sep, &len)) == NULL) - break; - - ed.ed_magic = ERPT_MAGIC; - ed.ed_chksum = checksum32(buf, len); - ed.ed_size = (uint32_t)len; - ed.ed_pad = 0; - ed.ed_hrt_nsec = SE_TIME(sep); - ed.ed_hrt_base = now; - ed.ed_tod_base.sec = tod.tv_sec; - ed.ed_tod_base.nsec = tod.tv_nsec; - - dumpvp_write(&ed, sizeof (ed)); - dumpvp_write(buf, len); - } - - sysevent_evc_walk_fini(chq); -} - -/* - * Post an error report (ereport) to the sysevent error channel. The error - * channel must be established with a prior call to sysevent_evc_create() - * before publication may occur. 
- */ -void -fm_ereport_post(nvlist_t *ereport, int evc_flag) +fm_zevent_post(nvlist_t *nvl, zevent_cb_t *cb) { size_t nvl_size = 0; - evchan_t *error_chan; + zevent_t *ev; - (void) nvlist_size(ereport, &nvl_size, NV_ENCODE_NATIVE); + (void) nvlist_size(nvl, &nvl_size, NV_ENCODE_NATIVE); if (nvl_size > ERPT_DATA_SZ || nvl_size == 0) { atomic_add_64(&erpt_kstat_data.erpt_dropped.value.ui64, 1); return; } - if (sysevent_evc_bind(FM_ERROR_CHAN, &error_chan, - EVCH_CREAT|EVCH_HOLD_PEND) != 0) { + if (zevent_console) + fm_nvprint(nvl); + + ev = fm_event_alloc(); + if (ev == NULL) { atomic_add_64(&erpt_kstat_data.erpt_dropped.value.ui64, 1); return; } - if (sysevent_evc_publish(error_chan, EC_FM, ESC_FM_ERROR, - SUNW_VENDOR, FM_PUB, ereport, evc_flag) != 0) { - atomic_add_64(&erpt_kstat_data.erpt_dropped.value.ui64, 1); - sysevent_evc_unbind(error_chan); - return; - } - sysevent_evc_unbind(error_chan); + ev->ev_nvl = nvl; + ev->ev_cb = cb; + fm_zevent_insert(ev); + cv_broadcast(&zevent_cv); } +/* + * Get the next zevent in the stream. To avoid making an extra copy of the + * nvlist we must call put_nvlist() here safely under the zevent_lock. 
+ */ +int +fm_zevent_next(zfs_private_data_t *zpd, zfs_cmd_t *zc) +{ + zevent_t *ev; + int error; + + mutex_enter(&zevent_lock); + if (zpd->zpd_zevent == NULL) { + /* New stream start at the beginning/tail */ + ev = list_tail(&zevent_list); + if (ev == NULL) { + error = ENOENT; + goto out; + } + } else { + /* Existing stream continue with the next element and remove + * ourselves from the wait queue for the previous element */ + ev = list_prev(&zevent_list, zpd->zpd_zevent); + if (ev == NULL) { + error = ENOENT; + goto out; + } + + list_remove(&zpd->zpd_zevent->ev_zpd_list, zpd); + } + + zpd->zpd_zevent = ev; + list_insert_head(&ev->ev_zpd_list, zpd); + error = put_nvlist(zc, ev->ev_nvl); + zc->zc_cookie = zpd->zpd_dropped; + zpd->zpd_dropped = 0; +out: + mutex_exit(&zevent_lock); + + return error; +} + +int +fm_zevent_wait(zfs_private_data_t *zpd) +{ + int error = 0; + + mutex_enter(&zevent_lock); + + if (zevent_flags & ZEVENT_SHUTDOWN) { + error = ESHUTDOWN; + goto out; + } + + zevent_waiters++; + cv_wait_interruptible(&zevent_cv, &zevent_lock); + if (issig(JUSTLOOKING)) + error = EINTR; + + zevent_waiters--; +out: + mutex_exit(&zevent_lock); + + return error; +} + +void +fm_zevent_init(zfs_private_data_t *zpd) +{ + list_link_init(&zpd->zpd_node); + zpd->zpd_zevent = NULL; + zpd->zpd_dropped = 0; +} + +void +fm_zevent_fini(zfs_private_data_t *zpd) +{ + mutex_enter(&zevent_lock); + if (zpd->zpd_zevent) + list_remove(&zpd->zpd_zevent->ev_zpd_list, zpd); + + zpd->zpd_zevent = NULL; + zpd->zpd_dropped = 0; + mutex_exit(&zevent_lock); +} +#endif /* _KERNEL */ + /* * Wrapppers for FM nvlist allocators */ @@ -795,6 +882,8 @@ fm_ereport_set(nvlist_t *ereport, int version, const char *erpt_class, { char ereport_class[FM_MAX_CLASS]; const char *name; + timestruc_t tv; + int64_t tv_array[2]; va_list ap; int ret; @@ -826,6 +915,13 @@ fm_ereport_set(nvlist_t *ereport, int version, const char *erpt_class, if (ret) atomic_add_64(&erpt_kstat_data.erpt_set_failed.value.ui64, 1); 
+ + gethrestime(&tv); + tv_array[0] = tv.tv_sec; + tv_array[1] = tv.tv_nsec; + if (nvlist_add_int64_array(ereport, FM_EREPORT_TIME, tv_array, 2)) { + atomic_add_64(&erpt_kstat_data.erpt_set_failed.value.ui64, 1); + } } /* @@ -1146,7 +1242,7 @@ fm_ena_generate_cpu(uint64_t timestamp, processorid_t cpuid, uchar_t format) ena = (uint64_t)((format & ENA_FORMAT_MASK) | ((cpuid << ENA_FMT1_CPUID_SHFT) & ENA_FMT1_CPUID_MASK) | - ((gethrtime_waitfree() << ENA_FMT1_TIME_SHFT) & + ((gethrtime() << ENA_FMT1_TIME_SHFT) & ENA_FMT1_TIME_MASK)); } break; @@ -1164,7 +1260,7 @@ fm_ena_generate_cpu(uint64_t timestamp, processorid_t cpuid, uchar_t format) uint64_t fm_ena_generate(uint64_t timestamp, uchar_t format) { - return (fm_ena_generate_cpu(timestamp, CPU->cpu_id, format)); + return (fm_ena_generate_cpu(timestamp, getcpuid(), format)); } uint64_t @@ -1232,35 +1328,67 @@ fm_ena_time_get(uint64_t ena) return (time); } -/* - * Convert a getpcstack() trace to symbolic name+offset, and add the resulting - * string array to a Fault Management ereport as FM_EREPORT_PAYLOAD_NAME_STACK. 
- */ +#ifdef _KERNEL void -fm_payload_stack_add(nvlist_t *payload, const pc_t *stack, int depth) +fm_init(void) { - int i; - char *sym; - ulong_t off; - char *stkpp[FM_STK_DEPTH]; - char buf[FM_STK_DEPTH * FM_SYM_SZ]; - char *stkp = buf; + zevent_len_cur = 0; + zevent_flags = 0; - for (i = 0; i < depth && i != FM_STK_DEPTH; i++, stkp += FM_SYM_SZ) { - if ((sym = kobj_getsymname(stack[i], &off)) != NULL) - (void) snprintf(stkp, FM_SYM_SZ, "%s+%lx", sym, off); - else - (void) snprintf(stkp, FM_SYM_SZ, "%lx", (long)stack[i]); - stkpp[i] = stkp; + if (zevent_len_max == 0) + zevent_len_max = ERPT_MAX_ERRS * MAX(max_ncpus, 4); + + /* Initialize zevent allocation and generation kstats */ + fm_ksp = kstat_create("zfs", 0, "fm", "misc", KSTAT_TYPE_NAMED, + sizeof (struct erpt_kstat) / sizeof (kstat_named_t), + KSTAT_FLAG_VIRTUAL); + + if (fm_ksp != NULL) { + fm_ksp->ks_data = &erpt_kstat_data; + kstat_install(fm_ksp); + } else { + cmn_err(CE_NOTE, "failed to create fm/misc kstat\n"); } - fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_STACK, - DATA_TYPE_STRING_ARRAY, depth, stkpp, NULL); + mutex_init(&zevent_lock, NULL, MUTEX_DEFAULT, NULL); + list_create(&zevent_list, sizeof(zevent_t), offsetof(zevent_t, ev_node)); + cv_init(&zevent_cv, NULL, CV_DEFAULT, NULL); } void -print_msg_hwerr(ctid_t ct_id, proc_t *p) +fm_fini(void) { - uprintf("Killed process %d (%s) in contract id %d " - "due to hardware error\n", p->p_pid, p->p_user.u_comm, ct_id); + int count; + + fm_zevent_drain_all(&count); + /* Flag shutdown and wake sleepers under the lock so fm_zevent_wait() cannot sleep again */ + mutex_enter(&zevent_lock); + zevent_flags |= ZEVENT_SHUTDOWN; + cv_broadcast(&zevent_cv); + while (zevent_waiters > 0) { + mutex_exit(&zevent_lock); + schedule(); + mutex_enter(&zevent_lock); + } + mutex_exit(&zevent_lock); + + cv_destroy(&zevent_cv); + list_destroy(&zevent_list); + mutex_destroy(&zevent_lock); + + if (fm_ksp != NULL) { + kstat_delete(fm_ksp); + fm_ksp = NULL; + } } + +module_param(zevent_len_max, int, 0644); +MODULE_PARM_DESC(zevent_len_max, "Maximum event queue 
length"); + +module_param(zevent_cols, int, 0644); +MODULE_PARM_DESC(zevent_cols, "Maximum event column width"); + +module_param(zevent_console, int, 0644); +MODULE_PARM_DESC(zevent_console, "Log events to the console"); + +#endif /* _KERNEL */ diff --git a/module/zfs/include/sys/fm/fs/zfs.h b/module/zfs/include/sys/fm/fs/zfs.h index 21b7dbe52c..5bce5346e7 100644 --- a/module/zfs/include/sys/fm/fs/zfs.h +++ b/module/zfs/include/sys/fm/fs/zfs.h @@ -35,7 +35,9 @@ extern "C" { #define FM_EREPORT_ZFS_CHECKSUM "checksum" #define FM_EREPORT_ZFS_IO "io" #define FM_EREPORT_ZFS_DATA "data" +#define FM_EREPORT_ZFS_CONFIG_SYNC "config.sync" #define FM_EREPORT_ZFS_POOL "zpool" +#define FM_EREPORT_ZFS_POOL_DESTROY "zpool.destroy" #define FM_EREPORT_ZFS_DEVICE_UNKNOWN "vdev.unknown" #define FM_EREPORT_ZFS_DEVICE_OPEN_FAILED "vdev.open_failed" #define FM_EREPORT_ZFS_DEVICE_CORRUPT_DATA "vdev.corrupt_data" @@ -43,9 +45,18 @@ extern "C" { #define FM_EREPORT_ZFS_DEVICE_BAD_GUID_SUM "vdev.bad_guid_sum" #define FM_EREPORT_ZFS_DEVICE_TOO_SMALL "vdev.too_small" #define FM_EREPORT_ZFS_DEVICE_BAD_LABEL "vdev.bad_label" +#define FM_EREPORT_ZFS_DEVICE_REMOVE "vdev.remove" +#define FM_EREPORT_ZFS_DEVICE_CLEAR "vdev.clear" +#define FM_EREPORT_ZFS_DEVICE_CHECK "vdev.check" +#define FM_EREPORT_ZFS_DEVICE_SPARE "vdev.spare" +#define FM_EREPORT_ZFS_DEVICE_AUTOEXPAND "vdev.autoexpand" #define FM_EREPORT_ZFS_IO_FAILURE "io_failure" #define FM_EREPORT_ZFS_PROBE_FAILURE "probe_failure" #define FM_EREPORT_ZFS_LOG_REPLAY "log_replay" +#define FM_EREPORT_ZFS_RESILVER_START "resilver.start" +#define FM_EREPORT_ZFS_RESILVER_FINISH "resilver.finish" +#define FM_EREPORT_ZFS_SCRUB_START "scrub.start" +#define FM_EREPORT_ZFS_SCRUB_FINISH "scrub.finish" #define FM_EREPORT_PAYLOAD_ZFS_POOL "pool" #define FM_EREPORT_PAYLOAD_ZFS_POOL_FAILMODE "pool_failmode" @@ -73,8 +84,8 @@ extern "C" { #define FM_EREPORT_FAILMODE_CONTINUE "continue" #define FM_EREPORT_FAILMODE_PANIC "panic" -#define FM_RESOURCE_REMOVED 
"removed" -#define FM_RESOURCE_AUTOREPLACE "autoreplace" +#define FM_EREPORT_RESOURCE_REMOVED "removed" +#define FM_EREPORT_RESOURCE_AUTOREPLACE "autoreplace" #ifdef __cplusplus } diff --git a/module/zfs/include/sys/fm/protocol.h b/module/zfs/include/sys/fm/protocol.h index 767fb07d81..70c3c93fac 100644 --- a/module/zfs/include/sys/fm/protocol.h +++ b/module/zfs/include/sys/fm/protocol.h @@ -68,6 +68,7 @@ extern "C" { /* ereport payload member names */ #define FM_EREPORT_DETECTOR "detector" #define FM_EREPORT_ENA "ena" +#define FM_EREPORT_TIME "time" /* list.* event payload member names */ #define FM_LIST_EVENT_SIZE "list-sz" @@ -295,15 +296,13 @@ extern "C" { #define FM_FMRI_ZFS_POOL "pool" #define FM_FMRI_ZFS_VDEV "vdev" -extern nv_alloc_t *fm_nva_xcreate(char *, size_t); -extern void fm_nva_xdestroy(nv_alloc_t *); - -extern nvlist_t *fm_nvlist_create(nv_alloc_t *); -extern void fm_nvlist_destroy(nvlist_t *, int); - #define FM_NVA_FREE 0 /* free allocator on nvlist_destroy */ #define FM_NVA_RETAIN 1 /* keep allocator on nvlist_destroy */ +extern nv_alloc_t *fm_nva_xcreate(char *, size_t); +extern void fm_nva_xdestroy(nv_alloc_t *); +extern nvlist_t *fm_nvlist_create(nv_alloc_t *); +extern void fm_nvlist_destroy(nvlist_t *, int); extern void fm_ereport_set(nvlist_t *, int, const char *, uint64_t, const nvlist_t *, ...); extern void fm_payload_set(nvlist_t *, ...); @@ -312,15 +311,11 @@ extern void fm_fmri_hc_set(nvlist_t *, int, const nvlist_t *, nvlist_t *, int, ...); extern void fm_fmri_dev_set(nvlist_t *, int, const nvlist_t *, const char *, const char *); -extern void fm_fmri_de_set(nvlist_t *, int, const nvlist_t *, const char *); extern void fm_fmri_cpu_set(nvlist_t *, int, const nvlist_t *, uint32_t, uint8_t *, const char *); extern void fm_fmri_mem_set(nvlist_t *, int, const nvlist_t *, const char *, const char *, uint64_t); -extern void fm_authority_set(nvlist_t *, int, const char *, const char *, - const char *, const char *); extern void 
fm_fmri_zfs_set(nvlist_t *, int, uint64_t, uint64_t); - extern uint64_t fm_ena_increment(uint64_t); extern uint64_t fm_ena_generate(uint64_t, uchar_t); extern uint64_t fm_ena_generate_cpu(uint64_t, processorid_t, uchar_t); diff --git a/module/zfs/include/sys/fm/util.h b/module/zfs/include/sys/fm/util.h index 4e19e4de09..2052e1998e 100644 --- a/module/zfs/include/sys/fm/util.h +++ b/module/zfs/include/sys/fm/util.h @@ -34,7 +34,6 @@ extern "C" { #endif #include -#include /* * Shared user/kernel definitions for class length, error channel name, @@ -74,27 +73,41 @@ typedef struct erpt_dump { } erpt_dump_t; #ifdef _KERNEL + #include +#include -#define FM_STK_DEPTH 20 /* maximum stack depth */ -#define FM_SYM_SZ 64 /* maximum symbol size */ -#define FM_ERR_PIL 2 /* PIL for ereport_errorq drain processing */ +#define ZEVENT_SHUTDOWN 0x1 -#define FM_EREPORT_PAYLOAD_NAME_STACK "stack" +typedef void zevent_cb_t(nvlist_t *); -extern errorq_t *ereport_errorq; -extern void *ereport_dumpbuf; -extern size_t ereport_dumplen; +typedef struct zevent_s { + nvlist_t *ev_nvl; /* protected by the zevent_lock */ + list_t ev_zpd_list; /* " */ + list_node_t ev_node; /* " */ + zevent_cb_t *ev_cb; /* " */ +} zevent_t; + +typedef struct zfs_private_data { + zevent_t *zpd_zevent; /* protected by the zevent_lock */ + list_node_t zpd_node; /* " */ + uint64_t zpd_dropped; /* " */ +} zfs_private_data_t; extern void fm_init(void); +extern void fm_fini(void); extern void fm_nvprint(nvlist_t *); -extern void fm_panic(const char *, ...); -extern void fm_banner(void); +extern void fm_zevent_init(zfs_private_data_t *); +extern void fm_zevent_fini(zfs_private_data_t *); +extern void fm_zevent_post(nvlist_t *, zevent_cb_t *); +extern void fm_zevent_drain_all(int *); +extern int fm_zevent_next(zfs_private_data_t *, zfs_cmd_t *); +extern int fm_zevent_wait(zfs_private_data_t *); -extern void fm_ereport_dump(void); -extern void fm_ereport_post(nvlist_t *, int); +#else -extern void 
fm_payload_stack_add(nvlist_t *, const pc_t *, int); +static inline void fm_init(void) { } +static inline void fm_fini(void) { } #endif /* _KERNEL */ diff --git a/module/zfs/include/sys/spa.h b/module/zfs/include/sys/spa.h index aba28dbb2d..e270988214 100644 --- a/module/zfs/include/sys/spa.h +++ b/module/zfs/include/sys/spa.h @@ -509,8 +509,6 @@ struct zio; extern void spa_log_error(spa_t *spa, struct zio *zio); extern void zfs_ereport_post(const char *class, spa_t *spa, vdev_t *vd, struct zio *zio, uint64_t stateoroffset, uint64_t length); -extern void zfs_post_remove(spa_t *spa, vdev_t *vd); -extern void zfs_post_autoreplace(spa_t *spa, vdev_t *vd); extern uint64_t spa_get_errlog_size(spa_t *spa); extern int spa_get_errlog(spa_t *spa, void *uaddr, size_t *count); extern void spa_errlog_rotate(spa_t *spa); diff --git a/module/zfs/include/sys/zfs_context.h b/module/zfs/include/sys/zfs_context.h index 2ce5c9da4d..83fbadb7ae 100644 --- a/module/zfs/include/sys/zfs_context.h +++ b/module/zfs/include/sys/zfs_context.h @@ -58,10 +58,7 @@ extern "C" { #include #include #include -#include -#include -#include -#include +#include #ifdef __cplusplus } diff --git a/module/zfs/spa.c b/module/zfs/spa.c index 596fcf3e69..07582d9ef7 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -1101,8 +1101,9 @@ spa_check_removed(vdev_t *vd) spa_check_removed(vd->vdev_child[c]); if (vd->vdev_ops->vdev_op_leaf && vdev_is_dead(vd)) { - zfs_post_autoreplace(vd->vdev_spa, vd); - spa_event_notify(vd->vdev_spa, vd, ESC_ZFS_VDEV_CHECK); + zfs_ereport_post(FM_EREPORT_RESOURCE_AUTOREPLACE, + vd->vdev_spa, vd, NULL, 0, 0); + spa_event_notify(vd->vdev_spa, vd, FM_EREPORT_ZFS_DEVICE_CHECK); } } @@ -2851,7 +2852,7 @@ spa_export_common(char *pool, int new_state, nvlist_t **oldconfig, } } - spa_event_notify(spa, NULL, ESC_ZFS_POOL_DESTROY); + spa_event_notify(spa, NULL, FM_EREPORT_ZFS_POOL_DESTROY); if (spa->spa_state != POOL_STATE_UNINITIALIZED) { spa_unload(spa); @@ -3161,7 +3162,7 @@ 
spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing) if (newvd->vdev_isspare) { spa_spare_activate(newvd); - spa_event_notify(spa, newvd, ESC_ZFS_VDEV_SPARE); + spa_event_notify(spa, newvd, FM_EREPORT_ZFS_DEVICE_SPARE); } oldvdpath = spa_strdup(oldvd->vdev_path); @@ -3379,7 +3380,7 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done) vd->vdev_detached = B_TRUE; vdev_dirty(tvd, VDD_DTL, vd, txg); - spa_event_notify(spa, vd, ESC_ZFS_VDEV_REMOVE); + spa_event_notify(spa, vd, FM_EREPORT_ZFS_DEVICE_REMOVE); error = spa_vdev_exit(spa, vd, txg, 0); @@ -3721,9 +3722,6 @@ spa_async_probe(spa_t *spa, vdev_t *vd) static void spa_async_autoexpand(spa_t *spa, vdev_t *vd) { - sysevent_id_t eid; - nvlist_t *attr; - char *physpath; int c; if (!spa->spa_autoexpand) @@ -3737,17 +3735,7 @@ spa_async_autoexpand(spa_t *spa, vdev_t *vd) if (!vd->vdev_ops->vdev_op_leaf || vd->vdev_physpath == NULL) return; - physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP); - (void) snprintf(physpath, MAXPATHLEN, "/devices%s", vd->vdev_physpath); - - VERIFY(nvlist_alloc(&attr, NV_UNIQUE_NAME, KM_SLEEP) == 0); - VERIFY(nvlist_add_string(attr, DEV_PHYS_PATH, physpath) == 0); - - (void) ddi_log_sysevent(zfs_dip, SUNW_VENDOR, EC_DEV_STATUS, - ESC_DEV_DLE, attr, &eid, DDI_SLEEP); - - nvlist_free(attr); - kmem_free(physpath, MAXPATHLEN); + spa_event_notify(vd->vdev_spa, vd, FM_EREPORT_ZFS_DEVICE_AUTOEXPAND); } static void @@ -4511,8 +4499,7 @@ spa_has_active_shared_spare(spa_t *spa) } /* - * Post a sysevent corresponding to the given event. The 'name' must be one of - * the event definitions in sys/sysevent/eventdefs.h. The payload will be + * Post a FM_EREPORT_ZFS_* event from sys/fm/fs/zfs.h. The payload will be * filled in from the spa and (optionally) the vdev. This doesn't do anything * in the userland libzpool, as we don't want consumers to misinterpret ztest * or zdb as real changes. 
@@ -4521,50 +4508,7 @@ void spa_event_notify(spa_t *spa, vdev_t *vd, const char *name) { #ifdef _KERNEL - sysevent_t *ev; - sysevent_attr_list_t *attr = NULL; - sysevent_value_t value; - sysevent_id_t eid; - - ev = sysevent_alloc(EC_ZFS, (char *)name, SUNW_KERN_PUB "zfs", - SE_SLEEP); - - value.value_type = SE_DATA_TYPE_STRING; - value.value.sv_string = spa_name(spa); - if (sysevent_add_attr(&attr, ZFS_EV_POOL_NAME, &value, SE_SLEEP) != 0) - goto done; - - value.value_type = SE_DATA_TYPE_UINT64; - value.value.sv_uint64 = spa_guid(spa); - if (sysevent_add_attr(&attr, ZFS_EV_POOL_GUID, &value, SE_SLEEP) != 0) - goto done; - - if (vd) { - value.value_type = SE_DATA_TYPE_UINT64; - value.value.sv_uint64 = vd->vdev_guid; - if (sysevent_add_attr(&attr, ZFS_EV_VDEV_GUID, &value, - SE_SLEEP) != 0) - goto done; - - if (vd->vdev_path) { - value.value_type = SE_DATA_TYPE_STRING; - value.value.sv_string = vd->vdev_path; - if (sysevent_add_attr(&attr, ZFS_EV_VDEV_PATH, - &value, SE_SLEEP) != 0) - goto done; - } - } - - if (sysevent_attach_attributes(ev, attr) != 0) - goto done; - attr = NULL; - - (void) log_sysevent(ev, SE_SLEEP, &eid); - -done: - if (attr) - sysevent_free_attr(attr); - sysevent_free(ev); + zfs_ereport_post(name, spa, vd, NULL, 0, 0); #endif } diff --git a/module/zfs/spa_config.c b/module/zfs/spa_config.c index 19dca52c12..c29ea277d1 100644 --- a/module/zfs/spa_config.c +++ b/module/zfs/spa_config.c @@ -267,7 +267,7 @@ spa_config_sync(spa_t *target, boolean_t removing, boolean_t postsysevent) spa_config_generation++; if (postsysevent) - spa_event_notify(target, NULL, ESC_ZFS_CONFIG_SYNC); + spa_event_notify(target, NULL, FM_EREPORT_ZFS_CONFIG_SYNC); } /* diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c index 4f40e3a608..f87febe30e 100644 --- a/module/zfs/spa_misc.c +++ b/module/zfs/spa_misc.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include @@ -1373,6 +1374,7 @@ spa_init(int mode) spa_mode_global = mode; + fm_init(); 
refcount_init(); unique_init(); zio_init(); @@ -1398,6 +1400,7 @@ spa_fini(void) zio_fini(); unique_fini(); refcount_fini(); + fm_fini(); avl_destroy(&spa_namespace_avl); avl_destroy(&spa_spare_avl); diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c index dc9416ee42..52f970fa95 100644 --- a/module/zfs/vdev.c +++ b/module/zfs/vdev.c @@ -2097,7 +2097,7 @@ vdev_clear(spa_t *spa, vdev_t *vd) if (vd->vdev_aux == NULL && !vdev_is_dead(vd)) spa_async_request(spa, SPA_ASYNC_RESILVER); - spa_event_notify(spa, vd, ESC_ZFS_VDEV_CLEAR); + spa_event_notify(spa, vd, FM_EREPORT_ZFS_DEVICE_CLEAR); } } @@ -2634,7 +2634,8 @@ vdev_set_state(vdev_t *vd, boolean_t isopen, vdev_state_t state, vdev_aux_t aux) * Indicate to the ZFS DE that this device has been removed, and * any recent errors should be ignored. */ - zfs_post_remove(spa, vd); + zfs_ereport_post(FM_EREPORT_RESOURCE_REMOVED, + spa, vd, NULL, 0, 0); vd->vdev_removed = B_TRUE; } else if (state == VDEV_STATE_CANT_OPEN) { /* diff --git a/module/zfs/zfs_fm.c b/module/zfs/zfs_fm.c index 8b7785fa83..f2110feabc 100644 --- a/module/zfs/zfs_fm.c +++ b/module/zfs/zfs_fm.c @@ -88,6 +88,14 @@ * doesn't actually correspond to any particular device or piece of data, * and the caller will always retry without caching or queueing anyway). */ +#ifdef _KERNEL +static void +zfs_ereport_post_cb(nvlist_t *nvl) +{ + fm_nvlist_destroy(nvl, FM_NVA_FREE); +} +#endif /* _KERNEL */ + void zfs_ereport_post(const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio, uint64_t stateoroffset, uint64_t size) @@ -205,6 +213,7 @@ zfs_ereport_post(const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio, vd != NULL ? 
vd->vdev_guid : 0); fm_ereport_set(ereport, FM_EREPORT_VERSION, class, ena, detector, NULL); + fm_nvlist_destroy(detector, FM_NVA_FREE); /* * Construct the per-ereport payload, depending on which parameters are @@ -324,58 +333,11 @@ zfs_ereport_post(const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio, } mutex_exit(&spa->spa_errlist_lock); - fm_ereport_post(ereport, EVCH_SLEEP); - - fm_nvlist_destroy(ereport, FM_NVA_FREE); - fm_nvlist_destroy(detector, FM_NVA_FREE); -#endif + /* Cleanup must be handled by the passed callback function */ + fm_zevent_post(ereport, zfs_ereport_post_cb); +#endif /* _KERNEL */ } -static void -zfs_post_common(spa_t *spa, vdev_t *vd, const char *name) -{ -#ifdef _KERNEL - nvlist_t *resource; - char class[64]; - - if ((resource = fm_nvlist_create(NULL)) == NULL) - return; - - (void) snprintf(class, sizeof (class), "%s.%s.%s", FM_RSRC_RESOURCE, - ZFS_ERROR_CLASS, name); - VERIFY(nvlist_add_uint8(resource, FM_VERSION, FM_RSRC_VERSION) == 0); - VERIFY(nvlist_add_string(resource, FM_CLASS, class) == 0); - VERIFY(nvlist_add_uint64(resource, - FM_EREPORT_PAYLOAD_ZFS_POOL_GUID, spa_guid(spa)) == 0); - if (vd) - VERIFY(nvlist_add_uint64(resource, - FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, vd->vdev_guid) == 0); - - fm_ereport_post(resource, EVCH_SLEEP); - - fm_nvlist_destroy(resource, FM_NVA_FREE); -#endif -} - -/* - * The 'resource.fs.zfs.removed' event is an internal signal that the given vdev - * has been removed from the system. This will cause the DE to ignore any - * recent I/O errors, inferring that they are due to the asynchronous device - * removal. - */ -void -zfs_post_remove(spa_t *spa, vdev_t *vd) -{ - zfs_post_common(spa, vd, FM_RESOURCE_REMOVED); -} - -/* - * The 'resource.fs.zfs.autoreplace' event is an internal signal that the pool - * has the 'autoreplace' property set, and therefore any broken vdevs will be - * handled by higher level logic, and no vdev fault should be generated. 
- */ -void -zfs_post_autoreplace(spa_t *spa, vdev_t *vd) -{ - zfs_post_common(spa, vd, FM_RESOURCE_AUTOREPLACE); -} +#if defined(_KERNEL) && defined(HAVE_SPL) +EXPORT_SYMBOL(zfs_ereport_post); +#endif /* _KERNEL && HAVE_SPL */