Add linux events

This topic branch leverages the Solaris style FMA call points
in ZFS to create a user space visible event notification system
under Linux.  This new system is called zevent and it unifies
all previous Solaris style ereports and sysevent notifications.

Under this Linux specific scheme when a sysevent or ereport event
occurs an nvlist describing the event is created which looks almost
exactly like a Solaris ereport.  These events are queued up in the
kernel when they occur and conditionally logged to the console.
It is then up to a user space application to consume the events
and do whatever it likes with them.

To make this possible the existing /dev/zfs ABI has been extended
with two new ioctls which behave as follows.

* ZFS_IOC_EVENTS_NEXT
Get the next pending event.  The kernel will keep track of the last
event consumed by the file descriptor and provide the next one if
available.  If no new events are available the ioctl() will block
waiting for the next event.  This ioctl may also be called in a
non-blocking mode by setting zc.zc_guid = ZEVENT_NONBLOCK.  In the
non-blocking case if no events are available ENOENT will be returned.
It is possible that ESHUTDOWN will be returned if the ioctl() is
called while module unloading is in progress.  And finally ENOMEM
may occur if the provided nvlist buffer is not large enough to
contain the entire event.

* ZFS_IOC_EVENTS_CLEAR
Clear all events queued by the kernel.  The kernel will keep a fairly
large number of recent events queued, use this ioctl to clear the
in kernel list.  This will affect all user space processes consuming
events.

The zpool command has been extended to use this events ABI with the
'events' subcommand.  You may run 'zpool events -v' to output a
verbose log of all recent events.  This is very similar to the
Solaris 'fmdump -ev' command with the key difference being it also
includes what would be considered sysevents under Solaris.  You
may also run in follow mode with the '-f' option.  To clear the
in kernel event queue use the '-c' option.

$ sudo cmd/zpool/zpool events -fv
TIME                        CLASS
May 13 2010 16:31:15.777711000 ereport.fs.zfs.config.sync
        class = "ereport.fs.zfs.config.sync"
        ena = 0x40982b7897700001
        detector = (embedded nvlist)
                version = 0x0
                scheme = "zfs"
                pool = 0xed976600de75dfa6
        (end detector)

        time = 0x4bec8bc3 0x2e5aed98
        pool = "zpios"
        pool_guid = 0xed976600de75dfa6
        pool_context = 0x0

While the 'zpool events' command is handy for interactive debugging
it is not expected to be the primary consumer of zevents.  This ABI
was primarily added to facilitate the addition of a user space
monitoring daemon.  This daemon would consume all events posted by
the kernel and based on the type of event perform an action.  For
most events simply forwarding them on to syslog is likely enough.
But this interface also cleanly allows for more sophisticated
actions to be taken such as generating an email for a failed drive.

Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
This commit is contained in:
Brian Behlendorf 2010-08-26 11:42:43 -07:00
parent c9c0d073da
commit 266852767f
18 changed files with 1127 additions and 514 deletions

View File

@ -42,6 +42,8 @@
#include <zone.h>
#include <sys/fs/zfs.h>
#include <sys/stat.h>
#include <sys/fm/util.h>
#include <sys/fm/protocol.h>
#include <libzfs.h>
@ -77,6 +79,7 @@ static int zpool_do_export(int, char **);
static int zpool_do_upgrade(int, char **);
static int zpool_do_history(int, char **);
static int zpool_do_events(int, char **);
static int zpool_do_get(int, char **);
static int zpool_do_set(int, char **);
@ -119,6 +122,7 @@ typedef enum {
HELP_SCRUB,
HELP_STATUS,
HELP_UPGRADE,
HELP_EVENTS,
HELP_GET,
HELP_SET,
HELP_SPLIT
@ -167,6 +171,8 @@ static zpool_command_t command_table[] = {
{ "upgrade", zpool_do_upgrade, HELP_UPGRADE },
{ NULL },
{ "history", zpool_do_history, HELP_HISTORY },
{ "events", zpool_do_events, HELP_EVENTS },
{ NULL },
{ "get", zpool_do_get, HELP_GET },
{ "set", zpool_do_set, HELP_SET },
};
@ -234,6 +240,8 @@ get_usage(zpool_help_t idx) {
return (gettext("\tupgrade\n"
"\tupgrade -v\n"
"\tupgrade [-V version] <-a | pool ...>\n"));
case HELP_EVENTS:
return (gettext("\tevents [-vfc]\n"));
case HELP_GET:
return (gettext("\tget <\"all\" | property[,...]> "
"<pool> ...\n"));
@ -4210,6 +4218,331 @@ zpool_do_history(int argc, char **argv)
return (ret);
}
/* Options parsed from the 'zpool events' command line. */
typedef struct ev_opts {
/* set by -v: also dump the full nvlist of each event */
int verbose;
/* set by -f: passed to zpool_events_next() as its 'block' flag */
int follow;
/* set by -c: clear the queued events instead of printing them */
int clear;
} ev_opts_t;
/*
 * Print the one-line summary of an event: its timestamp followed by its
 * class, e.g.
 *
 *	May 13 2010 16:31:15.777711000 ereport.fs.zfs.config.sync
 *
 * The FM_EREPORT_TIME member is an int64 array; element 0 is formatted as
 * the calendar time (seconds) and element 1 as the nine digit fraction.
 * Both FM_EREPORT_TIME and FM_CLASS must be present in 'nvl', a missing
 * member trips verify().
 */
static void
zpool_do_events_short(nvlist_t *nvl)
{
	char ctime_str[26], str[32], *ptr;
	int64_t *tv;
	int64_t nsec;
	time_t sec;
	uint_t n;

	verify(nvlist_lookup_int64_array(nvl, FM_EREPORT_TIME, &tv, &n) == 0);

	/*
	 * Copy the values out instead of casting &tv[0] to a time_t
	 * pointer, time_t is not guaranteed to be 64 bits wide.  A short
	 * array is treated as zero rather than being read out of bounds.
	 */
	sec = (n > 0) ? (time_t)tv[0] : 0;
	nsec = (n > 1) ? tv[1] : 0;

	if (ctime_r(&sec, ctime_str) != NULL) {
		/*
		 * ctime_r() produces "Wed Jun 30 21:49:08 1993\n", pick out
		 * the pieces by their fixed offsets.  snprintf() bounds the
		 * write even if the fraction is wider than nine digits.
		 */
		(void) snprintf(str, sizeof (str), "%.6s %.4s %.8s.%09lld",
		    ctime_str + 4,		/* 'Jun 30' */
		    ctime_str + 20,		/* '1993' */
		    ctime_str + 11,		/* '21:49:08' */
		    (longlong_t)nsec);		/* '.123456789' */
	} else {
		/* The time cannot be converted, show the raw values */
		(void) snprintf(str, sizeof (str), "%lld.%09lld",
		    (longlong_t)sec, (longlong_t)nsec);
	}

	(void) printf(gettext("%s "), str);

	verify(nvlist_lookup_string(nvl, FM_CLASS, &ptr) == 0);
	(void) printf(gettext("%s\n"), ptr);
}
/*
 * Print every name/value pair of 'nvl', one per line, indented by 'depth'
 * columns.  Embedded nvlists, and arrays of them, are printed recursively
 * with eight additional columns of indentation and are closed by an
 * "(end <name>)" line.
 *
 * Integer values are printed in hex at the width of their own type, so a
 * negative signed value shows only the bits it actually occupies.  String,
 * boolean and byte arrays, doubles and any type this function does not know
 * about are not decoded and print as "<unknown>".
 */
static void
zpool_do_events_nvprint(nvlist_t *nvl, int depth)
{
	nvpair_t *nvp;

	for (nvp = nvlist_next_nvpair(nvl, NULL);
	    nvp != NULL; nvp = nvlist_next_nvpair(nvl, nvp)) {

		data_type_t type = nvpair_type(nvp);
		const char *name = nvpair_name(nvp);

		boolean_t b;
		uint8_t i8;
		uint16_t i16;
		uint32_t i32;
		uint64_t i64;
		char *str = NULL;
		nvlist_t *cnv;

		printf(gettext("%*s%s = "), depth, "", name);

		switch (type) {
		case DATA_TYPE_BOOLEAN:
			/* A bare boolean carries no value, presence is true */
			printf(gettext("%s"), "1");
			break;

		case DATA_TYPE_BOOLEAN_VALUE:
			(void) nvpair_value_boolean_value(nvp, &b);
			printf(gettext("%s"), b ? "1" : "0");
			break;

		case DATA_TYPE_BYTE:
			(void) nvpair_value_byte(nvp, &i8);
			printf(gettext("0x%x"), i8);
			break;

		/*
		 * Signed scalars are fetched into the unsigned local of the
		 * same width so that they are not sign extended when printed.
		 */
		case DATA_TYPE_INT8:
			(void) nvpair_value_int8(nvp, (void *)&i8);
			printf(gettext("0x%x"), i8);
			break;

		case DATA_TYPE_UINT8:
			(void) nvpair_value_uint8(nvp, &i8);
			printf(gettext("0x%x"), i8);
			break;

		case DATA_TYPE_INT16:
			(void) nvpair_value_int16(nvp, (void *)&i16);
			printf(gettext("0x%x"), i16);
			break;

		case DATA_TYPE_UINT16:
			(void) nvpair_value_uint16(nvp, &i16);
			printf(gettext("0x%x"), i16);
			break;

		case DATA_TYPE_INT32:
			(void) nvpair_value_int32(nvp, (void *)&i32);
			printf(gettext("0x%x"), i32);
			break;

		case DATA_TYPE_UINT32:
			(void) nvpair_value_uint32(nvp, &i32);
			printf(gettext("0x%x"), i32);
			break;

		case DATA_TYPE_INT64:
			(void) nvpair_value_int64(nvp, (void *)&i64);
			printf(gettext("0x%llx"), (u_longlong_t)i64);
			break;

		case DATA_TYPE_UINT64:
			(void) nvpair_value_uint64(nvp, &i64);
			printf(gettext("0x%llx"), (u_longlong_t)i64);
			break;

		case DATA_TYPE_HRTIME:
			(void) nvpair_value_hrtime(nvp, (void *)&i64);
			printf(gettext("0x%llx"), (u_longlong_t)i64);
			break;

		case DATA_TYPE_STRING:
			(void) nvpair_value_string(nvp, &str);
			printf(gettext("\"%s\""), str ? str : "<NULL>");
			break;

		case DATA_TYPE_NVLIST:
			printf(gettext("(embedded nvlist)\n"));
			(void) nvpair_value_nvlist(nvp, &cnv);
			zpool_do_events_nvprint(cnv, depth + 8);
			printf(gettext("%*s(end %s)\n"), depth, "", name);
			break;

		case DATA_TYPE_NVLIST_ARRAY: {
			nvlist_t **val;
			uint_t i, nelem;

			(void) nvpair_value_nvlist_array(nvp, &val, &nelem);
			printf(gettext("(%d embedded nvlists)\n"), nelem);
			for (i = 0; i < nelem; i++) {
				printf(gettext("%*s%s[%d] = %s\n"),
				    depth, "", name, i, "(embedded nvlist)");
				zpool_do_events_nvprint(val[i], depth + 8);
				printf(gettext("%*s(end %s[%i])\n"),
				    depth, "", name, i);
			}
			printf(gettext("%*s(end %s)\n"), depth, "", name);
			break;
		}

		/*
		 * Elements of the signed arrays are converted to the
		 * unsigned type of the same width before printing, this
		 * keeps them consistent with the scalar cases above.
		 */
		case DATA_TYPE_INT8_ARRAY: {
			int8_t *val;
			uint_t i, nelem;

			(void) nvpair_value_int8_array(nvp, &val, &nelem);
			for (i = 0; i < nelem; i++)
				printf(gettext("0x%x "), (uint8_t)val[i]);
			break;
		}

		case DATA_TYPE_UINT8_ARRAY: {
			uint8_t *val;
			uint_t i, nelem;

			(void) nvpair_value_uint8_array(nvp, &val, &nelem);
			for (i = 0; i < nelem; i++)
				printf(gettext("0x%x "), val[i]);
			break;
		}

		case DATA_TYPE_INT16_ARRAY: {
			int16_t *val;
			uint_t i, nelem;

			(void) nvpair_value_int16_array(nvp, &val, &nelem);
			for (i = 0; i < nelem; i++)
				printf(gettext("0x%x "), (uint16_t)val[i]);
			break;
		}

		case DATA_TYPE_UINT16_ARRAY: {
			uint16_t *val;
			uint_t i, nelem;

			(void) nvpair_value_uint16_array(nvp, &val, &nelem);
			for (i = 0; i < nelem; i++)
				printf(gettext("0x%x "), val[i]);
			break;
		}

		case DATA_TYPE_INT32_ARRAY: {
			int32_t *val;
			uint_t i, nelem;

			(void) nvpair_value_int32_array(nvp, &val, &nelem);
			for (i = 0; i < nelem; i++)
				printf(gettext("0x%x "), (uint32_t)val[i]);
			break;
		}

		case DATA_TYPE_UINT32_ARRAY: {
			uint32_t *val;
			uint_t i, nelem;

			(void) nvpair_value_uint32_array(nvp, &val, &nelem);
			for (i = 0; i < nelem; i++)
				printf(gettext("0x%x "), val[i]);
			break;
		}

		case DATA_TYPE_INT64_ARRAY: {
			int64_t *val;
			uint_t i, nelem;

			(void) nvpair_value_int64_array(nvp, &val, &nelem);
			for (i = 0; i < nelem; i++)
				printf(gettext("0x%llx "),
				    (u_longlong_t)val[i]);
			break;
		}

		case DATA_TYPE_UINT64_ARRAY: {
			uint64_t *val;
			uint_t i, nelem;

			(void) nvpair_value_uint64_array(nvp, &val, &nelem);
			for (i = 0; i < nelem; i++)
				printf(gettext("0x%llx "),
				    (u_longlong_t)val[i]);
			break;
		}

		case DATA_TYPE_STRING_ARRAY:
		case DATA_TYPE_BOOLEAN_ARRAY:
		case DATA_TYPE_BYTE_ARRAY:
		case DATA_TYPE_DOUBLE:
		case DATA_TYPE_UNKNOWN:
		default:
			/* Not decoded, or a type added after this was written */
			printf(gettext("<unknown>"));
			break;
		}

		printf(gettext("\n"));
	}
}
/*
 * Print a header and then every event returned by zpool_events_next(): a
 * summary line per event, followed by the full nvlist when opts->verbose is
 * set.  opts->follow is handed to zpool_events_next() as its blocking flag.
 * The loop ends when zpool_events_next() fails or returns no event.
 *
 * Returns 0 on success, 1 when ZFS_DEV cannot be opened, otherwise the
 * non-zero value returned by zpool_events_next().
 */
static int
zpool_do_events_next(ev_opts_t *opts)
{
	nvlist_t *nvl;
	int cleanup_fd, ret, dropped;

	/*
	 * This descriptor is passed to the kernel with every request.
	 * Failing to open the device is an ordinary runtime condition (for
	 * example insufficient privileges), so report it instead of
	 * aborting.
	 */
	cleanup_fd = open(ZFS_DEV, O_RDWR);
	if (cleanup_fd < 0) {
		perror(ZFS_DEV);
		return (1);
	}

	(void) printf(gettext("%-30s %s\n"), "TIME", "CLASS");

	while (1) {
		ret = zpool_events_next(g_zfs, &nvl, &dropped,
		    !!opts->follow, cleanup_fd);
		if (ret || nvl == NULL)
			break;

		if (dropped > 0)
			(void) printf(gettext("dropped %d events\n"), dropped);

		zpool_do_events_short(nvl);

		if (opts->verbose) {
			zpool_do_events_nvprint(nvl, 8);
			printf(gettext("\n"));
		}

		nvlist_free(nvl);
	}

	VERIFY(0 == close(cleanup_fd));

	return (ret);
}
/*
 * Handle 'zpool events -c': ask zpool_events_clear() to clear the queued
 * events and, on success, report how many were cleared.  Returns whatever
 * zpool_events_clear() returned.  'opts' is currently not consulted.
 */
static int
zpool_do_events_clear(ev_opts_t *opts)
{
	int cleared;
	int error;

	error = zpool_events_clear(g_zfs, &cleared);
	if (error != 0)
		return (error);

	(void) printf(gettext("cleared %d events\n"), cleared);

	return (error);
}
/*
 * zpool events [-vfc]
 *
 * Displays the events logged by ZFS.
 *
 *	-v	Print the full contents of each event, not just the summary.
 *	-f	Follow mode, request blocking reads of new events.
 *	-c	Clear the queued events instead of displaying them.
 *
 * The command takes no operands; any that are given are rejected with a
 * usage message rather than being silently ignored.
 */
int
zpool_do_events(int argc, char **argv)
{
	ev_opts_t opts = { 0 };
	int ret;
	int c;

	/* check options */
	while ((c = getopt(argc, argv, "vfc")) != -1) {
		switch (c) {
		case 'v':
			opts.verbose = 1;
			break;
		case 'f':
			opts.follow = 1;
			break;
		case 'c':
			opts.clear = 1;
			break;
		case '?':
			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
			    optopt);
			usage(B_FALSE);
		}
	}
	argc -= optind;
	argv += optind;

	if (argc > 0) {
		(void) fprintf(stderr, gettext("too many arguments\n"));
		usage(B_FALSE);
	}

	if (opts.clear)
		ret = zpool_do_events_clear(&opts);
	else
		ret = zpool_do_events_next(&opts);

	return (ret);
}
static int
get_callback(zpool_handle_t *zhp, void *data)
{

View File

@ -368,6 +368,8 @@ extern int zpool_history_unpack(char *, uint64_t, uint64_t *,
extern void zpool_set_history_str(const char *subcommand, int argc,
char **argv, char *history_str);
extern int zpool_stage_history(libzfs_handle_t *, const char *);
extern int zpool_events_next(libzfs_handle_t *, nvlist_t **, int *, int, int);
extern int zpool_events_clear(libzfs_handle_t *, int *);
extern void zpool_obj_to_path(zpool_handle_t *, uint64_t, uint64_t, char *,
size_t len);
extern int zfs_ioctl(libzfs_handle_t *, int, struct zfs_cmd *);

View File

@ -3474,6 +3474,94 @@ zpool_get_history(zpool_handle_t *zhp, nvlist_t **nvhisp)
return (err);
}
/*
 * Fetch the next event from the kernel.
 *
 * When an event is available '*nvp' receives a newly allocated nvlist,
 * which the caller must free, and '*dropped' receives the number of events
 * missed since the previous call.  When no event is available '*nvp' is
 * left NULL.  Both of these outcomes return 0; a fatal error returns a
 * non-zero value.
 *
 * With 'block' set the ZEVENT_NONBLOCK flag is not passed, so the call does
 * not return until an event is available.  'cleanup_fd' is handed to the
 * kernel in zc_cleanup_fd.
 */
int
zpool_events_next(libzfs_handle_t *hdl, nvlist_t **nvp,
    int *dropped, int block, int cleanup_fd)
{
	zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
	int error = 0;

	*nvp = NULL;
	*dropped = 0;

	zc.zc_cleanup_fd = cleanup_fd;
	if (!block)
		zc.zc_guid = ZEVENT_NONBLOCK;

	if (zcmd_alloc_dst_nvlist(hdl, &zc, ZEVENT_SIZE) != 0)
		return (-1);

	/* Each pass through the loop is one failed attempt at the ioctl */
	while (zfs_ioctl(hdl, ZFS_IOC_EVENTS_NEXT, &zc) != 0) {
		if (errno == ENOMEM) {
			/* Destination too small, grow it and try again */
			if (zcmd_expand_dst_nvlist(hdl, &zc) == 0)
				continue;

			error = zfs_error_fmt(hdl, EZFS_NOMEM,
			    dgettext(TEXT_DOMAIN, "cannot get event"));
		} else if (errno == ESHUTDOWN) {
			error = zfs_error_fmt(hdl, EZFS_POOLUNAVAIL,
			    dgettext(TEXT_DOMAIN, "zfs shutdown"));
		} else if (errno != ENOENT || block) {
			/*
			 * ENOENT is the expected "no events" answer to a
			 * non-blocking request and is not an error there;
			 * it should not occur for a blocking request.
			 */
			error = zpool_standard_error_fmt(hdl, errno,
			    dgettext(TEXT_DOMAIN, "cannot get event"));
		}

		goto out;
	}

	error = zcmd_read_dst_nvlist(hdl, &zc, nvp);
	if (error == 0)
		*dropped = (int)zc.zc_cookie;

out:
	zcmd_free_nvlists(&zc);
	return (error);
}
/*
 * Clear all events.
 *
 * On success returns 0 and, when 'count' is not NULL, stores the number of
 * events cleared as reported by the kernel in zc_cookie.  On failure returns
 * the value of zpool_standard_error_fmt() for the errno left by the ioctl.
 */
int
zpool_events_clear(libzfs_handle_t *hdl, int *count)
{
	zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };

	/*
	 * The message is passed straight through, as zpool_events_next()
	 * does, rather than being expanded into a scratch buffer that is
	 * then formatted a second time.
	 */
	if (zfs_ioctl(hdl, ZFS_IOC_EVENTS_CLEAR, &zc) != 0)
		return (zpool_standard_error_fmt(hdl, errno,
		    dgettext(TEXT_DOMAIN, "cannot clear events")));

	if (count != NULL)
		*count = (int)zc.zc_cookie; /* # of events cleared */

	return (0);
}
void
zpool_obj_to_path(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj,
char *pathname, size_t len)

View File

@ -57,7 +57,6 @@ extern "C" {
#include <atomic.h>
#include <dirent.h>
#include <time.h>
#include <libsysevent.h>
#include <sys/note.h>
#include <sys/types.h>
#include <sys/cred.h>
@ -71,8 +70,7 @@ extern "C" {
#include <sys/sdt.h>
#include <sys/kstat.h>
#include <sys/u8_textprep.h>
#include <sys/sysevent/eventdefs.h>
#include <sys/sysevent/dev.h>
#include <sys/fm/fs/zfs.h>
#include <sys/sunddi.h>
/*

View File

@ -774,7 +774,9 @@ typedef enum zfs_ioc {
ZFS_IOC_NEXT_OBJ,
ZFS_IOC_DIFF,
ZFS_IOC_TMP_SNAPSHOT,
ZFS_IOC_OBJ_TO_STATS
ZFS_IOC_OBJ_TO_STATS,
ZFS_IOC_EVENTS_NEXT,
ZFS_IOC_EVENTS_CLEAR,
} zfs_ioc_t;
/*

View File

@ -50,9 +50,7 @@
typedef int (scan_cb_t)(dsl_pool_t *, const blkptr_t *, const zbookmark_t *);
static scan_cb_t dsl_scan_defrag_cb;
static scan_cb_t dsl_scan_scrub_cb;
static scan_cb_t dsl_scan_remove_cb;
static dsl_syncfunc_t dsl_scan_cancel_sync;
static void dsl_scan_sync_state(dsl_scan_t *, dmu_tx_t *tx);
@ -194,9 +192,9 @@ dsl_scan_setup_sync(void *arg1, void *arg2, dmu_tx_t *tx)
if (vdev_resilver_needed(spa->spa_root_vdev,
&scn->scn_phys.scn_min_txg, &scn->scn_phys.scn_max_txg)) {
spa_event_notify(spa, NULL, ESC_ZFS_RESILVER_START);
spa_event_notify(spa, NULL, FM_EREPORT_ZFS_RESILVER_START);
} else {
spa_event_notify(spa, NULL, ESC_ZFS_SCRUB_START);
spa_event_notify(spa, NULL, FM_EREPORT_ZFS_SCRUB_START);
}
spa->spa_scrub_started = B_TRUE;
@ -297,7 +295,8 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx)
complete ? scn->scn_phys.scn_max_txg : 0, B_TRUE);
if (complete) {
spa_event_notify(spa, NULL, scn->scn_phys.scn_min_txg ?
ESC_ZFS_RESILVER_FINISH : ESC_ZFS_SCRUB_FINISH);
FM_EREPORT_ZFS_RESILVER_FINISH :
FM_EREPORT_ZFS_SCRUB_FINISH);
}
spa_errlog_rotate(spa);

File diff suppressed because it is too large Load Diff

View File

@ -35,7 +35,9 @@ extern "C" {
#define FM_EREPORT_ZFS_CHECKSUM "checksum"
#define FM_EREPORT_ZFS_IO "io"
#define FM_EREPORT_ZFS_DATA "data"
#define FM_EREPORT_ZFS_CONFIG_SYNC "config.sync"
#define FM_EREPORT_ZFS_POOL "zpool"
#define FM_EREPORT_ZFS_POOL_DESTROY "zpool.destroy"
#define FM_EREPORT_ZFS_DEVICE_UNKNOWN "vdev.unknown"
#define FM_EREPORT_ZFS_DEVICE_OPEN_FAILED "vdev.open_failed"
#define FM_EREPORT_ZFS_DEVICE_CORRUPT_DATA "vdev.corrupt_data"
@ -43,9 +45,19 @@ extern "C" {
#define FM_EREPORT_ZFS_DEVICE_BAD_GUID_SUM "vdev.bad_guid_sum"
#define FM_EREPORT_ZFS_DEVICE_TOO_SMALL "vdev.too_small"
#define FM_EREPORT_ZFS_DEVICE_BAD_LABEL "vdev.bad_label"
#define FM_EREPORT_ZFS_DEVICE_REMOVE "vdev.remove"
#define FM_EREPORT_ZFS_DEVICE_CLEAR "vdev.clear"
#define FM_EREPORT_ZFS_DEVICE_CHECK "vdev.check"
#define FM_EREPORT_ZFS_DEVICE_SPARE "vdev.spare"
#define FM_EREPORT_ZFS_DEVICE_AUTOEXPAND "vdev.autoexpand"
#define FM_EREPORT_ZFS_IO_FAILURE "io_failure"
#define FM_EREPORT_ZFS_PROBE_FAILURE "probe_failure"
#define FM_EREPORT_ZFS_LOG_REPLAY "log_replay"
#define FM_EREPORT_ZFS_RESILVER_START "resilver.start"
#define FM_EREPORT_ZFS_RESILVER_FINISH "resilver.finish"
#define FM_EREPORT_ZFS_SCRUB_START "scrub.start"
#define FM_EREPORT_ZFS_SCRUB_FINISH "scrub.finish"
#define FM_EREPORT_ZFS_BOOTFS_VDEV_ATTACH "bootfs.vdev.attach"
#define FM_EREPORT_PAYLOAD_ZFS_POOL "pool"
#define FM_EREPORT_PAYLOAD_ZFS_POOL_FAILMODE "pool_failmode"
@ -56,6 +68,7 @@ extern "C" {
#define FM_EREPORT_PAYLOAD_ZFS_VDEV_PATH "vdev_path"
#define FM_EREPORT_PAYLOAD_ZFS_VDEV_DEVID "vdev_devid"
#define FM_EREPORT_PAYLOAD_ZFS_VDEV_FRU "vdev_fru"
#define FM_EREPORT_PAYLOAD_ZFS_VDEV_STATE "vdev_state"
#define FM_EREPORT_PAYLOAD_ZFS_PARENT_GUID "parent_guid"
#define FM_EREPORT_PAYLOAD_ZFS_PARENT_TYPE "parent_type"
#define FM_EREPORT_PAYLOAD_ZFS_PARENT_PATH "parent_path"
@ -85,9 +98,9 @@ extern "C" {
#define FM_EREPORT_FAILMODE_CONTINUE "continue"
#define FM_EREPORT_FAILMODE_PANIC "panic"
#define FM_RESOURCE_REMOVED "removed"
#define FM_RESOURCE_AUTOREPLACE "autoreplace"
#define FM_RESOURCE_STATECHANGE "statechange"
#define FM_EREPORT_RESOURCE_REMOVED "removed"
#define FM_EREPORT_RESOURCE_AUTOREPLACE "autoreplace"
#define FM_EREPORT_RESOURCE_STATECHANGE "statechange"
#ifdef __cplusplus
}

View File

@ -69,6 +69,7 @@ extern "C" {
/* ereport payload member names */
#define FM_EREPORT_DETECTOR "detector"
#define FM_EREPORT_ENA "ena"
#define FM_EREPORT_TIME "time"
/* list.* event payload member names */
#define FM_LIST_EVENT_SIZE "list-sz"
@ -327,16 +328,13 @@ extern "C" {
#define FM_FMRI_SW_CTXT_ZONE "zone"
#define FM_FMRI_SW_CTXT_CTID "ctid"
#define FM_FMRI_SW_CTXT_STACK "stack"
extern nv_alloc_t *fm_nva_xcreate(char *, size_t);
extern void fm_nva_xdestroy(nv_alloc_t *);
extern nvlist_t *fm_nvlist_create(nv_alloc_t *);
extern void fm_nvlist_destroy(nvlist_t *, int);
#define FM_NVA_FREE 0 /* free allocator on nvlist_destroy */
#define FM_NVA_RETAIN 1 /* keep allocator on nvlist_destroy */
extern nv_alloc_t *fm_nva_xcreate(char *, size_t);
extern void fm_nva_xdestroy(nv_alloc_t *);
extern nvlist_t *fm_nvlist_create(nv_alloc_t *);
extern void fm_nvlist_destroy(nvlist_t *, int);
extern void fm_ereport_set(nvlist_t *, int, const char *, uint64_t,
const nvlist_t *, ...);
extern void fm_payload_set(nvlist_t *, ...);
@ -350,8 +348,6 @@ extern void fm_fmri_cpu_set(nvlist_t *, int, const nvlist_t *, uint32_t,
uint8_t *, const char *);
extern void fm_fmri_mem_set(nvlist_t *, int, const nvlist_t *, const char *,
const char *, uint64_t);
extern void fm_authority_set(nvlist_t *, int, const char *, const char *,
const char *, const char *);
extern void fm_fmri_zfs_set(nvlist_t *, int, uint64_t, uint64_t);
extern void fm_fmri_hc_create(nvlist_t *, int, const nvlist_t *, nvlist_t *,
nvlist_t *, int, ...);

View File

@ -31,7 +31,6 @@ extern "C" {
#endif
#include <sys/nvpair.h>
#include <sys/errorq.h>
/*
* Shared user/kernel definitions for class length, error channel name,
@ -71,29 +70,42 @@ typedef struct erpt_dump {
} erpt_dump_t;
#ifdef _KERNEL
#include <sys/systm.h>
#define FM_STK_DEPTH 20 /* maximum stack depth */
#define FM_SYM_SZ 64 /* maximum symbol size */
#define FM_ERR_PIL 2 /* PIL for ereport_errorq drain processing */
#define ZEVENT_SHUTDOWN 0x1
#define FM_EREPORT_PAYLOAD_NAME_STACK "stack"
typedef void zevent_cb_t(nvlist_t *, nvlist_t *);
extern errorq_t *ereport_errorq;
extern void *ereport_dumpbuf;
extern size_t ereport_dumplen;
/*
 * A single event and its bookkeeping.
 *
 * NOTE(review): the per-field descriptions below are inferred from the
 * field names and from the zfs_zevent_post() / zevent_cb_t prototypes in
 * this header, the code that uses them is not visible here - confirm
 * against the implementation.
 */
typedef struct zevent_s {
/* presumably the event nvlist handed to zfs_zevent_post() */
nvlist_t *ev_nvl; /* protected by the zevent_lock */
/* presumably the detector nvlist handed to zfs_zevent_post() */
nvlist_t *ev_detector; /* " */
/* presumably the zfs_zevent_t consumers attached via ze_node - TODO confirm */
list_t ev_ze_list; /* " */
/* presumably links this event into the queue of events - TODO confirm */
list_node_t ev_node; /* " */
/* presumably the cleanup callback handed to zfs_zevent_post() */
zevent_cb_t *ev_cb; /* " */
} zevent_t;
/*
 * Consumer state passed to zfs_zevent_next() and zfs_zevent_wait().
 *
 * NOTE(review): the per-field descriptions below are inferred from the
 * field names and the prototypes in this header - confirm against the
 * implementation.
 */
typedef struct zfs_zevent {
/* presumably the event this consumer is currently positioned at */
zevent_t *ze_zevent; /* protected by the zevent_lock */
/* presumably links this consumer onto zevent_t.ev_ze_list - TODO confirm */
list_node_t ze_node; /* " */
/* presumably the count of events this consumer missed - TODO confirm */
uint64_t ze_dropped; /* " */
} zfs_zevent_t;
extern void fm_init(void);
extern void fm_fini(void);
extern void fm_nvprint(nvlist_t *);
extern void fm_panic(const char *, ...);
extern void fm_banner(void);
extern void zfs_zevent_post(nvlist_t *, nvlist_t *, zevent_cb_t *);
extern void zfs_zevent_drain_all(int *);
extern int zfs_zevent_fd_hold(int, minor_t *, zfs_zevent_t **);
extern void zfs_zevent_fd_rele(int);
extern int zfs_zevent_next(zfs_zevent_t *, nvlist_t **, uint64_t *);
extern int zfs_zevent_wait(zfs_zevent_t *);
extern void zfs_zevent_init(zfs_zevent_t **);
extern void zfs_zevent_destroy(zfs_zevent_t *);
extern void fm_ereport_dump(void);
extern void fm_ereport_post(nvlist_t *, int);
#else
extern void fm_payload_stack_add(nvlist_t *, const pc_t *, int);
/*
 * Empty stand-ins for fm_init() and fm_fini(); both do nothing.
 * NOTE(review): these appear to sit in the non-_KERNEL branch of this
 * header - confirm against the full file.
 */
static inline void fm_init(void) { }
static inline void fm_fini(void) { }
extern int is_fm_panic();
#endif /* _KERNEL */
#ifdef __cplusplus

View File

@ -58,14 +58,9 @@ extern "C" {
#include <sys/zone.h>
#include <sys/uio.h>
#include <sys/zfs_debug.h>
#include <sys/sysevent.h>
#include <sys/sysevent/eventdefs.h>
#include <sys/sysevent/dev.h>
#include <sys/fm/util.h>
#include <sys/fm/fs/zfs.h>
#include <sys/sunddi.h>
#define CPU_SEQID (CPU->cpu_seqid)
#ifdef __cplusplus
}
#endif

View File

@ -236,6 +236,9 @@ typedef struct zinject_record {
#define ZINJECT_FLUSH_ARC 0x2
#define ZINJECT_UNLOAD_SPA 0x4
#define ZEVENT_NONBLOCK 0x1
#define ZEVENT_SIZE 1024
typedef struct zfs_share {
uint64_t z_exportdata;
uint64_t z_sharedata;

View File

@ -1293,8 +1293,9 @@ spa_check_removed(vdev_t *vd)
spa_check_removed(vd->vdev_child[c]);
if (vd->vdev_ops->vdev_op_leaf && vdev_is_dead(vd)) {
zfs_post_autoreplace(vd->vdev_spa, vd);
spa_event_notify(vd->vdev_spa, vd, ESC_ZFS_VDEV_CHECK);
zfs_ereport_post(FM_EREPORT_RESOURCE_AUTOREPLACE,
vd->vdev_spa, vd, NULL, 0, 0);
spa_event_notify(vd->vdev_spa, vd, FM_EREPORT_ZFS_DEVICE_CHECK);
}
}
@ -3639,7 +3640,7 @@ spa_export_common(char *pool, int new_state, nvlist_t **oldconfig,
}
}
spa_event_notify(spa, NULL, ESC_ZFS_POOL_DESTROY);
spa_event_notify(spa, NULL, FM_EREPORT_ZFS_POOL_DESTROY);
if (spa->spa_state != POOL_STATE_UNINITIALIZED) {
spa_unload(spa);
@ -3970,7 +3971,7 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
if (newvd->vdev_isspare) {
spa_spare_activate(newvd);
spa_event_notify(spa, newvd, ESC_ZFS_VDEV_SPARE);
spa_event_notify(spa, newvd, FM_EREPORT_ZFS_DEVICE_SPARE);
}
oldvdpath = spa_strdup(oldvd->vdev_path);
@ -4002,7 +4003,7 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
spa_strfree(newvdpath);
if (spa->spa_bootfs)
spa_event_notify(spa, newvd, ESC_ZFS_BOOTFS_VDEV_ATTACH);
spa_event_notify(spa, newvd, FM_EREPORT_ZFS_BOOTFS_VDEV_ATTACH);
return (0);
}
@ -4203,7 +4204,7 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done)
vd->vdev_detached = B_TRUE;
vdev_dirty(tvd, VDD_DTL, vd, txg);
spa_event_notify(spa, vd, ESC_ZFS_VDEV_REMOVE);
spa_event_notify(spa, vd, FM_EREPORT_ZFS_DEVICE_REMOVE);
/* hang on to the spa before we release the lock */
spa_open_ref(spa, FTAG);
@ -5034,9 +5035,6 @@ spa_async_probe(spa_t *spa, vdev_t *vd)
static void
spa_async_autoexpand(spa_t *spa, vdev_t *vd)
{
sysevent_id_t eid;
nvlist_t *attr;
char *physpath;
int c;
if (!spa->spa_autoexpand)
@ -5050,17 +5048,7 @@ spa_async_autoexpand(spa_t *spa, vdev_t *vd)
if (!vd->vdev_ops->vdev_op_leaf || vd->vdev_physpath == NULL)
return;
physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
(void) snprintf(physpath, MAXPATHLEN, "/devices%s", vd->vdev_physpath);
VERIFY(nvlist_alloc(&attr, NV_UNIQUE_NAME, KM_SLEEP) == 0);
VERIFY(nvlist_add_string(attr, DEV_PHYS_PATH, physpath) == 0);
(void) ddi_log_sysevent(zfs_dip, SUNW_VENDOR, EC_DEV_STATUS,
ESC_DEV_DLE, attr, &eid, DDI_SLEEP);
nvlist_free(attr);
kmem_free(physpath, MAXPATHLEN);
spa_event_notify(vd->vdev_spa, vd, FM_EREPORT_ZFS_DEVICE_AUTOEXPAND);
}
static void
@ -5858,8 +5846,7 @@ spa_has_active_shared_spare(spa_t *spa)
}
/*
* Post a sysevent corresponding to the given event. The 'name' must be one of
* the event definitions in sys/sysevent/eventdefs.h. The payload will be
* Post a FM_EREPORT_ZFS_* event from sys/fm/fs/zfs.h. The payload will be
* filled in from the spa and (optionally) the vdev. This doesn't do anything
* in the userland libzpool, as we don't want consumers to misinterpret ztest
* or zdb as real changes.
@ -5868,49 +5855,6 @@ void
spa_event_notify(spa_t *spa, vdev_t *vd, const char *name)
{
#ifdef _KERNEL
sysevent_t *ev;
sysevent_attr_list_t *attr = NULL;
sysevent_value_t value;
sysevent_id_t eid;
ev = sysevent_alloc(EC_ZFS, (char *)name, SUNW_KERN_PUB "zfs",
SE_SLEEP);
value.value_type = SE_DATA_TYPE_STRING;
value.value.sv_string = spa_name(spa);
if (sysevent_add_attr(&attr, ZFS_EV_POOL_NAME, &value, SE_SLEEP) != 0)
goto done;
value.value_type = SE_DATA_TYPE_UINT64;
value.value.sv_uint64 = spa_guid(spa);
if (sysevent_add_attr(&attr, ZFS_EV_POOL_GUID, &value, SE_SLEEP) != 0)
goto done;
if (vd) {
value.value_type = SE_DATA_TYPE_UINT64;
value.value.sv_uint64 = vd->vdev_guid;
if (sysevent_add_attr(&attr, ZFS_EV_VDEV_GUID, &value,
SE_SLEEP) != 0)
goto done;
if (vd->vdev_path) {
value.value_type = SE_DATA_TYPE_STRING;
value.value.sv_string = vd->vdev_path;
if (sysevent_add_attr(&attr, ZFS_EV_VDEV_PATH,
&value, SE_SLEEP) != 0)
goto done;
}
}
if (sysevent_attach_attributes(ev, attr) != 0)
goto done;
attr = NULL;
(void) log_sysevent(ev, SE_SLEEP, &eid);
done:
if (attr)
sysevent_free_attr(attr);
sysevent_free(ev);
zfs_ereport_post(name, spa, vd, NULL, 0, 0);
#endif
}

View File

@ -258,7 +258,7 @@ spa_config_sync(spa_t *target, boolean_t removing, boolean_t postsysevent)
spa_config_generation++;
if (postsysevent)
spa_event_notify(target, NULL, ESC_ZFS_CONFIG_SYNC);
spa_event_notify(target, NULL, FM_EREPORT_ZFS_CONFIG_SYNC);
}
/*

View File

@ -40,6 +40,7 @@
#include <sys/dsl_pool.h>
#include <sys/dsl_dir.h>
#include <sys/dsl_prop.h>
#include <sys/fm/util.h>
#include <sys/dsl_scan.h>
#include <sys/fs/zfs.h>
#include <sys/metaslab_impl.h>
@ -1540,6 +1541,7 @@ spa_init(int mode)
spa_mode_global = mode;
fm_init();
refcount_init();
unique_init();
zio_init();
@ -1565,6 +1567,7 @@ spa_fini(void)
zio_fini();
unique_fini();
refcount_fini();
fm_fini();
avl_destroy(&spa_namespace_avl);
avl_destroy(&spa_spare_avl);

View File

@ -2388,7 +2388,7 @@ vdev_clear(spa_t *spa, vdev_t *vd)
if (vd->vdev_aux == NULL && !vdev_is_dead(vd))
spa_async_request(spa, SPA_ASYNC_RESILVER);
spa_event_notify(spa, vd, ESC_ZFS_VDEV_CLEAR);
spa_event_notify(spa, vd, FM_EREPORT_ZFS_DEVICE_CLEAR);
}
/*

View File

@ -98,6 +98,16 @@
* ereport with information about the differences.
*/
#ifdef _KERNEL
/*
 * Callback passed to zfs_zevent_post(): destroys the event nvlist and the
 * detector nvlist with FM_NVA_FREE.  Either argument may be NULL, in which
 * case it is skipped.
 */
static void
zfs_zevent_post_cb(nvlist_t *nvl, nvlist_t *detector)
{
	if (nvl != NULL)
		fm_nvlist_destroy(nvl, FM_NVA_FREE);

	if (detector != NULL)
		fm_nvlist_destroy(detector, FM_NVA_FREE);
}
static void
zfs_ereport_start(nvlist_t **ereport_out, nvlist_t **detector_out,
const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio,
@ -410,7 +420,7 @@ update_histogram(uint64_t value_arg, uint16_t *hist, uint32_t *count)
* to the new smallest gap, to prepare for our next invocation.
*/
static void
shrink_ranges(zfs_ecksum_info_t *eip)
zei_shrink_ranges(zfs_ecksum_info_t *eip)
{
uint32_t mingap = UINT32_MAX;
uint32_t new_allowed_gap = eip->zei_mingap + 1;
@ -429,12 +439,13 @@ shrink_ranges(zfs_ecksum_info_t *eip)
uint32_t end = r[idx].zr_end;
while (idx < max - 1) {
uint32_t nstart, nend, gap;
idx++;
nstart = r[idx].zr_start;
nend = r[idx].zr_end;
uint32_t nstart = r[idx].zr_start;
uint32_t nend = r[idx].zr_end;
uint32_t gap = nstart - end;
gap = nstart - end;
if (gap < new_allowed_gap) {
end = nend;
continue;
@ -454,13 +465,13 @@ shrink_ranges(zfs_ecksum_info_t *eip)
}
static void
add_range(zfs_ecksum_info_t *eip, int start, int end)
zei_add_range(zfs_ecksum_info_t *eip, int start, int end)
{
struct zei_ranges *r = eip->zei_ranges;
size_t count = eip->zei_range_count;
if (count >= MAX_RANGES) {
shrink_ranges(eip);
zei_shrink_ranges(eip);
count = eip->zei_range_count;
}
if (count == 0) {
@ -482,7 +493,7 @@ add_range(zfs_ecksum_info_t *eip, int start, int end)
}
static size_t
range_total_size(zfs_ecksum_info_t *eip)
zei_range_total_size(zfs_ecksum_info_t *eip)
{
struct zei_ranges *r = eip->zei_ranges;
size_t count = eip->zei_range_count;
@ -559,7 +570,7 @@ annotate_ecksum(nvlist_t *ereport, zio_bad_cksum_t *info,
if (start == -1)
continue;
add_range(eip, start, idx);
zei_add_range(eip, start, idx);
start = -1;
} else {
if (start != -1)
@ -569,10 +580,10 @@ annotate_ecksum(nvlist_t *ereport, zio_bad_cksum_t *info,
}
}
if (start != -1)
add_range(eip, start, idx);
zei_add_range(eip, start, idx);
/* See if it will fit in our inline buffers */
inline_size = range_total_size(eip);
inline_size = zei_range_total_size(eip);
if (inline_size > ZFM_MAX_INLINE)
no_inline = 1;
@ -675,10 +686,8 @@ zfs_ereport_post(const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio,
if (ereport == NULL)
return;
fm_ereport_post(ereport, EVCH_SLEEP);
fm_nvlist_destroy(ereport, FM_NVA_FREE);
fm_nvlist_destroy(detector, FM_NVA_FREE);
/* Cleanup is handled by the callback function */
zfs_zevent_post(ereport, detector, zfs_zevent_post_cb);
#endif
}
@ -730,12 +739,10 @@ zfs_ereport_finish_checksum(zio_cksum_report_t *report,
good_data, bad_data, report->zcr_length, drop_if_identical);
if (info != NULL)
fm_ereport_post(report->zcr_ereport, EVCH_SLEEP);
zfs_zevent_post(report->zcr_ereport,
report->zcr_detector, zfs_zevent_post_cb);
fm_nvlist_destroy(report->zcr_ereport, FM_NVA_FREE);
fm_nvlist_destroy(report->zcr_detector, FM_NVA_FREE);
report->zcr_ereport = report->zcr_detector = NULL;
if (info != NULL)
kmem_free(info, sizeof (*info));
#endif
@ -764,7 +771,7 @@ void
zfs_ereport_send_interim_checksum(zio_cksum_report_t *report)
{
#ifdef _KERNEL
fm_ereport_post(report->zcr_ereport, EVCH_SLEEP);
zfs_zevent_post(report->zcr_ereport, report->zcr_detector, NULL);
#endif
}
@ -787,14 +794,10 @@ zfs_ereport_post_checksum(spa_t *spa, vdev_t *vd,
info = annotate_ecksum(ereport, zbc, good_data, bad_data, length,
B_FALSE);
if (info != NULL)
fm_ereport_post(ereport, EVCH_SLEEP);
fm_nvlist_destroy(ereport, FM_NVA_FREE);
fm_nvlist_destroy(detector, FM_NVA_FREE);
if (info != NULL)
if (info != NULL) {
zfs_zevent_post(ereport, detector, zfs_zevent_post_cb);
kmem_free(info, sizeof (*info));
}
#endif
}
@ -817,13 +820,14 @@ zfs_post_common(spa_t *spa, vdev_t *vd, const char *name)
VERIFY(nvlist_add_string(resource, FM_CLASS, class) == 0);
VERIFY(nvlist_add_uint64(resource,
FM_EREPORT_PAYLOAD_ZFS_POOL_GUID, spa_guid(spa)) == 0);
if (vd)
if (vd) {
VERIFY(nvlist_add_uint64(resource,
FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, vd->vdev_guid) == 0);
VERIFY(nvlist_add_uint64(resource,
FM_EREPORT_PAYLOAD_ZFS_VDEV_STATE, vd->vdev_state) == 0);
}
fm_ereport_post(resource, EVCH_SLEEP);
fm_nvlist_destroy(resource, FM_NVA_FREE);
zfs_zevent_post(resource, NULL, zfs_zevent_post_cb);
#endif
}
@ -836,7 +840,7 @@ zfs_post_common(spa_t *spa, vdev_t *vd, const char *name)
void
zfs_post_remove(spa_t *spa, vdev_t *vd)
{
zfs_post_common(spa, vd, FM_RESOURCE_REMOVED);
zfs_post_common(spa, vd, FM_EREPORT_RESOURCE_REMOVED);
}
/*
@ -847,7 +851,7 @@ zfs_post_remove(spa_t *spa, vdev_t *vd)
void
zfs_post_autoreplace(spa_t *spa, vdev_t *vd)
{
zfs_post_common(spa, vd, FM_RESOURCE_AUTOREPLACE);
zfs_post_common(spa, vd, FM_EREPORT_RESOURCE_AUTOREPLACE);
}
/*
@ -859,5 +863,13 @@ zfs_post_autoreplace(spa_t *spa, vdev_t *vd)
void
zfs_post_state_change(spa_t *spa, vdev_t *vd)
{
zfs_post_common(spa, vd, FM_RESOURCE_STATECHANGE);
zfs_post_common(spa, vd, FM_EREPORT_RESOURCE_STATECHANGE);
}
#if defined(_KERNEL) && defined(HAVE_SPL)
EXPORT_SYMBOL(zfs_ereport_post);
EXPORT_SYMBOL(zfs_ereport_post_checksum);
EXPORT_SYMBOL(zfs_post_remove);
EXPORT_SYMBOL(zfs_post_autoreplace);
EXPORT_SYMBOL(zfs_post_state_change);
#endif /* _KERNEL */

View File

@ -1798,7 +1798,7 @@ zfs_ioc_objset_stats(zfs_cmd_t *zc)
* local property values.
*/
static int
zfs_ioc_objset_recvd_props(struct file *filp, zfs_cmd_t *zc)
zfs_ioc_objset_recvd_props(zfs_cmd_t *zc)
{
objset_t *os = NULL;
int error;
@ -4626,6 +4626,67 @@ zfs_ioc_get_holds(zfs_cmd_t *zc)
return (error);
}
/*
 * inputs:
 * zc_guid		flags (ZEVENT_NONBLOCK)
 * zc_cleanup_fd	cleanup-on-exit file descriptor
 *
 * outputs:
 * zc_nvlist_dst	next nvlist event
 * zc_cookie		dropped events since last get
 *
 * Returns 0 once an event has been copied out, otherwise an errno value.
 * Unless ZEVENT_NONBLOCK is set, an ENOENT result from zfs_zevent_next()
 * causes a call to zfs_zevent_wait() followed by another attempt.
 */
static int
zfs_ioc_events_next(zfs_cmd_t *zc)
{
	zfs_zevent_t *ze;
	nvlist_t *event = NULL;
	minor_t minor;
	uint64_t dropped = 0;
	int error;

	error = zfs_zevent_fd_hold(zc->zc_cleanup_fd, &minor, &ze);
	if (error != 0)
		return (error);

	for (;;) {
		error = zfs_zevent_next(ze, &event, &dropped);
		if (event != NULL) {
			/*
			 * NOTE(review): if put_nvlist() fails here the event
			 * has already been handed out by zfs_zevent_next();
			 * whether a retry can see it again depends on that
			 * function - confirm.
			 */
			zc->zc_cookie = dropped;
			error = put_nvlist(zc, event);
			nvlist_free(event);
		}

		/* Only a blocking caller that saw ENOENT goes on to wait */
		if ((zc->zc_guid & ZEVENT_NONBLOCK) || error != ENOENT)
			break;

		error = zfs_zevent_wait(ze);
		if (error != 0)
			break;
	}

	zfs_zevent_fd_rele(zc->zc_cleanup_fd);

	return (error);
}
/*
 * outputs:
 * zc_cookie		cleared events count
 *
 * Calls zfs_zevent_drain_all() and reports the count it produced.
 * Always returns 0.
 */
static int
zfs_ioc_events_clear(zfs_cmd_t *zc)
{
	int drained;

	zfs_zevent_drain_all(&drained);
	zc->zc_cookie = drained;

	return (0);
}
/*
* pool create, destroy, and export don't log the history as part of
* zfsdev_ioctl, but rather zfs_ioc_pool_create, and zfs_ioc_pool_export
@ -4747,7 +4808,11 @@ static zfs_ioc_vec_t zfs_ioc_vec[] = {
{ zfs_ioc_tmp_snapshot, zfs_secpolicy_tmp_snapshot, DATASET_NAME,
B_FALSE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
{ zfs_ioc_obj_to_stats, zfs_secpolicy_diff, DATASET_NAME, B_FALSE,
POOL_CHECK_SUSPENDED }
POOL_CHECK_SUSPENDED },
{ zfs_ioc_events_next, zfs_secpolicy_config, NO_NAME, B_FALSE,
POOL_CHECK_NONE },
{ zfs_ioc_events_clear, zfs_secpolicy_config, NO_NAME, B_FALSE,
POOL_CHECK_NONE },
};
int