Prevent zevent list from consuming all of kernel memory

There are a couple of changes included here. The first is to introduce
a cap on the size to which the ZED will grow the zevent list. The
default of roughly one million entries (1 << 20) is more than enough
for most use cases, and if you are overflowing that value, the problem
needs to be addressed another way. The value is also tunable (via the
new -b option), for those who want the limit to be higher or lower.
 
The other change is to add a kernel module parameter,
zfs_snapshot_history_enabled, that allows snapshot creation/deletion
to be exempted from history logging. For most workloads, having these
operations logged is valuable, but for some workloads it produces
large quantities of log spam and isn't especially helpful.

Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Paul Dagnelie <pcd@delphix.com>
Issue #13374 
Closes #13753
This commit is contained in:
Paul Dagnelie 2022-08-22 12:36:22 -07:00 committed by GitHub
parent d22dd77c4d
commit 17e212652d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 60 additions and 9 deletions

View File

@ -48,6 +48,7 @@ zed_conf_init(struct zed_conf *zcp)
zcp->zevent_fd = -1; /* opened in zed_event_init() */ zcp->zevent_fd = -1; /* opened in zed_event_init() */
zcp->max_jobs = 16; zcp->max_jobs = 16;
zcp->max_zevent_buf_len = 1 << 20;
if (!(zcp->pid_file = strdup(ZED_PID_FILE)) || if (!(zcp->pid_file = strdup(ZED_PID_FILE)) ||
!(zcp->zedlet_dir = strdup(ZED_ZEDLET_DIR)) || !(zcp->zedlet_dir = strdup(ZED_ZEDLET_DIR)) ||
@ -141,6 +142,8 @@ _zed_conf_display_help(const char *prog, boolean_t got_err)
.v = ZED_STATE_FILE }, .v = ZED_STATE_FILE },
{ .o = "-j JOBS", .d = "Start at most JOBS at once.", { .o = "-j JOBS", .d = "Start at most JOBS at once.",
.v = "16" }, .v = "16" },
{ .o = "-b LEN", .d = "Cap kernel event buffer at LEN entries.",
.v = "1048576" },
{}, {},
}; };
@ -230,7 +233,7 @@ _zed_conf_parse_path(char **resultp, const char *path)
void void
zed_conf_parse_opts(struct zed_conf *zcp, int argc, char **argv) zed_conf_parse_opts(struct zed_conf *zcp, int argc, char **argv)
{ {
const char * const opts = ":hLVd:p:P:s:vfFMZIj:"; const char * const opts = ":hLVd:p:P:s:vfFMZIj:b:";
int opt; int opt;
unsigned long raw; unsigned long raw;
@ -291,6 +294,17 @@ zed_conf_parse_opts(struct zed_conf *zcp, int argc, char **argv)
zcp->max_jobs = raw; zcp->max_jobs = raw;
} }
break; break;
case 'b':
errno = 0;
raw = strtoul(optarg, NULL, 0);
if (errno == ERANGE || raw > INT32_MAX) {
zed_log_die("%lu is too large", raw);
} if (raw == 0) {
zcp->max_zevent_buf_len = INT32_MAX;
} else {
zcp->max_zevent_buf_len = raw;
}
break;
case '?': case '?':
default: default:
if (optopt == '?') if (optopt == '?')

View File

@ -33,6 +33,7 @@ struct zed_conf {
int zevent_fd; /* fd for access to zevents */ int zevent_fd; /* fd for access to zevents */
int16_t max_jobs; /* max zedlets to run at one time */ int16_t max_jobs; /* max zedlets to run at one time */
int32_t max_zevent_buf_len; /* max size of kernel event list */
boolean_t do_force:1; /* true if force enabled */ boolean_t do_force:1; /* true if force enabled */
boolean_t do_foreground:1; /* true if run in foreground */ boolean_t do_foreground:1; /* true if run in foreground */

View File

@ -38,6 +38,8 @@
#define MAXBUF 4096 #define MAXBUF 4096
static int max_zevent_buf_len = 1 << 20;
/* /*
* Open the libzfs interface. * Open the libzfs interface.
*/ */
@ -70,6 +72,9 @@ zed_event_init(struct zed_conf *zcp)
zed_log_die("Failed to initialize disk events"); zed_log_die("Failed to initialize disk events");
} }
if (zcp->max_zevent_buf_len != 0)
max_zevent_buf_len = zcp->max_zevent_buf_len;
return (0); return (0);
} }
@ -105,7 +110,7 @@ _bump_event_queue_length(void)
{ {
int zzlm = -1, wr; int zzlm = -1, wr;
char qlen_buf[12] = {0}; /* parameter is int => max "-2147483647\n" */ char qlen_buf[12] = {0}; /* parameter is int => max "-2147483647\n" */
long int qlen; long int qlen, orig_qlen;
zzlm = open("/sys/module/zfs/parameters/zfs_zevent_len_max", O_RDWR); zzlm = open("/sys/module/zfs/parameters/zfs_zevent_len_max", O_RDWR);
if (zzlm < 0) if (zzlm < 0)
@ -116,7 +121,7 @@ _bump_event_queue_length(void)
qlen_buf[sizeof (qlen_buf) - 1] = '\0'; qlen_buf[sizeof (qlen_buf) - 1] = '\0';
errno = 0; errno = 0;
qlen = strtol(qlen_buf, NULL, 10); orig_qlen = qlen = strtol(qlen_buf, NULL, 10);
if (errno == ERANGE) if (errno == ERANGE)
goto done; goto done;
@ -125,8 +130,14 @@ _bump_event_queue_length(void)
else else
qlen *= 2; qlen *= 2;
if (qlen > INT_MAX) /*
qlen = INT_MAX; * Don't consume all of kernel memory with event logs if something
* goes wrong.
*/
if (qlen > max_zevent_buf_len)
qlen = max_zevent_buf_len;
if (qlen == orig_qlen)
goto done;
wr = snprintf(qlen_buf, sizeof (qlen_buf), "%ld", qlen); wr = snprintf(qlen_buf, sizeof (qlen_buf), "%ld", qlen);
if (pwrite(zzlm, qlen_buf, wr, 0) < 0) if (pwrite(zzlm, qlen_buf, wr, 0) < 0)

View File

@ -27,6 +27,7 @@
.Op Fl P Ar path .Op Fl P Ar path
.Op Fl s Ar statefile .Op Fl s Ar statefile
.Op Fl j Ar jobs .Op Fl j Ar jobs
.Op Fl b Ar buflen
. .
.Sh DESCRIPTION .Sh DESCRIPTION
The The
@ -96,6 +97,17 @@ ZEDLETs to run concurrently,
delaying execution of new ones until they finish. delaying execution of new ones until they finish.
Defaults to Defaults to
.Sy 16 . .Sy 16 .
.It Fl b Ar buflen
Cap kernel event buffer growth to
.Ar buflen
entries.
The daemon grows this buffer whenever it misses an event, but a larger
buffer results in unreclaimable memory use in the kernel.
A value of
.Sy 0
removes the cap.
Defaults to
.Sy 1048576 .
.El .El
.Sh ZEVENTS .Sh ZEVENTS
A zevent is comprised of a list of nvpairs (name/value pairs). A zevent is comprised of a list of nvpairs (name/value pairs).

View File

@ -88,6 +88,8 @@ int zfs_max_recordsize = 16 * 1024 * 1024;
#endif #endif
static int zfs_allow_redacted_dataset_mount = 0; static int zfs_allow_redacted_dataset_mount = 0;
int zfs_snapshot_history_enabled = 1;
#define SWITCH64(x, y) \ #define SWITCH64(x, y) \
{ \ { \
uint64_t __tmp = (x); \ uint64_t __tmp = (x); \
@ -1867,6 +1869,7 @@ dsl_dataset_snapshot_sync_impl(dsl_dataset_t *ds, const char *snapname,
dsl_dir_snap_cmtime_update(ds->ds_dir, tx); dsl_dir_snap_cmtime_update(ds->ds_dir, tx);
if (zfs_snapshot_history_enabled)
spa_history_log_internal_ds(ds->ds_prev, "snapshot", tx, " "); spa_history_log_internal_ds(ds->ds_prev, "snapshot", tx, " ");
} }
@ -4985,6 +4988,9 @@ ZFS_MODULE_PARAM(zfs, zfs_, max_recordsize, INT, ZMOD_RW,
ZFS_MODULE_PARAM(zfs, zfs_, allow_redacted_dataset_mount, INT, ZMOD_RW, ZFS_MODULE_PARAM(zfs, zfs_, allow_redacted_dataset_mount, INT, ZMOD_RW,
"Allow mounting of redacted datasets"); "Allow mounting of redacted datasets");
ZFS_MODULE_PARAM(zfs, zfs_, snapshot_history_enabled, INT, ZMOD_RW,
"Include snapshot events in pool history/events");
EXPORT_SYMBOL(dsl_dataset_hold); EXPORT_SYMBOL(dsl_dataset_hold);
EXPORT_SYMBOL(dsl_dataset_hold_flags); EXPORT_SYMBOL(dsl_dataset_hold_flags);
EXPORT_SYMBOL(dsl_dataset_hold_obj); EXPORT_SYMBOL(dsl_dataset_hold_obj);

View File

@ -49,6 +49,8 @@
#include <sys/zthr.h> #include <sys/zthr.h>
#include <sys/spa_impl.h> #include <sys/spa_impl.h>
extern int zfs_snapshot_history_enabled;
int int
dsl_destroy_snapshot_check_impl(dsl_dataset_t *ds, boolean_t defer) dsl_destroy_snapshot_check_impl(dsl_dataset_t *ds, boolean_t defer)
{ {
@ -321,14 +323,19 @@ dsl_destroy_snapshot_sync_impl(dsl_dataset_t *ds, boolean_t defer, dmu_tx_t *tx)
ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS); ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS);
dmu_buf_will_dirty(ds->ds_dbuf, tx); dmu_buf_will_dirty(ds->ds_dbuf, tx);
dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_DEFER_DESTROY; dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_DEFER_DESTROY;
spa_history_log_internal_ds(ds, "defer_destroy", tx, " "); if (zfs_snapshot_history_enabled) {
spa_history_log_internal_ds(ds, "defer_destroy", tx,
" ");
}
return; return;
} }
ASSERT3U(dsl_dataset_phys(ds)->ds_num_children, <=, 1); ASSERT3U(dsl_dataset_phys(ds)->ds_num_children, <=, 1);
if (zfs_snapshot_history_enabled) {
/* We need to log before removing it from the namespace. */ /* We need to log before removing it from the namespace. */
spa_history_log_internal_ds(ds, "destroy", tx, " "); spa_history_log_internal_ds(ds, "destroy", tx, " ");
}
dsl_scan_ds_destroyed(ds, tx); dsl_scan_ds_destroyed(ds, tx);