3112 ztest does not honor ZFS_DEBUG 3113 ztest should use watchpoints to protect frozen arc bufs 3114 some leaked nvlists in zfsdev_ioctl Reviewed by: Adam Leventhal <ahl@delphix.com> Reviewed by: Matt Amdur <Matt.Amdur@delphix.com> Reviewed by: George Wilson <george.wilson@delphix.com> Reviewed by: Christopher Siden <chris.siden@delphix.com> Approved by: Eric Schrock <eric.schrock@delphix.com> References: https://www.illumos.org/issues/3112 https://www.illumos.org/issues/3113 https://www.illumos.org/issues/3114 illumos/illumos-gate@cd1c8b85eb The /proc/self/ctl watchpoint interface is specific to Solaris. Therefore, the #3113 implementation was reworked to use the more portable mprotect(2) system call. When the pages are watched they are marked read-only for protection. Any write to the protected address range immediately triggers a SIGSEGV. The pages are marked writable again when they are unwatched. Ported-by: Brian Behlendorf <behlendorf1@llnl.gov> Issue #1489
This commit is contained in:
parent
03c6040bee
commit
498877baf5
|
@ -6210,11 +6210,12 @@ main(int argc, char **argv)
|
||||||
|
|
||||||
(void) setvbuf(stdout, NULL, _IOLBF, 0);
|
(void) setvbuf(stdout, NULL, _IOLBF, 0);
|
||||||
|
|
||||||
|
dprintf_setup(&argc, argv);
|
||||||
|
|
||||||
ztest_fd_rand = open("/dev/urandom", O_RDONLY);
|
ztest_fd_rand = open("/dev/urandom", O_RDONLY);
|
||||||
ASSERT3S(ztest_fd_rand, >=, 0);
|
ASSERT3S(ztest_fd_rand, >=, 0);
|
||||||
|
|
||||||
if (!fd_data_str) {
|
if (!fd_data_str) {
|
||||||
dprintf_setup(&argc, argv);
|
|
||||||
process_options(argc, argv);
|
process_options(argc, argv);
|
||||||
|
|
||||||
setup_data_fd();
|
setup_data_fd();
|
||||||
|
|
|
@ -136,6 +136,7 @@ int arc_buf_size(arc_buf_t *buf);
|
||||||
void arc_release(arc_buf_t *buf, void *tag);
|
void arc_release(arc_buf_t *buf, void *tag);
|
||||||
int arc_released(arc_buf_t *buf);
|
int arc_released(arc_buf_t *buf);
|
||||||
int arc_has_callback(arc_buf_t *buf);
|
int arc_has_callback(arc_buf_t *buf);
|
||||||
|
void arc_buf_sigsegv(int sig, siginfo_t *si, void *unused);
|
||||||
void arc_buf_freeze(arc_buf_t *buf);
|
void arc_buf_freeze(arc_buf_t *buf);
|
||||||
void arc_buf_thaw(arc_buf_t *buf);
|
void arc_buf_thaw(arc_buf_t *buf);
|
||||||
boolean_t arc_buf_eviction_needed(arc_buf_t *buf);
|
boolean_t arc_buf_eviction_needed(arc_buf_t *buf);
|
||||||
|
@ -183,6 +184,10 @@ extern int zfs_write_limit_shift;
|
||||||
extern unsigned long zfs_write_limit_max;
|
extern unsigned long zfs_write_limit_max;
|
||||||
extern kmutex_t zfs_write_limit_lock;
|
extern kmutex_t zfs_write_limit_lock;
|
||||||
|
|
||||||
|
#ifndef _KERNEL
|
||||||
|
extern boolean_t arc_watch;
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -97,6 +97,8 @@
|
||||||
#include <dirent.h>
|
#include <dirent.h>
|
||||||
#include <time.h>
|
#include <time.h>
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
|
#include <signal.h>
|
||||||
|
#include <sys/mman.h>
|
||||||
#include <sys/note.h>
|
#include <sys/note.h>
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
#include <sys/cred.h>
|
#include <sys/cred.h>
|
||||||
|
|
|
@ -145,6 +145,11 @@
|
||||||
#include <sys/dmu_tx.h>
|
#include <sys/dmu_tx.h>
|
||||||
#include <zfs_fletcher.h>
|
#include <zfs_fletcher.h>
|
||||||
|
|
||||||
|
#ifndef _KERNEL
|
||||||
|
/* set with ZFS_DEBUG=watch, to enable watchpoints on frozen buffers */
|
||||||
|
boolean_t arc_watch = B_FALSE;
|
||||||
|
#endif
|
||||||
|
|
||||||
static kmutex_t arc_reclaim_thr_lock;
|
static kmutex_t arc_reclaim_thr_lock;
|
||||||
static kcondvar_t arc_reclaim_thr_cv; /* used to signal reclaim thr */
|
static kcondvar_t arc_reclaim_thr_cv; /* used to signal reclaim thr */
|
||||||
static uint8_t arc_thread_exit;
|
static uint8_t arc_thread_exit;
|
||||||
|
@ -569,6 +574,7 @@ static void arc_access(arc_buf_hdr_t *buf, kmutex_t *hash_lock);
|
||||||
static int arc_evict_needed(arc_buf_contents_t type);
|
static int arc_evict_needed(arc_buf_contents_t type);
|
||||||
static void arc_evict_ghost(arc_state_t *state, uint64_t spa, int64_t bytes,
|
static void arc_evict_ghost(arc_state_t *state, uint64_t spa, int64_t bytes,
|
||||||
arc_buf_contents_t type);
|
arc_buf_contents_t type);
|
||||||
|
static void arc_buf_watch(arc_buf_t *buf);
|
||||||
|
|
||||||
static boolean_t l2arc_write_eligible(uint64_t spa_guid, arc_buf_hdr_t *ab);
|
static boolean_t l2arc_write_eligible(uint64_t spa_guid, arc_buf_hdr_t *ab);
|
||||||
|
|
||||||
|
@ -1060,6 +1066,37 @@ arc_cksum_compute(arc_buf_t *buf, boolean_t force)
|
||||||
fletcher_2_native(buf->b_data, buf->b_hdr->b_size,
|
fletcher_2_native(buf->b_data, buf->b_hdr->b_size,
|
||||||
buf->b_hdr->b_freeze_cksum);
|
buf->b_hdr->b_freeze_cksum);
|
||||||
mutex_exit(&buf->b_hdr->b_freeze_lock);
|
mutex_exit(&buf->b_hdr->b_freeze_lock);
|
||||||
|
arc_buf_watch(buf);
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifndef _KERNEL
|
||||||
|
void
|
||||||
|
arc_buf_sigsegv(int sig, siginfo_t *si, void *unused)
|
||||||
|
{
|
||||||
|
panic("Got SIGSEGV at address: 0x%lx\n", (long) si->si_addr);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* ARGSUSED */
|
||||||
|
static void
|
||||||
|
arc_buf_unwatch(arc_buf_t *buf)
|
||||||
|
{
|
||||||
|
#ifndef _KERNEL
|
||||||
|
if (arc_watch) {
|
||||||
|
ASSERT0(mprotect(buf->b_data, buf->b_hdr->b_size,
|
||||||
|
PROT_READ | PROT_WRITE));
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ARGSUSED */
|
||||||
|
static void
|
||||||
|
arc_buf_watch(arc_buf_t *buf)
|
||||||
|
{
|
||||||
|
#ifndef _KERNEL
|
||||||
|
if (arc_watch)
|
||||||
|
ASSERT0(mprotect(buf->b_data, buf->b_hdr->b_size, PROT_READ));
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
@ -1080,6 +1117,8 @@ arc_buf_thaw(arc_buf_t *buf)
|
||||||
}
|
}
|
||||||
|
|
||||||
mutex_exit(&buf->b_hdr->b_freeze_lock);
|
mutex_exit(&buf->b_hdr->b_freeze_lock);
|
||||||
|
|
||||||
|
arc_buf_unwatch(buf);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
@ -1097,6 +1136,7 @@ arc_buf_freeze(arc_buf_t *buf)
|
||||||
buf->b_hdr->b_state == arc_anon);
|
buf->b_hdr->b_state == arc_anon);
|
||||||
arc_cksum_compute(buf, B_FALSE);
|
arc_cksum_compute(buf, B_FALSE);
|
||||||
mutex_exit(hash_lock);
|
mutex_exit(hash_lock);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
@ -1504,21 +1544,22 @@ arc_buf_add_ref(arc_buf_t *buf, void* tag)
|
||||||
* the buffer is placed on l2arc_free_on_write to be freed later.
|
* the buffer is placed on l2arc_free_on_write to be freed later.
|
||||||
*/
|
*/
|
||||||
static void
|
static void
|
||||||
arc_buf_data_free(arc_buf_hdr_t *hdr, void (*free_func)(void *, size_t),
|
arc_buf_data_free(arc_buf_t *buf, void (*free_func)(void *, size_t))
|
||||||
void *data, size_t size)
|
|
||||||
{
|
{
|
||||||
|
arc_buf_hdr_t *hdr = buf->b_hdr;
|
||||||
|
|
||||||
if (HDR_L2_WRITING(hdr)) {
|
if (HDR_L2_WRITING(hdr)) {
|
||||||
l2arc_data_free_t *df;
|
l2arc_data_free_t *df;
|
||||||
df = kmem_alloc(sizeof (l2arc_data_free_t), KM_PUSHPAGE);
|
df = kmem_alloc(sizeof (l2arc_data_free_t), KM_PUSHPAGE);
|
||||||
df->l2df_data = data;
|
df->l2df_data = buf->b_data;
|
||||||
df->l2df_size = size;
|
df->l2df_size = hdr->b_size;
|
||||||
df->l2df_func = free_func;
|
df->l2df_func = free_func;
|
||||||
mutex_enter(&l2arc_free_on_write_mtx);
|
mutex_enter(&l2arc_free_on_write_mtx);
|
||||||
list_insert_head(l2arc_free_on_write, df);
|
list_insert_head(l2arc_free_on_write, df);
|
||||||
mutex_exit(&l2arc_free_on_write_mtx);
|
mutex_exit(&l2arc_free_on_write_mtx);
|
||||||
ARCSTAT_BUMP(arcstat_l2_free_on_write);
|
ARCSTAT_BUMP(arcstat_l2_free_on_write);
|
||||||
} else {
|
} else {
|
||||||
free_func(data, size);
|
free_func(buf->b_data, hdr->b_size);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1534,16 +1575,15 @@ arc_buf_destroy(arc_buf_t *buf, boolean_t recycle, boolean_t all)
|
||||||
arc_buf_contents_t type = buf->b_hdr->b_type;
|
arc_buf_contents_t type = buf->b_hdr->b_type;
|
||||||
|
|
||||||
arc_cksum_verify(buf);
|
arc_cksum_verify(buf);
|
||||||
|
arc_buf_unwatch(buf);
|
||||||
|
|
||||||
if (!recycle) {
|
if (!recycle) {
|
||||||
if (type == ARC_BUFC_METADATA) {
|
if (type == ARC_BUFC_METADATA) {
|
||||||
arc_buf_data_free(buf->b_hdr, zio_buf_free,
|
arc_buf_data_free(buf, zio_buf_free);
|
||||||
buf->b_data, size);
|
|
||||||
arc_space_return(size, ARC_SPACE_DATA);
|
arc_space_return(size, ARC_SPACE_DATA);
|
||||||
} else {
|
} else {
|
||||||
ASSERT(type == ARC_BUFC_DATA);
|
ASSERT(type == ARC_BUFC_DATA);
|
||||||
arc_buf_data_free(buf->b_hdr,
|
arc_buf_data_free(buf, zio_data_buf_free);
|
||||||
zio_data_buf_free, buf->b_data, size);
|
|
||||||
ARCSTAT_INCR(arcstat_data_size, -size);
|
ARCSTAT_INCR(arcstat_data_size, -size);
|
||||||
atomic_add_64(&arc_size, -size);
|
atomic_add_64(&arc_size, -size);
|
||||||
}
|
}
|
||||||
|
@ -2908,6 +2948,7 @@ arc_read_done(zio_t *zio)
|
||||||
}
|
}
|
||||||
|
|
||||||
arc_cksum_compute(buf, B_FALSE);
|
arc_cksum_compute(buf, B_FALSE);
|
||||||
|
arc_buf_watch(buf);
|
||||||
|
|
||||||
if (hash_lock && zio->io_error == 0 && hdr->b_state == arc_anon) {
|
if (hash_lock && zio->io_error == 0 && hdr->b_state == arc_anon) {
|
||||||
/*
|
/*
|
||||||
|
@ -3542,6 +3583,7 @@ arc_release(arc_buf_t *buf, void *tag)
|
||||||
}
|
}
|
||||||
hdr->b_datacnt -= 1;
|
hdr->b_datacnt -= 1;
|
||||||
arc_cksum_verify(buf);
|
arc_cksum_verify(buf);
|
||||||
|
arc_buf_unwatch(buf);
|
||||||
|
|
||||||
mutex_exit(hash_lock);
|
mutex_exit(hash_lock);
|
||||||
|
|
||||||
|
|
|
@ -1630,6 +1630,23 @@ spa_init(int mode)
|
||||||
|
|
||||||
spa_mode_global = mode;
|
spa_mode_global = mode;
|
||||||
|
|
||||||
|
#ifndef _KERNEL
|
||||||
|
if (spa_mode_global != FREAD && dprintf_find_string("watch")) {
|
||||||
|
struct sigaction sa;
|
||||||
|
|
||||||
|
sa.sa_flags = SA_SIGINFO;
|
||||||
|
sigemptyset(&sa.sa_mask);
|
||||||
|
sa.sa_sigaction = arc_buf_sigsegv;
|
||||||
|
|
||||||
|
if (sigaction(SIGSEGV, &sa, NULL) == -1) {
|
||||||
|
perror("could not enable watchpoints: "
|
||||||
|
"sigaction(SIGSEGV, ...) = ");
|
||||||
|
} else {
|
||||||
|
arc_watch = B_TRUE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
fm_init();
|
fm_init();
|
||||||
refcount_init();
|
refcount_init();
|
||||||
unique_init();
|
unique_init();
|
||||||
|
|
|
@ -169,11 +169,21 @@ zio_init(void)
|
||||||
while (p2 & (p2 - 1))
|
while (p2 & (p2 - 1))
|
||||||
p2 &= p2 - 1;
|
p2 &= p2 - 1;
|
||||||
|
|
||||||
|
#ifndef _KERNEL
|
||||||
|
/*
|
||||||
|
* If we are using watchpoints, put each buffer on its own page,
|
||||||
|
* to eliminate the performance overhead of trapping to the
|
||||||
|
* kernel when modifying a non-watched buffer that shares the
|
||||||
|
* page with a watched buffer.
|
||||||
|
*/
|
||||||
|
if (arc_watch && !IS_P2ALIGNED(size, PAGESIZE))
|
||||||
|
continue;
|
||||||
|
#endif
|
||||||
if (size <= 4 * SPA_MINBLOCKSIZE) {
|
if (size <= 4 * SPA_MINBLOCKSIZE) {
|
||||||
align = SPA_MINBLOCKSIZE;
|
align = SPA_MINBLOCKSIZE;
|
||||||
} else if (P2PHASE(size, PAGESIZE) == 0) {
|
} else if (IS_P2ALIGNED(size, PAGESIZE)) {
|
||||||
align = PAGESIZE;
|
align = PAGESIZE;
|
||||||
} else if (P2PHASE(size, p2 >> 2) == 0) {
|
} else if (IS_P2ALIGNED(size, p2 >> 2)) {
|
||||||
align = p2 >> 2;
|
align = p2 >> 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue