OpenZFS 8063 - verify that we do not attempt to access inactive txg

Authored by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: Serapheim Dimitropoulos <serapheim@delphix.com>
Reviewed by: Pavel Zakharov <pavel.zakharov@delphix.com>
Approved by: Robert Mustacchi <rm@joyent.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Ported-by: George Melikov <mail@gmelikov.ru>

A standard practice in ZFS is to keep track of "per-txg" state. Any of
the 3 active TXGs (open, quiescing, syncing) can have different values
for this state. We should assert that we do not attempt to modify other
(inactive) TXGs.

Porting Notes:
- ASSERTV added to txg_sync_waiting() for unused variable.

OpenZFS-issue: https://www.illumos.org/issues/8063
OpenZFS-commit: https://github.com/openzfs/openzfs/commit/01acb46
Closes #6109
This commit is contained in:
Matthew Ahrens 2017-04-24 09:34:36 -07:00 committed by Brian Behlendorf
parent 335b251ac1
commit 4747a7d3d4
8 changed files with 53 additions and 26 deletions

View File

@ -23,7 +23,7 @@
* Use is subject to license terms. * Use is subject to license terms.
*/ */
/* /*
* Copyright (c) 2012, 2014 by Delphix. All rights reserved. * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
*/ */
#ifndef _SYS_TXG_H #ifndef _SYS_TXG_H
@ -60,6 +60,7 @@ typedef struct txg_node {
typedef struct txg_list { typedef struct txg_list {
kmutex_t tl_lock; kmutex_t tl_lock;
size_t tl_offset; size_t tl_offset;
spa_t *tl_spa;
txg_node_t *tl_head[TXG_SIZE]; txg_node_t *tl_head[TXG_SIZE];
} txg_list_t; } txg_list_t;
@ -103,6 +104,8 @@ extern boolean_t txg_stalled(struct dsl_pool *dp);
/* returns TRUE if someone is waiting for the next txg to sync */ /* returns TRUE if someone is waiting for the next txg to sync */
extern boolean_t txg_sync_waiting(struct dsl_pool *dp); extern boolean_t txg_sync_waiting(struct dsl_pool *dp);
extern void txg_verify(spa_t *spa, uint64_t txg);
/* /*
* Wait for pending commit callbacks of already-synced transactions to finish * Wait for pending commit callbacks of already-synced transactions to finish
* processing. * processing.
@ -115,7 +118,7 @@ extern void txg_wait_callbacks(struct dsl_pool *dp);
#define TXG_CLEAN(txg) ((txg) - 1) #define TXG_CLEAN(txg) ((txg) - 1)
extern void txg_list_create(txg_list_t *tl, size_t offset); extern void txg_list_create(txg_list_t *tl, spa_t *spa, size_t offset);
extern void txg_list_destroy(txg_list_t *tl); extern void txg_list_destroy(txg_list_t *tl);
extern boolean_t txg_list_empty(txg_list_t *tl, uint64_t txg); extern boolean_t txg_list_empty(txg_list_t *tl, uint64_t txg);
extern boolean_t txg_all_lists_empty(txg_list_t *tl); extern boolean_t txg_all_lists_empty(txg_list_t *tl);

View File

@ -20,7 +20,7 @@
*/ */
/* /*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved. * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
*/ */
/* Portions Copyright 2010 Robert Milkowski */ /* Portions Copyright 2010 Robert Milkowski */
@ -94,6 +94,15 @@ typedef struct zil_chain {
#define ZIL_MIN_BLKSZ 4096ULL #define ZIL_MIN_BLKSZ 4096ULL
/*
* ziltest is by and large an ugly hack, but very useful in
* checking replay without tedious work.
* When running ziltest we want to keep all itx's and so maintain
* a single list in the zl_itxg[] that uses a high txg: ZILTEST_TXG
* We subtract TXG_CONCURRENT_STATES to allow for common code.
*/
#define ZILTEST_TXG (UINT64_MAX - TXG_CONCURRENT_STATES)
/* /*
* The words of a log block checksum. * The words of a log block checksum.
*/ */

View File

@ -21,7 +21,7 @@
/* /*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2012, 2016 by Delphix. All rights reserved. * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
*/ */
#include <sys/dmu.h> #include <sys/dmu.h>
@ -87,7 +87,7 @@ dmu_tx_create_assigned(struct dsl_pool *dp, uint64_t txg)
{ {
dmu_tx_t *tx = dmu_tx_create_dd(NULL); dmu_tx_t *tx = dmu_tx_create_dd(NULL);
ASSERT3U(txg, <=, dp->dp_tx.tx_open_txg); txg_verify(dp->dp_spa, txg);
tx->tx_pool = dp; tx->tx_pool = dp;
tx->tx_txg = txg; tx->tx_txg = txg;
tx->tx_anyobj = TRUE; tx->tx_anyobj = TRUE;

View File

@ -161,13 +161,13 @@ dsl_pool_open_impl(spa_t *spa, uint64_t txg)
rrw_init(&dp->dp_config_rwlock, B_TRUE); rrw_init(&dp->dp_config_rwlock, B_TRUE);
txg_init(dp, txg); txg_init(dp, txg);
txg_list_create(&dp->dp_dirty_datasets, txg_list_create(&dp->dp_dirty_datasets, spa,
offsetof(dsl_dataset_t, ds_dirty_link)); offsetof(dsl_dataset_t, ds_dirty_link));
txg_list_create(&dp->dp_dirty_zilogs, txg_list_create(&dp->dp_dirty_zilogs, spa,
offsetof(zilog_t, zl_dirty_link)); offsetof(zilog_t, zl_dirty_link));
txg_list_create(&dp->dp_dirty_dirs, txg_list_create(&dp->dp_dirty_dirs, spa,
offsetof(dsl_dir_t, dd_dirty_link)); offsetof(dsl_dir_t, dd_dirty_link));
txg_list_create(&dp->dp_sync_tasks, txg_list_create(&dp->dp_sync_tasks, spa,
offsetof(dsl_sync_task_t, dst_node)); offsetof(dsl_sync_task_t, dst_node));
dp->dp_sync_taskq = taskq_create("dp_sync_taskq", dp->dp_sync_taskq = taskq_create("dp_sync_taskq",

View File

@ -21,7 +21,7 @@
/* /*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013 by Delphix. All rights reserved. * Copyright (c) 2011, 2017 by Delphix. All rights reserved.
* Copyright (c) 2015, Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2015, Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2013, 2014, Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2013, 2014, Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
@ -1136,7 +1136,7 @@ spa_activate(spa_t *spa, int mode)
list_create(&spa->spa_state_dirty_list, sizeof (vdev_t), list_create(&spa->spa_state_dirty_list, sizeof (vdev_t),
offsetof(vdev_t, vdev_state_dirty_node)); offsetof(vdev_t, vdev_state_dirty_node));
txg_list_create(&spa->spa_vdev_txg_list, txg_list_create(&spa->spa_vdev_txg_list, spa,
offsetof(struct vdev, vdev_txg_node)); offsetof(struct vdev, vdev_txg_node));
avl_create(&spa->spa_errlist_scrub, avl_create(&spa->spa_errlist_scrub,

View File

@ -21,7 +21,7 @@
/* /*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Portions Copyright 2011 Martin Matuska * Portions Copyright 2011 Martin Matuska
* Copyright (c) 2012, 2014 by Delphix. All rights reserved. * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
*/ */
#include <sys/zfs_context.h> #include <sys/zfs_context.h>
@ -31,6 +31,7 @@
#include <sys/dmu_tx.h> #include <sys/dmu_tx.h>
#include <sys/dsl_pool.h> #include <sys/dsl_pool.h>
#include <sys/dsl_scan.h> #include <sys/dsl_scan.h>
#include <sys/zil.h>
#include <sys/callb.h> #include <sys/callb.h>
#include <sys/trace_txg.h> #include <sys/trace_txg.h>
@ -722,17 +723,33 @@ txg_sync_waiting(dsl_pool_t *dp)
tx->tx_quiesced_txg != 0); tx->tx_quiesced_txg != 0);
} }
/*
 * Verify that this txg is active (open, quiescing, syncing). Non-active
 * txgs should not be manipulated, since their per-txg slots (indexed by
 * txg & TXG_MASK) may belong to a different txg.
 */
void
txg_verify(spa_t *spa, uint64_t txg)
{
/* ASSERTV: dp is only read by the ASSERT3U checks below; the wrapper
 * avoids an unused-variable warning when assertions are compiled out. */
ASSERTV(dsl_pool_t *dp = spa_get_dsl(spa));
/* Skip pool-creation-time txgs and the ziltest sentinel txg, which
 * intentionally fall outside the active window. */
if (txg <= TXG_INITIAL || txg == ZILTEST_TXG)
return;
/* txg must not be newer than the currently open txg ... */
ASSERT3U(txg, <=, dp->dp_tx.tx_open_txg);
/* ... nor already fully synced ... */
ASSERT3U(txg, >=, dp->dp_tx.tx_synced_txg);
/* ... and must lie within the TXG_CONCURRENT_STATES active window. */
ASSERT3U(txg, >=, dp->dp_tx.tx_open_txg - TXG_CONCURRENT_STATES);
}
/* /*
* Per-txg object lists. * Per-txg object lists.
*/ */
void void
txg_list_create(txg_list_t *tl, size_t offset) txg_list_create(txg_list_t *tl, spa_t *spa, size_t offset)
{ {
int t; int t;
mutex_init(&tl->tl_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&tl->tl_lock, NULL, MUTEX_DEFAULT, NULL);
tl->tl_offset = offset; tl->tl_offset = offset;
tl->tl_spa = spa;
for (t = 0; t < TXG_SIZE; t++) for (t = 0; t < TXG_SIZE; t++)
tl->tl_head[t] = NULL; tl->tl_head[t] = NULL;
@ -752,6 +769,7 @@ txg_list_destroy(txg_list_t *tl)
boolean_t boolean_t
txg_list_empty(txg_list_t *tl, uint64_t txg) txg_list_empty(txg_list_t *tl, uint64_t txg)
{ {
txg_verify(tl->tl_spa, txg);
return (tl->tl_head[txg & TXG_MASK] == NULL); return (tl->tl_head[txg & TXG_MASK] == NULL);
} }
@ -786,6 +804,7 @@ txg_list_add(txg_list_t *tl, void *p, uint64_t txg)
txg_node_t *tn = (txg_node_t *)((char *)p + tl->tl_offset); txg_node_t *tn = (txg_node_t *)((char *)p + tl->tl_offset);
boolean_t add; boolean_t add;
txg_verify(tl->tl_spa, txg);
mutex_enter(&tl->tl_lock); mutex_enter(&tl->tl_lock);
add = (tn->tn_member[t] == 0); add = (tn->tn_member[t] == 0);
if (add) { if (add) {
@ -810,6 +829,7 @@ txg_list_add_tail(txg_list_t *tl, void *p, uint64_t txg)
txg_node_t *tn = (txg_node_t *)((char *)p + tl->tl_offset); txg_node_t *tn = (txg_node_t *)((char *)p + tl->tl_offset);
boolean_t add; boolean_t add;
txg_verify(tl->tl_spa, txg);
mutex_enter(&tl->tl_lock); mutex_enter(&tl->tl_lock);
add = (tn->tn_member[t] == 0); add = (tn->tn_member[t] == 0);
if (add) { if (add) {
@ -837,6 +857,7 @@ txg_list_remove(txg_list_t *tl, uint64_t txg)
txg_node_t *tn; txg_node_t *tn;
void *p = NULL; void *p = NULL;
txg_verify(tl->tl_spa, txg);
mutex_enter(&tl->tl_lock); mutex_enter(&tl->tl_lock);
if ((tn = tl->tl_head[t]) != NULL) { if ((tn = tl->tl_head[t]) != NULL) {
p = (char *)tn - tl->tl_offset; p = (char *)tn - tl->tl_offset;
@ -858,6 +879,7 @@ txg_list_remove_this(txg_list_t *tl, void *p, uint64_t txg)
int t = txg & TXG_MASK; int t = txg & TXG_MASK;
txg_node_t *tn, **tp; txg_node_t *tn, **tp;
txg_verify(tl->tl_spa, txg);
mutex_enter(&tl->tl_lock); mutex_enter(&tl->tl_lock);
for (tp = &tl->tl_head[t]; (tn = *tp) != NULL; tp = &tn->tn_next[t]) { for (tp = &tl->tl_head[t]; (tn = *tp) != NULL; tp = &tn->tn_next[t]) {
@ -881,6 +903,7 @@ txg_list_member(txg_list_t *tl, void *p, uint64_t txg)
int t = txg & TXG_MASK; int t = txg & TXG_MASK;
txg_node_t *tn = (txg_node_t *)((char *)p + tl->tl_offset); txg_node_t *tn = (txg_node_t *)((char *)p + tl->tl_offset);
txg_verify(tl->tl_spa, txg);
return (tn->tn_member[t] != 0); return (tn->tn_member[t] != 0);
} }
@ -893,6 +916,7 @@ txg_list_head(txg_list_t *tl, uint64_t txg)
int t = txg & TXG_MASK; int t = txg & TXG_MASK;
txg_node_t *tn = tl->tl_head[t]; txg_node_t *tn = tl->tl_head[t];
txg_verify(tl->tl_spa, txg);
return (tn == NULL ? NULL : (char *)tn - tl->tl_offset); return (tn == NULL ? NULL : (char *)tn - tl->tl_offset);
} }
@ -902,6 +926,7 @@ txg_list_next(txg_list_t *tl, void *p, uint64_t txg)
int t = txg & TXG_MASK; int t = txg & TXG_MASK;
txg_node_t *tn = (txg_node_t *)((char *)p + tl->tl_offset); txg_node_t *tn = (txg_node_t *)((char *)p + tl->tl_offset);
txg_verify(tl->tl_spa, txg);
tn = tn->tn_next[t]; tn = tn->tn_next[t];
return (tn == NULL ? NULL : (char *)tn - tl->tl_offset); return (tn == NULL ? NULL : (char *)tn - tl->tl_offset);

View File

@ -370,9 +370,9 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops)
vd->vdev_dtl[t] = range_tree_create(NULL, NULL, vd->vdev_dtl[t] = range_tree_create(NULL, NULL,
&vd->vdev_dtl_lock); &vd->vdev_dtl_lock);
} }
txg_list_create(&vd->vdev_ms_list, txg_list_create(&vd->vdev_ms_list, spa,
offsetof(struct metaslab, ms_txg_node)); offsetof(struct metaslab, ms_txg_node));
txg_list_create(&vd->vdev_dtl_list, txg_list_create(&vd->vdev_dtl_list, spa,
offsetof(struct vdev, vdev_dtl_node)); offsetof(struct vdev, vdev_dtl_node));
vd->vdev_stat.vs_timestamp = gethrtime(); vd->vdev_stat.vs_timestamp = gethrtime();
vdev_queue_init(vd); vdev_queue_init(vd);

View File

@ -20,7 +20,7 @@
*/ */
/* /*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2016 by Delphix. All rights reserved. * Copyright (c) 2011, 2017 by Delphix. All rights reserved.
* Copyright (c) 2014 Integros [integros.com] * Copyright (c) 2014 Integros [integros.com]
*/ */
@ -109,16 +109,6 @@ static void zil_async_to_sync(zilog_t *zilog, uint64_t foid);
#define LWB_EMPTY(lwb) ((BP_GET_LSIZE(&lwb->lwb_blk) - \ #define LWB_EMPTY(lwb) ((BP_GET_LSIZE(&lwb->lwb_blk) - \
sizeof (zil_chain_t)) == (lwb->lwb_sz - lwb->lwb_nused)) sizeof (zil_chain_t)) == (lwb->lwb_sz - lwb->lwb_nused))
/*
* ziltest is by and large an ugly hack, but very useful in
* checking replay without tedious work.
* When running ziltest we want to keep all itx's and so maintain
* a single list in the zl_itxg[] that uses a high txg: ZILTEST_TXG
* We subtract TXG_CONCURRENT_STATES to allow for common code.
*/
#define ZILTEST_TXG (UINT64_MAX - TXG_CONCURRENT_STATES)
static int static int
zil_bp_compare(const void *x1, const void *x2) zil_bp_compare(const void *x1, const void *x2)
{ {