Fixes in persistent error log

Address the following bugs in persistent error log:

1) Check nested clones, e.g. "fs->snap->clone->snap2->clone2".

2) When deleting files containing error blocks in those clones (e.g. in
   "clone" from the example above), do not break the check chain.

3) When deleting files in the originating fs before syncing the errlog
   to disk, do not break the check chain. This happens because at the
   time of introducing the error block in the error list, we do not have
   its birth txg and the head filesystem. If the original file is
   deleted before the error list is synced to the error log (which is
   when we actually look up the birth txg and the head filesystem), then
   we do not have access to this info anymore and break the check chain.

The most prominent change is related to achieving (3). We expand the
spa_error_entry_t structure to accommodate the newly introduced
zbookmark_err_phys_t structure (containing the birth txg of the error
block). For compatibility reasons we cannot remove the
zbookmark_phys_t structure and we also need to place the new structure
after se_avl, so it is not accounted for in avl_find(). Then we modify
spa_log_error() to also provide the birth txg of the error block. With
these changes in place we simplify the previously introduced function
get_head_and_birth_txg() (now named get_head_ds()).

We chose not to follow the same approach for the head filesystem (which
would have allowed removing get_head_ds() completely) to avoid
introducing new lock contention.

The stack sizes of nested functions (as measured by checkstack.pl in the
Linux kernel) are:
check_filesystem [zfs]: 272 (was 912)
check_clones [zfs]: 64

We also introduced two new tests covering the above changes.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: George Amanakis <gamanakis@gmail.com>
Closes #14633
This commit is contained in:
George Amanakis 2023-03-29 01:51:58 +02:00 committed by GitHub
parent 65d10bd87c
commit 431083f75b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 423 additions and 173 deletions

View File

@ -65,6 +65,7 @@ typedef struct spa_aux_vdev spa_aux_vdev_t;
typedef struct ddt ddt_t; typedef struct ddt ddt_t;
typedef struct ddt_entry ddt_entry_t; typedef struct ddt_entry ddt_entry_t;
typedef struct zbookmark_phys zbookmark_phys_t; typedef struct zbookmark_phys zbookmark_phys_t;
typedef struct zbookmark_err_phys zbookmark_err_phys_t;
struct bpobj; struct bpobj;
struct bplist; struct bplist;
@ -1134,7 +1135,8 @@ extern const char *spa_state_to_name(spa_t *spa);
/* error handling */ /* error handling */
struct zbookmark_phys; struct zbookmark_phys;
extern void spa_log_error(spa_t *spa, const zbookmark_phys_t *zb); extern void spa_log_error(spa_t *spa, const zbookmark_phys_t *zb,
const uint64_t *birth);
extern void spa_remove_error(spa_t *spa, zbookmark_phys_t *zb); extern void spa_remove_error(spa_t *spa, zbookmark_phys_t *zb);
extern int zfs_ereport_post(const char *clazz, spa_t *spa, vdev_t *vd, extern int zfs_ereport_post(const char *clazz, spa_t *spa, vdev_t *vd,
const zbookmark_phys_t *zb, zio_t *zio, uint64_t state); const zbookmark_phys_t *zb, zio_t *zio, uint64_t state);

View File

@ -66,6 +66,7 @@ typedef struct spa_error_entry {
zbookmark_phys_t se_bookmark; zbookmark_phys_t se_bookmark;
char *se_name; char *se_name;
avl_node_t se_avl; avl_node_t se_avl;
zbookmark_err_phys_t se_zep; /* not accounted in avl_find */
} spa_error_entry_t; } spa_error_entry_t;
typedef struct spa_history_phys { typedef struct spa_history_phys {

View File

@ -303,12 +303,12 @@ struct zbookmark_phys {
uint64_t zb_blkid; uint64_t zb_blkid;
}; };
typedef struct zbookmark_err_phys { struct zbookmark_err_phys {
uint64_t zb_object; uint64_t zb_object;
int64_t zb_level; int64_t zb_level;
uint64_t zb_blkid; uint64_t zb_blkid;
uint64_t zb_birth; uint64_t zb_birth;
} zbookmark_err_phys_t; };
#define SET_BOOKMARK(zb, objset, object, level, blkid) \ #define SET_BOOKMARK(zb, objset, object, level, blkid) \
{ \ { \

View File

@ -565,6 +565,7 @@ and keyed by the head id.
In case of encrypted filesystems with unloaded keys or unmounted encrypted In case of encrypted filesystems with unloaded keys or unmounted encrypted
filesystems we are unable to check their snapshots or clones for errors and filesystems we are unable to check their snapshots or clones for errors and
these will not be reported. these will not be reported.
In this case no filenames will be reported either.
With this feature enabled, every dataset affected by an error block is listed With this feature enabled, every dataset affected by an error block is listed
in the output of in the output of
.Nm zpool Cm status . .Nm zpool Cm status .

View File

@ -2209,7 +2209,7 @@ arc_untransform(arc_buf_t *buf, spa_t *spa, const zbookmark_phys_t *zb,
* (and generate an ereport) before leaving the ARC. * (and generate an ereport) before leaving the ARC.
*/ */
ret = SET_ERROR(EIO); ret = SET_ERROR(EIO);
spa_log_error(spa, zb); spa_log_error(spa, zb, &buf->b_hdr->b_birth);
(void) zfs_ereport_post(FM_EREPORT_ZFS_AUTHENTICATION, (void) zfs_ereport_post(FM_EREPORT_ZFS_AUTHENTICATION,
spa, NULL, zb, NULL, 0); spa, NULL, zb, NULL, 0);
} }
@ -5540,7 +5540,8 @@ arc_read_done(zio_t *zio)
ASSERT(BP_IS_PROTECTED(bp)); ASSERT(BP_IS_PROTECTED(bp));
error = SET_ERROR(EIO); error = SET_ERROR(EIO);
if ((zio->io_flags & ZIO_FLAG_SPECULATIVE) == 0) { if ((zio->io_flags & ZIO_FLAG_SPECULATIVE) == 0) {
spa_log_error(zio->io_spa, &acb->acb_zb); spa_log_error(zio->io_spa, &acb->acb_zb,
&zio->io_bp->blk_birth);
(void) zfs_ereport_post( (void) zfs_ereport_post(
FM_EREPORT_ZFS_AUTHENTICATION, FM_EREPORT_ZFS_AUTHENTICATION,
zio->io_spa, NULL, &acb->acb_zb, zio, 0); zio->io_spa, NULL, &acb->acb_zb, zio, 0);
@ -5833,7 +5834,7 @@ top:
*/ */
rc = SET_ERROR(EIO); rc = SET_ERROR(EIO);
if ((zio_flags & ZIO_FLAG_SPECULATIVE) == 0) { if ((zio_flags & ZIO_FLAG_SPECULATIVE) == 0) {
spa_log_error(spa, zb); spa_log_error(spa, zb, &hdr->b_birth);
(void) zfs_ereport_post( (void) zfs_ereport_post(
FM_EREPORT_ZFS_AUTHENTICATION, FM_EREPORT_ZFS_AUTHENTICATION,
spa, NULL, zb, NULL, 0); spa, NULL, zb, NULL, 0);

View File

@ -1620,7 +1620,8 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags,
* If this is not true it indicates tampering and we report an error. * If this is not true it indicates tampering and we report an error.
*/ */
if (db->db_objset->os_encrypted && !BP_USES_CRYPT(bpp)) { if (db->db_objset->os_encrypted && !BP_USES_CRYPT(bpp)) {
spa_log_error(db->db_objset->os_spa, &zb); spa_log_error(db->db_objset->os_spa, &zb,
&db->db_blkptr->blk_birth);
zfs_panic_recover("unencrypted block in encrypted " zfs_panic_recover("unencrypted block in encrypted "
"object set %llu", dmu_objset_id(db->db_objset)); "object set %llu", dmu_objset_id(db->db_objset));
err = SET_ERROR(EIO); err = SET_ERROR(EIO);

View File

@ -1123,7 +1123,7 @@ send_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
*/ */
if (sta->os->os_encrypted && if (sta->os->os_encrypted &&
!BP_IS_HOLE(bp) && !BP_USES_CRYPT(bp)) { !BP_IS_HOLE(bp) && !BP_USES_CRYPT(bp)) {
spa_log_error(spa, zb); spa_log_error(spa, zb, &bp->blk_birth);
zfs_panic_recover("unencrypted block in encrypted " zfs_panic_recover("unencrypted block in encrypted "
"object set %llu", dmu_objset_id(sta->os)); "object set %llu", dmu_objset_id(sta->os));
return (SET_ERROR(EIO)); return (SET_ERROR(EIO));

View File

@ -1881,7 +1881,7 @@ dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype,
if (dnp != NULL && if (dnp != NULL &&
dnp->dn_bonuslen > DN_MAX_BONUS_LEN(dnp)) { dnp->dn_bonuslen > DN_MAX_BONUS_LEN(dnp)) {
scn->scn_phys.scn_errors++; scn->scn_phys.scn_errors++;
spa_log_error(spa, zb); spa_log_error(spa, zb, &bp->blk_birth);
return (SET_ERROR(EINVAL)); return (SET_ERROR(EINVAL));
} }
@ -1976,7 +1976,7 @@ dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype,
* by arc_read() for the cases above. * by arc_read() for the cases above.
*/ */
scn->scn_phys.scn_errors++; scn->scn_phys.scn_errors++;
spa_log_error(spa, zb); spa_log_error(spa, zb, &bp->blk_birth);
return (SET_ERROR(EINVAL)); return (SET_ERROR(EINVAL));
} }

View File

@ -135,6 +135,10 @@ name_to_bookmark(char *buf, zbookmark_phys_t *zb)
} }
#ifdef _KERNEL #ifdef _KERNEL
static int check_clones(spa_t *spa, uint64_t zap_clone, uint64_t snap_count,
uint64_t *snap_obj_array, zbookmark_err_phys_t *zep, void* uaddr,
uint64_t *count);
static void static void
zep_to_zb(uint64_t dataset, zbookmark_err_phys_t *zep, zbookmark_phys_t *zb) zep_to_zb(uint64_t dataset, zbookmark_err_phys_t *zep, zbookmark_phys_t *zb)
{ {
@ -152,74 +156,22 @@ name_to_object(char *buf, uint64_t *obj)
ASSERT(*buf == '\0'); ASSERT(*buf == '\0');
} }
static int /*
get_head_and_birth_txg(spa_t *spa, zbookmark_err_phys_t *zep, uint64_t ds_obj, * Retrieve the head filesystem.
uint64_t *head_dataset_id) */
static int get_head_ds(spa_t *spa, uint64_t dsobj, uint64_t *head_ds)
{ {
dsl_pool_t *dp = spa->spa_dsl_pool;
dsl_dataset_t *ds; dsl_dataset_t *ds;
objset_t *os; int error = dsl_dataset_hold_obj(spa->spa_dsl_pool,
dsobj, FTAG, &ds);
int error = dsl_dataset_hold_obj(dp, ds_obj, FTAG, &ds); if (error != 0)
if (error != 0) {
return (error); return (error);
}
ASSERT(head_dataset_id);
*head_dataset_id = dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj;
error = dmu_objset_from_ds(ds, &os); ASSERT(head_ds);
if (error != 0) { *head_ds = dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj;
dsl_dataset_rele(ds, FTAG); dsl_dataset_rele(ds, FTAG);
return (error);
}
/*
* If the key is not loaded dbuf_dnode_findbp() will error out with
* EACCES. However in that case dnode_hold() will eventually call
* dbuf_read()->zio_wait() which may call spa_log_error(). This will
* lead to a deadlock due to us holding the mutex spa_errlist_lock.
* Avoid this by checking here if the keys are loaded, if not return.
* If the keys are not loaded the head_errlog feature is meaningless
* as we cannot figure out the birth txg of the block pointer.
*/
if (dsl_dataset_get_keystatus(ds->ds_dir) ==
ZFS_KEYSTATUS_UNAVAILABLE) {
zep->zb_birth = 0;
dsl_dataset_rele(ds, FTAG);
return (0);
}
dnode_t *dn;
blkptr_t bp;
error = dnode_hold(os, zep->zb_object, FTAG, &dn);
if (error != 0) {
dsl_dataset_rele(ds, FTAG);
return (error);
}
rw_enter(&dn->dn_struct_rwlock, RW_READER);
error = dbuf_dnode_findbp(dn, zep->zb_level, zep->zb_blkid, &bp, NULL,
NULL);
if (error == 0 && BP_IS_HOLE(&bp))
error = SET_ERROR(ENOENT);
/*
* If the key is loaded but the encrypted filesystem is unmounted when
* a scrub is run, then dbuf_dnode_findbp() will still error out with
* EACCES (possibly due to the key mapping being removed upon
* unmounting). In that case the head_errlog feature is also
* meaningless as we cannot figure out the birth txg of the block
* pointer.
*/
if (error == EACCES)
error = 0;
else if (!error)
zep->zb_birth = bp.blk_birth;
rw_exit(&dn->dn_struct_rwlock);
dnode_rele(dn, FTAG);
dsl_dataset_rele(ds, FTAG);
return (error); return (error);
} }
@ -229,7 +181,7 @@ get_head_and_birth_txg(spa_t *spa, zbookmark_err_phys_t *zep, uint64_t ds_obj,
* during spa_errlog_sync(). * during spa_errlog_sync().
*/ */
void void
spa_log_error(spa_t *spa, const zbookmark_phys_t *zb) spa_log_error(spa_t *spa, const zbookmark_phys_t *zb, const uint64_t *birth)
{ {
spa_error_entry_t search; spa_error_entry_t search;
spa_error_entry_t *new; spa_error_entry_t *new;
@ -262,8 +214,26 @@ spa_log_error(spa_t *spa, const zbookmark_phys_t *zb)
new = kmem_zalloc(sizeof (spa_error_entry_t), KM_SLEEP); new = kmem_zalloc(sizeof (spa_error_entry_t), KM_SLEEP);
new->se_bookmark = *zb; new->se_bookmark = *zb;
avl_insert(tree, new, where);
/*
* If the head_errlog feature is enabled, store the birth txg now. In
* case the file is deleted before spa_errlog_sync() runs, we will not
* be able to retrieve the birth txg.
*/
if (spa_feature_is_enabled(spa, SPA_FEATURE_HEAD_ERRLOG)) {
new->se_zep.zb_object = zb->zb_object;
new->se_zep.zb_level = zb->zb_level;
new->se_zep.zb_blkid = zb->zb_blkid;
/*
* birth may end up being NULL, e.g. in zio_done(). We
* will handle this in process_error_block().
*/
if (birth != NULL)
new->se_zep.zb_birth = *birth;
}
avl_insert(tree, new, where);
mutex_exit(&spa->spa_errlist_lock); mutex_exit(&spa->spa_errlist_lock);
} }
@ -336,20 +306,28 @@ check_filesystem(spa_t *spa, uint64_t head_ds, zbookmark_err_phys_t *zep,
error = find_birth_txg(ds, zep, &latest_txg); error = find_birth_txg(ds, zep, &latest_txg);
/* /*
* If we cannot figure out the current birth txg of the block pointer * If the filesystem is encrypted and the key is not loaded
* error out. If the filesystem is encrypted and the key is not loaded
* or the encrypted filesystem is not mounted the error will be EACCES. * or the encrypted filesystem is not mounted the error will be EACCES.
* In that case do not return an error. * In that case report an error in the head filesystem and return.
*/ */
if (error == EACCES) { if (error == EACCES) {
dsl_dataset_rele(ds, FTAG); dsl_dataset_rele(ds, FTAG);
return (0); zbookmark_phys_t zb;
} zep_to_zb(head_ds, zep, &zb);
if (error) { error = copyout_entry(&zb, uaddr, count);
if (error != 0) {
dsl_dataset_rele(ds, FTAG); dsl_dataset_rele(ds, FTAG);
return (error); return (error);
} }
if (zep->zb_birth == latest_txg) { return (0);
}
/*
* If find_birth_txg() errors out otherwise, let txg_to_consider be
* equal to the spa's syncing txg: if check_filesystem() errors out
* then affected snapshots or clones will not be checked.
*/
if (error == 0 && zep->zb_birth == latest_txg) {
/* Block neither free nor rewritten. */ /* Block neither free nor rewritten. */
zbookmark_phys_t zb; zbookmark_phys_t zb;
zep_to_zb(head_ds, zep, &zb); zep_to_zb(head_ds, zep, &zb);
@ -359,44 +337,55 @@ check_filesystem(spa_t *spa, uint64_t head_ds, zbookmark_err_phys_t *zep,
return (error); return (error);
} }
check_snapshot = B_FALSE; check_snapshot = B_FALSE;
} else { } else if (error == 0) {
ASSERT3U(zep->zb_birth, <, latest_txg);
txg_to_consider = latest_txg; txg_to_consider = latest_txg;
} }
/* How many snapshots reference this block. */ /*
uint64_t snap_count; * Retrieve the number of snapshots if the dataset is not a snapshot.
*/
uint64_t snap_count = 0;
if (dsl_dataset_phys(ds)->ds_snapnames_zapobj != 0) {
error = zap_count(spa->spa_meta_objset, error = zap_count(spa->spa_meta_objset,
dsl_dataset_phys(ds)->ds_snapnames_zapobj, &snap_count); dsl_dataset_phys(ds)->ds_snapnames_zapobj, &snap_count);
if (error != 0) { if (error != 0) {
dsl_dataset_rele(ds, FTAG); dsl_dataset_rele(ds, FTAG);
return (error); return (error);
} }
if (snap_count == 0) { if (snap_count == 0) {
/* File system has no snapshot. */ /* Filesystem without snapshots. */
dsl_dataset_rele(ds, FTAG); dsl_dataset_rele(ds, FTAG);
return (0); return (0);
} }
}
uint64_t *snap_obj_array = kmem_alloc(snap_count * sizeof (uint64_t), uint64_t *snap_obj_array = kmem_zalloc(snap_count * sizeof (uint64_t),
KM_SLEEP); KM_SLEEP);
int aff_snap_count = 0; int aff_snap_count = 0;
uint64_t snap_obj = dsl_dataset_phys(ds)->ds_prev_snap_obj; uint64_t snap_obj = dsl_dataset_phys(ds)->ds_prev_snap_obj;
uint64_t snap_obj_txg = dsl_dataset_phys(ds)->ds_prev_snap_txg; uint64_t snap_obj_txg = dsl_dataset_phys(ds)->ds_prev_snap_txg;
uint64_t zap_clone = dsl_dir_phys(ds->ds_dir)->dd_clones;
dsl_dataset_rele(ds, FTAG);
/* Check only snapshots created from this file system. */ /* Check only snapshots created from this file system. */
while (snap_obj != 0 && zep->zb_birth < snap_obj_txg && while (snap_obj != 0 && zep->zb_birth < snap_obj_txg &&
snap_obj_txg <= txg_to_consider) { snap_obj_txg <= txg_to_consider) {
dsl_dataset_rele(ds, FTAG);
error = dsl_dataset_hold_obj(dp, snap_obj, FTAG, &ds); error = dsl_dataset_hold_obj(dp, snap_obj, FTAG, &ds);
if (error != 0) if (error != 0)
goto out; goto out;
if (dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj != head_ds) if (dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj != head_ds) {
break; snap_obj = dsl_dataset_phys(ds)->ds_prev_snap_obj;
snap_obj_txg = dsl_dataset_phys(ds)->ds_prev_snap_txg;
dsl_dataset_rele(ds, FTAG);
continue;
}
boolean_t affected = B_TRUE; boolean_t affected = B_TRUE;
if (check_snapshot) { if (check_snapshot) {
@ -405,6 +394,7 @@ check_filesystem(spa_t *spa, uint64_t head_ds, zbookmark_err_phys_t *zep,
affected = (error == 0 && zep->zb_birth == blk_txg); affected = (error == 0 && zep->zb_birth == blk_txg);
} }
/* Report errors in snapshots. */
if (affected) { if (affected) {
snap_obj_array[aff_snap_count] = snap_obj; snap_obj_array[aff_snap_count] = snap_obj;
aff_snap_count++; aff_snap_count++;
@ -416,34 +406,74 @@ check_filesystem(spa_t *spa, uint64_t head_ds, zbookmark_err_phys_t *zep,
dsl_dataset_rele(ds, FTAG); dsl_dataset_rele(ds, FTAG);
goto out; goto out;
} }
}
snap_obj = dsl_dataset_phys(ds)->ds_prev_snap_obj;
snap_obj_txg = dsl_dataset_phys(ds)->ds_prev_snap_txg;
dsl_dataset_rele(ds, FTAG);
}
if (zap_clone != 0 && aff_snap_count > 0) {
error = check_clones(spa, zap_clone, snap_count, snap_obj_array,
zep, uaddr, count);
}
out:
kmem_free(snap_obj_array, sizeof (*snap_obj_array));
return (error);
}
/*
* Clone checking.
*/
static int check_clones(spa_t *spa, uint64_t zap_clone, uint64_t snap_count,
uint64_t *snap_obj_array, zbookmark_err_phys_t *zep, void* uaddr,
uint64_t *count)
{
int error = 0;
zap_cursor_t *zc;
zap_attribute_t *za;
zc = kmem_zalloc(sizeof (zap_cursor_t), KM_SLEEP);
za = kmem_zalloc(sizeof (zap_attribute_t), KM_SLEEP);
for (zap_cursor_init(zc, spa->spa_meta_objset, zap_clone);
zap_cursor_retrieve(zc, za) == 0;
zap_cursor_advance(zc)) {
dsl_pool_t *dp = spa->spa_dsl_pool;
dsl_dataset_t *clone;
error = dsl_dataset_hold_obj(dp, za->za_first_integer,
FTAG, &clone);
if (error != 0)
break;
/* /*
* Only clones whose origins were affected could also * Only clones whose origins were affected could also
* have affected snapshots. * have affected snapshots.
*/ */
zap_cursor_t zc; boolean_t found = B_FALSE;
zap_attribute_t za; for (int i = 0; i < snap_count; i++) {
for (zap_cursor_init(&zc, spa->spa_meta_objset, if (dsl_dir_phys(clone->ds_dir)->dd_origin_obj
dsl_dataset_phys(ds)->ds_next_clones_obj); == snap_obj_array[i])
zap_cursor_retrieve(&zc, &za) == 0; found = B_TRUE;
zap_cursor_advance(&zc)) { }
error = check_filesystem(spa, dsl_dataset_rele(clone, FTAG);
za.za_first_integer, zep, uaddr, count);
if (error != 0) { if (!found)
zap_cursor_fini(&zc); continue;
goto out;
} error = check_filesystem(spa, za->za_first_integer, zep,
} uaddr, count);
zap_cursor_fini(&zc);
} if (error != 0)
snap_obj_txg = dsl_dataset_phys(ds)->ds_prev_snap_txg; break;
snap_obj = dsl_dataset_phys(ds)->ds_prev_snap_obj; }
}
dsl_dataset_rele(ds, FTAG); kmem_free(za, sizeof (*za));
kmem_free(zc, sizeof (*zc));
zap_cursor_fini(zc);
out:
kmem_free(snap_obj_array, sizeof (*snap_obj_array));
return (error); return (error);
} }
@ -474,12 +504,13 @@ process_error_block(spa_t *spa, uint64_t head_ds, zbookmark_err_phys_t *zep,
void *uaddr, uint64_t *count) void *uaddr, uint64_t *count)
{ {
/* /*
* If the zb_birth is 0 it means we failed to retrieve the birth txg * If zb_birth == 0 or head_ds == 0 it means we failed to retrieve the
* of the block pointer. This happens when an encrypted filesystem is * birth txg or the head filesystem of the block pointer. This may
* not mounted or when the key is not loaded. Do not proceed to * happen e.g. when an encrypted filesystem is not mounted or when
* the key is not loaded. In this case do not proceed to
* check_filesystem(), instead do the accounting here. * check_filesystem(), instead do the accounting here.
*/ */
if (zep->zb_birth == 0) { if (zep->zb_birth == 0 || head_ds == 0) {
zbookmark_phys_t zb; zbookmark_phys_t zb;
zep_to_zb(head_ds, zep, &zb); zep_to_zb(head_ds, zep, &zb);
int error = copyout_entry(&zb, uaddr, count); int error = copyout_entry(&zb, uaddr, count);
@ -697,11 +728,10 @@ sync_upgrade_errlog(spa_t *spa, uint64_t spa_err_obj, uint64_t *newobj,
zep.zb_birth = 0; zep.zb_birth = 0;
/* /*
* We cannot use get_head_and_birth_txg() because it will * In case of an error we should simply continue instead of
* acquire the pool config lock, which we already have. In case * returning prematurely. See the next comment.
* of an error we simply continue.
*/ */
uint64_t head_dataset_obj; uint64_t head_ds;
dsl_pool_t *dp = spa->spa_dsl_pool; dsl_pool_t *dp = spa->spa_dsl_pool;
dsl_dataset_t *ds; dsl_dataset_t *ds;
objset_t *os; objset_t *os;
@ -710,8 +740,7 @@ sync_upgrade_errlog(spa_t *spa, uint64_t spa_err_obj, uint64_t *newobj,
if (error != 0) if (error != 0)
continue; continue;
head_dataset_obj = head_ds = dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj;
dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj;
/* /*
* The objset and the dnode are required for getting the block * The objset and the dnode are required for getting the block
@ -751,14 +780,14 @@ sync_upgrade_errlog(spa_t *spa, uint64_t spa_err_obj, uint64_t *newobj,
uint64_t err_obj; uint64_t err_obj;
error = zap_lookup_int_key(spa->spa_meta_objset, *newobj, error = zap_lookup_int_key(spa->spa_meta_objset, *newobj,
head_dataset_obj, &err_obj); head_ds, &err_obj);
if (error == ENOENT) { if (error == ENOENT) {
err_obj = zap_create(spa->spa_meta_objset, err_obj = zap_create(spa->spa_meta_objset,
DMU_OT_ERROR_LOG, DMU_OT_NONE, 0, tx); DMU_OT_ERROR_LOG, DMU_OT_NONE, 0, tx);
(void) zap_update_int_key(spa->spa_meta_objset, (void) zap_update_int_key(spa->spa_meta_objset,
*newobj, head_dataset_obj, err_obj, tx); *newobj, head_ds, err_obj, tx);
} }
char buf[64]; char buf[64];
@ -875,20 +904,21 @@ process_error_list(spa_t *spa, avl_tree_t *list, void *uaddr, uint64_t *count)
} }
for (se = avl_first(list); se != NULL; se = AVL_NEXT(list, se)) { for (se = avl_first(list); se != NULL; se = AVL_NEXT(list, se)) {
zbookmark_err_phys_t zep; uint64_t head_ds = 0;
zep.zb_object = se->se_bookmark.zb_object; int error = get_head_ds(spa, se->se_bookmark.zb_objset,
zep.zb_level = se->se_bookmark.zb_level; &head_ds);
zep.zb_blkid = se->se_bookmark.zb_blkid;
zep.zb_birth = 0;
uint64_t head_ds_obj; /*
int error = get_head_and_birth_txg(spa, &zep, * If get_head_ds() errors out, set the head filesystem
se->se_bookmark.zb_objset, &head_ds_obj); * to the filesystem stored in the bookmark of the
* error block.
*/
if (error != 0)
head_ds = se->se_bookmark.zb_objset;
if (!error) error = process_error_block(spa, head_ds,
error = process_error_block(spa, head_ds_obj, &zep, &se->se_zep, uaddr, count);
uaddr, count); if (error != 0)
if (error)
return (error); return (error);
} }
return (0); return (0);
@ -914,8 +944,9 @@ spa_get_errlog(spa_t *spa, void *uaddr, uint64_t *count)
#ifdef _KERNEL #ifdef _KERNEL
/* /*
* The pool config lock is needed to hold a dataset_t via (among other * The pool config lock is needed to hold a dataset_t via (among other
* places) process_error_list() -> get_head_and_birth_txg(), and lock * places) process_error_list() -> process_error_block()->
* ordering requires that we get it before the spa_errlog_lock. * find_top_affected_fs(), and lock ordering requires that we get it
* before the spa_errlog_lock.
*/ */
dsl_pool_config_enter(spa->spa_dsl_pool, FTAG); dsl_pool_config_enter(spa->spa_dsl_pool, FTAG);
mutex_enter(&spa->spa_errlog_lock); mutex_enter(&spa->spa_errlog_lock);
@ -1011,34 +1042,33 @@ sync_error_list(spa_t *spa, avl_tree_t *t, uint64_t *obj, dmu_tx_t *tx)
} else { } else {
for (se = avl_first(t); se != NULL; se = AVL_NEXT(t, se)) { for (se = avl_first(t); se != NULL; se = AVL_NEXT(t, se)) {
zbookmark_err_phys_t zep; zbookmark_err_phys_t zep;
zep.zb_object = se->se_bookmark.zb_object; zep.zb_object = se->se_zep.zb_object;
zep.zb_level = se->se_bookmark.zb_level; zep.zb_level = se->se_zep.zb_level;
zep.zb_blkid = se->se_bookmark.zb_blkid; zep.zb_blkid = se->se_zep.zb_blkid;
zep.zb_birth = 0; zep.zb_birth = se->se_zep.zb_birth;
uint64_t head_ds = 0;
int error = get_head_ds(spa, se->se_bookmark.zb_objset,
&head_ds);
/* /*
* If we cannot find out the head dataset and birth txg * If get_head_ds() errors out, set the head filesystem
* of the present error block, we simply continue. * to the filesystem stored in the bookmark of the
* Reinserting that error block to the error lists, * error block.
* even if we are not syncing the final txg, results
* in duplicate posting of errors.
*/ */
uint64_t head_dataset_obj; if (error != 0)
int error = get_head_and_birth_txg(spa, &zep, head_ds = se->se_bookmark.zb_objset;
se->se_bookmark.zb_objset, &head_dataset_obj);
if (error)
continue;
uint64_t err_obj; uint64_t err_obj;
error = zap_lookup_int_key(spa->spa_meta_objset, error = zap_lookup_int_key(spa->spa_meta_objset,
*obj, head_dataset_obj, &err_obj); *obj, head_ds, &err_obj);
if (error == ENOENT) { if (error == ENOENT) {
err_obj = zap_create(spa->spa_meta_objset, err_obj = zap_create(spa->spa_meta_objset,
DMU_OT_ERROR_LOG, DMU_OT_NONE, 0, tx); DMU_OT_ERROR_LOG, DMU_OT_NONE, 0, tx);
(void) zap_update_int_key(spa->spa_meta_objset, (void) zap_update_int_key(spa->spa_meta_objset,
*obj, head_dataset_obj, err_obj, tx); *obj, head_ds, err_obj, tx);
} }
errphys_to_name(&zep, buf, sizeof (buf)); errphys_to_name(&zep, buf, sizeof (buf));
@ -1108,7 +1138,7 @@ spa_errlog_sync(spa_t *spa, uint64_t txg)
/* /*
* The pool config lock is needed to hold a dataset_t via * The pool config lock is needed to hold a dataset_t via
* sync_error_list() -> get_head_and_birth_txg(), and lock ordering * sync_error_list() -> get_head_ds(), and lock ordering
* requires that we get it before the spa_errlog_lock. * requires that we get it before the spa_errlog_lock.
*/ */
dsl_pool_config_enter(spa->spa_dsl_pool, FTAG); dsl_pool_config_enter(spa->spa_dsl_pool, FTAG);

View File

@ -570,7 +570,8 @@ error:
if (ret == ECKSUM) { if (ret == ECKSUM) {
zio->io_error = SET_ERROR(EIO); zio->io_error = SET_ERROR(EIO);
if ((zio->io_flags & ZIO_FLAG_SPECULATIVE) == 0) { if ((zio->io_flags & ZIO_FLAG_SPECULATIVE) == 0) {
spa_log_error(spa, &zio->io_bookmark); spa_log_error(spa, &zio->io_bookmark,
&zio->io_bp->blk_birth);
(void) zfs_ereport_post(FM_EREPORT_ZFS_AUTHENTICATION, (void) zfs_ereport_post(FM_EREPORT_ZFS_AUTHENTICATION,
spa, NULL, &zio->io_bookmark, zio, 0); spa, NULL, &zio->io_bookmark, zio, 0);
} }
@ -4718,7 +4719,8 @@ zio_done(zio_t *zio)
* For logical I/O requests, tell the SPA to log the * For logical I/O requests, tell the SPA to log the
* error and generate a logical data ereport. * error and generate a logical data ereport.
*/ */
spa_log_error(zio->io_spa, &zio->io_bookmark); spa_log_error(zio->io_spa, &zio->io_bookmark,
&zio->io_bp->blk_birth);
(void) zfs_ereport_post(FM_EREPORT_ZFS_DATA, (void) zfs_ereport_post(FM_EREPORT_ZFS_DATA,
zio->io_spa, NULL, &zio->io_bookmark, zio, 0); zio->io_spa, NULL, &zio->io_bookmark, zio, 0);
} }

View File

@ -494,7 +494,8 @@ tags = ['functional', 'cli_root', 'zpool_split']
[tests/functional/cli_root/zpool_status] [tests/functional/cli_root/zpool_status]
tests = ['zpool_status_001_pos', 'zpool_status_002_pos', tests = ['zpool_status_001_pos', 'zpool_status_002_pos',
'zpool_status_003_pos', 'zpool_status_004_pos', 'zpool_status_003_pos', 'zpool_status_004_pos',
'zpool_status_005_pos', 'zpool_status_features_001_pos'] 'zpool_status_005_pos', 'zpool_status_006_pos',
'zpool_status_007_pos', 'zpool_status_features_001_pos']
tags = ['functional', 'cli_root', 'zpool_status'] tags = ['functional', 'cli_root', 'zpool_status']
[tests/functional/cli_root/zpool_sync] [tests/functional/cli_root/zpool_sync]

View File

@ -1169,6 +1169,8 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/cli_root/zpool_status/zpool_status_003_pos.ksh \ functional/cli_root/zpool_status/zpool_status_003_pos.ksh \
functional/cli_root/zpool_status/zpool_status_004_pos.ksh \ functional/cli_root/zpool_status/zpool_status_004_pos.ksh \
functional/cli_root/zpool_status/zpool_status_005_pos.ksh \ functional/cli_root/zpool_status/zpool_status_005_pos.ksh \
functional/cli_root/zpool_status/zpool_status_006_pos.ksh \
functional/cli_root/zpool_status/zpool_status_007_pos.ksh \
functional/cli_root/zpool_status/zpool_status_features_001_pos.ksh \ functional/cli_root/zpool_status/zpool_status_features_001_pos.ksh \
functional/cli_root/zpool_sync/cleanup.ksh \ functional/cli_root/zpool_sync/cleanup.ksh \
functional/cli_root/zpool_sync/setup.ksh \ functional/cli_root/zpool_sync/setup.ksh \

View File

@ -61,11 +61,13 @@ dd if=/$TESTPOOL2/10m_file bs=1M || true
log_must zfs snapshot $TESTPOOL2@snap log_must zfs snapshot $TESTPOOL2@snap
log_must zfs clone $TESTPOOL2@snap $TESTPOOL2/clone log_must zfs clone $TESTPOOL2@snap $TESTPOOL2/clone
log_must zfs create $TESTPOOL2/$TESTFS1
# Look to see that snapshot, clone and filesystem our files report errors # Look to see that snapshot, clone and filesystem our files report errors
log_must zpool status -v $TESTPOOL2 log_must zpool status -v $TESTPOOL2
log_must eval "zpool status -v | grep '$TESTPOOL2@snap:/10m_file'" log_must eval "zpool status -v | grep '$TESTPOOL2@snap:/10m_file'"
log_must eval "zpool status -v | grep '$TESTPOOL2/clone/10m_file'" log_must eval "zpool status -v | grep '$TESTPOOL2/clone/10m_file'"
log_must eval "zpool status -v | grep '$TESTPOOL2/10m_file'" log_must eval "zpool status -v | grep '$TESTPOOL2/10m_file'"
log_mustnot eval "zpool status -v | grep '$TESTFS1'"
log_pass "'zpool status -v' outputs affected filesystem, snapshot & clone" log_pass "'zpool status -v' outputs affected filesystem, snapshot & clone"

View File

@ -24,7 +24,6 @@
# Copyright (c) 2022 George Amanakis. All rights reserved. # Copyright (c) 2022 George Amanakis. All rights reserved.
# #
. $STF_SUITE/include/libtest.shlib
# #
# DESCRIPTION: # DESCRIPTION:
# Verify correct output with 'zpool status -v' after corrupting a file # Verify correct output with 'zpool status -v' after corrupting a file
@ -34,7 +33,12 @@
# 2. zinject checksum errors # 2. zinject checksum errors
# 3. Unmount the filesystem and unload the key # 3. Unmount the filesystem and unload the key
# 4. Scrub the pool # 4. Scrub the pool
# 5. Verify we report errors in the pool in 'zpool status -v' # 5. Verify we report that errors were detected but we do not report
# the filename since the key is not loaded.
# 6. Load the key and mount the encrypted fs.
# 7. Verify we report errors in the pool in 'zpool status -v'
. $STF_SUITE/include/libtest.shlib
verify_runnable "both" verify_runnable "both"
@ -66,13 +70,21 @@ log_must dd if=/dev/urandom of=$file bs=1024 count=1024 oflag=sync
log_must eval "echo 'aaaaaaaa' >> "$file log_must eval "echo 'aaaaaaaa' >> "$file
corrupt_blocks_at_level $file 0 corrupt_blocks_at_level $file 0
log_must zfs unmount $TESTPOOL2/$TESTFS1 log_must zfs umount $TESTPOOL2/$TESTFS1
log_must zfs unload-key $TESTPOOL2/$TESTFS1 log_must zfs unload-key -a
log_must zpool sync $TESTPOOL2 log_must zpool sync $TESTPOOL2
log_must zpool scrub $TESTPOOL2 log_must zpool scrub $TESTPOOL2
log_must zpool wait -t scrub $TESTPOOL2 log_must zpool wait -t scrub $TESTPOOL2
log_must zpool status -v $TESTPOOL2 log_must zpool status -v $TESTPOOL2
log_must eval "zpool status -v $TESTPOOL2 | \ log_must eval "zpool status -v $TESTPOOL2 | \
grep \"Permanent errors have been detected\"" grep \"Permanent errors have been detected\""
log_mustnot eval "zpool status -v $TESTPOOL2 | grep '$file'"
log_must eval "cat /$TESTPOOL2/pwd | zfs load-key $TESTPOOL2/$TESTFS1"
log_must zfs mount $TESTPOOL2/$TESTFS1
log_must zpool status -v $TESTPOOL2
log_must eval "zpool status -v $TESTPOOL2 | \
grep \"Permanent errors have been detected\""
log_must eval "zpool status -v $TESTPOOL2 | grep '$file'"
log_pass "Verify reporting errors with unloaded keys works" log_pass "Verify reporting errors with unloaded keys works"

View File

@ -0,0 +1,97 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2023 George Amanakis. All rights reserved.
#
#
# DESCRIPTION:
# Verify reporting errors when deleting files
#
# STRATEGY:
# 1. Create a pool, and a file
# 2. zinject checksum errors
# 3. Create snapshots and clones like:
# fs->snap1->clone1->snap2->clone2->...
# 4. Read the original file and immediately delete it
# 5. Delete the file in clone2
# 6. Snapshot clone2->snapxx and clone into snapxx->clonexx
# 7. Verify we report errors in the pool in 'zpool status -v'
# 8. Promote clone1
# 9. Verify we report errors in the pool in 'zpool status -v'
# Pull in the shared test library; the log_* helpers and destroy_pool used
# below are not defined in this file.
. $STF_SUITE/include/libtest.shlib
# NOTE(review): presumably declares the test runnable in both global and
# local zones -- confirm against libtest.shlib.
verify_runnable "both"
#
# Exit handler (registered through log_onexit): cancel the zinject
# handlers, destroy the scratch pool and remove its backing file.
#
function cleanup
{
	log_must zinject -c all
	destroy_pool $TESTPOOL2
	rm -f $TESTDIR/vdev_a
}
# Test body: build a nested snapshot/clone chain on a pool with injected
# checksum errors, delete the damaged file in two places, and check which
# paths 'zpool status -v' lists before and after promoting clone1.
log_assert "Verify reporting errors when deleting files"
log_onexit cleanup
# The damaged file lives in the pool's root dataset.
typeset file="/$TESTPOOL2/$TESTFILE0"
# Create a file-backed pool with the head_errlog feature enabled and write
# 1 MiB (1024 x 1 KiB) of random data into the test file.
truncate -s $MINVDEVSIZE $TESTDIR/vdev_a
log_must zpool create -f -o feature@head_errlog=enabled $TESTPOOL2 $TESTDIR/vdev_a
log_must dd if=/dev/urandom of=$file bs=1024 count=1024 oflag=sync
# Inject checksum errors (-e checksum) on the data (-t data) of $file.
log_must zinject -t data -e checksum -f 100 -am $file
# Build the chain fs->snap1->clone1->snap2->clone2->snap3->clone3: each pass
# snapshots the last dataset listed under the pool and clones that snapshot.
# This relies on 'zfs list -r' printing the most recent clone last.
for i in {1..3}; do
lastfs="$(zfs list -r $TESTPOOL2 | tail -1 | awk '{print $1}')"
log_must zfs snap $lastfs@snap$i
log_must zfs clone $lastfs@snap$i $TESTPOOL2/clone$i
done
# Reading the original file is expected to fail (log_mustnot).
log_mustnot dd if=$file of=/dev/null bs=1024
# Delete the file from the original filesystem and from clone2.
log_must rm $file /$TESTPOOL2/clone2/$TESTFILE0
# Snapshot clone2 after the deletion and clone that snapshot, so clonexx is
# created from a state where the file is already gone.
log_must zfs snap $TESTPOOL2/clone2@snapxx
log_must zfs clone $TESTPOOL2/clone2@snapxx $TESTPOOL2/clonexx
# Run status once on its own; its result is not inspected beyond success.
log_must zpool status -v $TESTPOOL2
# The pool must report permanent errors.
log_must eval "zpool status -v $TESTPOOL2 | \
grep \"Permanent errors have been detected\""
# The file must be listed for snap1, clone1, clone1@snap2, clone2@snap3 and
# clone3, and must not be listed for clone2 or clonexx (where it was deleted
# or never existed).
# NOTE(review): these per-path checks run 'zpool status -v' without a pool
# argument, unlike the check just before them -- confirm this is intentional.
log_must eval "zpool status -v | grep '$TESTPOOL2@snap1:/$TESTFILE0'"
log_must eval "zpool status -v | grep '$TESTPOOL2/clone1/$TESTFILE0'"
log_must eval "zpool status -v | grep '$TESTPOOL2/clone1@snap2:/$TESTFILE0'"
log_mustnot eval "zpool status -v | grep '$TESTPOOL2/clone2/$TESTFILE0'"
log_mustnot eval "zpool status -v | grep '$TESTPOOL2/clonexx/$TESTFILE0'"
log_must eval "zpool status -v | grep '$TESTPOOL2/clone2@snap3:/$TESTFILE0'"
log_must eval "zpool status -v | grep '$TESTPOOL2/clone3/$TESTFILE0'"
# Promote clone1 and repeat the checks. The only expectation that changes is
# that snap1 is now looked for under clone1 (clone1@snap1).
log_must zfs promote $TESTPOOL2/clone1
log_must eval "zpool status -v $TESTPOOL2 | \
grep \"Permanent errors have been detected\""
log_must eval "zpool status -v | grep '$TESTPOOL2/clone1@snap1:/$TESTFILE0'"
log_must eval "zpool status -v | grep '$TESTPOOL2/clone1/$TESTFILE0'"
log_must eval "zpool status -v | grep '$TESTPOOL2/clone1@snap2:/$TESTFILE0'"
log_mustnot eval "zpool status -v | grep '$TESTPOOL2/clone2/$TESTFILE0'"
log_mustnot eval "zpool status -v | grep '$TESTPOOL2/clonexx/$TESTFILE0'"
log_must eval "zpool status -v | grep '$TESTPOOL2/clone2@snap3:/$TESTFILE0'"
log_must eval "zpool status -v | grep '$TESTPOOL2/clone3/$TESTFILE0'"
log_pass "Verify reporting errors when deleting files"

View File

@ -0,0 +1,98 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2023 George Amanakis. All rights reserved.
#
#
# DESCRIPTION:
# Verify reporting errors when deleting corrupted files after scrub
#
# STRATEGY:
# 1. Create a pool, and a file
# 2. Corrupt the file
# 3. Create snapshots and clones like:
# fs->snap1->clone1->snap2->clone2->...
# 4. Scrub the pool, then delete the original file
# 5. Delete the file in clone2
# 6. Snapshot clone2->snapxx and clone into snapxx->clonexx
# 7. Verify we report errors in the pool in 'zpool status -v'
# 8. Promote clone1
# 9. Verify we report errors in the pool in 'zpool status -v'
# Pull in the shared test library; the log_* helpers, destroy_pool and
# corrupt_blocks_at_level used below are not defined in this file.
. $STF_SUITE/include/libtest.shlib
# NOTE(review): presumably declares the test runnable in both global and
# local zones -- confirm against libtest.shlib.
verify_runnable "both"
#
# Exit handler (registered through log_onexit): destroy the scratch pool
# and remove its backing file.
#
function cleanup
{
	destroy_pool $TESTPOOL2
	rm -f $TESTDIR/vdev_a
}
# Test body: corrupt a file on disk, build a nested snapshot/clone chain,
# scrub the pool, delete the damaged file in two places, and check which
# paths 'zpool status -v' lists before and after promoting clone1.
log_assert "Verify reporting errors when deleting corrupted files after scrub"
log_onexit cleanup
# The damaged file lives in a child filesystem of the pool.
typeset file="/$TESTPOOL2/$TESTFS1/$TESTFILE0"
# Create a file-backed pool and a filesystem with primarycache=none, then
# write 1 MiB (1024 x 1 KiB) of random data into the test file.
# NOTE(review): primarycache=none is presumably there so reads are not served
# from cache -- confirm.
truncate -s $MINVDEVSIZE $TESTDIR/vdev_a
log_must zpool create -f $TESTPOOL2 $TESTDIR/vdev_a
log_must zfs create -o primarycache=none $TESTPOOL2/$TESTFS1
log_must dd if=/dev/urandom of=$file bs=1024 count=1024 oflag=sync
# Corrupt the file (helper from the test library; the trailing 0 is
# presumably the block level -- confirm).
corrupt_blocks_at_level $file 0
# Build the chain fs->snap1->clone1->snap2->clone2->snap3->clone3. lastfs
# starts as the last dataset listed under the pool and is then advanced to
# the clone created in each pass.
lastfs="$(zfs list -r $TESTPOOL2 | tail -1 | awk '{print $1}')"
for i in {1..3}; do
log_must zfs snap $lastfs@snap$i
log_must zfs clone $lastfs@snap$i $TESTPOOL2/clone$i
lastfs="$(zfs list -r $TESTPOOL2/clone$i | tail -1 | awk '{print $1}')"
done
# Scrub the pool (-w presumably waits for completion -- confirm).
log_must zpool scrub -w $TESTPOOL2
# Delete the file from the original filesystem and from clone2.
log_must rm $file /$TESTPOOL2/clone2/$TESTFILE0
# Snapshot clone2 after the deletion and clone that snapshot, so clonexx is
# created from a state where the file is already gone.
log_must zfs snap $TESTPOOL2/clone2@snapxx
log_must zfs clone $TESTPOOL2/clone2@snapxx $TESTPOOL2/clonexx
# Run status once on its own; its result is not inspected beyond success.
log_must zpool status -v $TESTPOOL2
# The pool must report permanent errors.
log_must eval "zpool status -v $TESTPOOL2 | \
grep \"Permanent errors have been detected\""
# The file must be listed for snap1, clone1, clone1@snap2, clone2@snap3 and
# clone3, and must not be listed for clone2 or clonexx.
# NOTE(review): these per-path checks run 'zpool status -v' without a pool
# argument, unlike the check just before them -- confirm this is intentional.
log_must eval "zpool status -v | grep '$TESTPOOL2/$TESTFS1@snap1:/$TESTFILE0'"
log_must eval "zpool status -v | grep '$TESTPOOL2/clone1/$TESTFILE0'"
log_must eval "zpool status -v | grep '$TESTPOOL2/clone1@snap2:/$TESTFILE0'"
log_mustnot eval "zpool status -v | grep '$TESTPOOL2/clone2/$TESTFILE0'"
log_mustnot eval "zpool status -v | grep '$TESTPOOL2/clonexx/$TESTFILE0'"
log_must eval "zpool status -v | grep '$TESTPOOL2/clone2@snap3:/$TESTFILE0'"
log_must eval "zpool status -v | grep '$TESTPOOL2/clone3/$TESTFILE0'"
# Promote clone1 and repeat the checks. The only expectation that changes is
# that snap1 is now looked for under clone1 (clone1@snap1).
log_must zfs promote $TESTPOOL2/clone1
log_must eval "zpool status -v $TESTPOOL2 | \
grep \"Permanent errors have been detected\""
log_must eval "zpool status -v | grep '$TESTPOOL2/clone1@snap1:/$TESTFILE0'"
log_must eval "zpool status -v | grep '$TESTPOOL2/clone1/$TESTFILE0'"
log_must eval "zpool status -v | grep '$TESTPOOL2/clone1@snap2:/$TESTFILE0'"
log_mustnot eval "zpool status -v | grep '$TESTPOOL2/clone2/$TESTFILE0'"
log_mustnot eval "zpool status -v | grep '$TESTPOOL2/clonexx/$TESTFILE0'"
log_must eval "zpool status -v | grep '$TESTPOOL2/clone2@snap3:/$TESTFILE0'"
log_must eval "zpool status -v | grep '$TESTPOOL2/clone3/$TESTFILE0'"
log_pass "Verify reporting errors when deleting corrupted files after scrub"