ZIL: Assert record sizes in different places

This should make sure the log is written without overflows.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by:	Alexander Motin <mav@FreeBSD.org>
Sponsored by:	iXsystems, Inc.
Closes #15517
commit 2a27fd4111 (parent b94ce4e17d)
Author: Alexander Motin
Date:   2023-11-28 16:35:14 -05:00 (committed by GitHub)
6 changed files with 115 additions and 37 deletions
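
The diffs below add the assertions file by file. As a rough illustration of the invariant they enforce, the sketch below walks a log block and checks every record length against both the header size and the space remaining in the block. This is not code from the commit; the names rec_hdr_t and check_zil_block are made up for the example, and plain assert() stands in for ASSERT3U().

#include <assert.h>
#include <stdint.h>
#include <string.h>

/* Hypothetical record header standing in for the lr_t common header. */
typedef struct {
	uint64_t reclen;	/* total record size, header included */
} rec_hdr_t;

/*
 * Walk a log block whose payload is nused bytes inside a blksz buffer
 * and verify that no record header is truncated and no record runs
 * past the end of the block -- the overflow the commit guards against.
 */
void
check_zil_block(const uint8_t *blk, uint64_t nused, uint64_t blksz)
{
	const uint8_t *p, *end;

	assert(nused >= sizeof (rec_hdr_t));	/* room for one header */
	assert(nused <= blksz);			/* payload fits the buffer */
	end = blk + nused;

	for (p = blk; p < end; ) {
		rec_hdr_t hdr;

		memcpy(&hdr, p, sizeof (hdr));
		assert(hdr.reclen >= sizeof (rec_hdr_t));	/* header intact */
		assert(hdr.reclen <= (uint64_t)(end - p));	/* no overflow */
		p += hdr.reclen;
	}
}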

@ -1251,7 +1251,7 @@ zio_crypt_init_uios_zil(boolean_t encrypt, uint8_t *plainbuf,
iovec_t *dst_iovecs;
zil_chain_t *zilc;
lr_t *lr;
uint64_t txtype, lr_len;
uint64_t txtype, lr_len, nused;
uint_t crypt_len, nr_iovecs, vec;
uint_t aad_len = 0, total_len = 0;
@ -1268,7 +1268,10 @@ zio_crypt_init_uios_zil(boolean_t encrypt, uint8_t *plainbuf,
zilc = (zil_chain_t *)src;
slrp = src + sizeof (zil_chain_t);
aadp = aadbuf;
blkend = src + ((byteswap) ? BSWAP_64(zilc->zc_nused) : zilc->zc_nused);
nused = ((byteswap) ? BSWAP_64(zilc->zc_nused) : zilc->zc_nused);
ASSERT3U(nused, >=, sizeof (zil_chain_t));
ASSERT3U(nused, <=, datalen);
blkend = src + nused;
/*
* Calculate the number of encrypted iovecs we will need.
@ -1287,6 +1290,8 @@ zio_crypt_init_uios_zil(boolean_t encrypt, uint8_t *plainbuf,
txtype = lr->lrc_txtype;
lr_len = lr->lrc_reclen;
}
ASSERT3U(lr_len, >=, sizeof (lr_t));
ASSERT3U(lr_len, <=, blkend - slrp);
nr_iovecs++;
if (txtype == TX_WRITE && lr_len != sizeof (lr_write_t))

@ -1405,7 +1405,7 @@ zio_crypt_init_uios_zil(boolean_t encrypt, uint8_t *plainbuf,
boolean_t *no_crypt)
{
int ret;
uint64_t txtype, lr_len;
uint64_t txtype, lr_len, nused;
uint_t nr_src, nr_dst, crypt_len;
uint_t aad_len = 0, nr_iovecs = 0, total_len = 0;
iovec_t *src_iovecs = NULL, *dst_iovecs = NULL;
@ -1432,7 +1432,10 @@ zio_crypt_init_uios_zil(boolean_t encrypt, uint8_t *plainbuf,
zilc = (zil_chain_t *)src;
slrp = src + sizeof (zil_chain_t);
aadp = aadbuf;
blkend = src + ((byteswap) ? BSWAP_64(zilc->zc_nused) : zilc->zc_nused);
nused = ((byteswap) ? BSWAP_64(zilc->zc_nused) : zilc->zc_nused);
ASSERT3U(nused, >=, sizeof (zil_chain_t));
ASSERT3U(nused, <=, datalen);
blkend = src + nused;
/* calculate the number of encrypted iovecs we will need */
for (; slrp < blkend; slrp += lr_len) {
@ -1445,6 +1448,8 @@ zio_crypt_init_uios_zil(boolean_t encrypt, uint8_t *plainbuf,
txtype = BSWAP_64(lr->lrc_txtype);
lr_len = BSWAP_64(lr->lrc_reclen);
}
ASSERT3U(lr_len, >=, sizeof (lr_t));
ASSERT3U(lr_len, <=, blkend - slrp);
nr_iovecs++;
if (txtype == TX_WRITE && lr_len != sizeof (lr_write_t))

@ -309,6 +309,8 @@ zfs_replay_create_acl(void *arg1, void *arg2, boolean_t byteswap)
uint64_t dnodesize;
int error;
ASSERT3U(lr->lr_common.lrc_reclen, >=, sizeof (*lracl));
txtype = (lr->lr_common.lrc_txtype & ~TX_CI);
if (byteswap) {
byteswap_uint64_array(lracl, sizeof (*lracl));
@ -470,6 +472,8 @@ zfs_replay_create(void *arg1, void *arg2, boolean_t byteswap)
uint64_t dnodesize;
int error;
ASSERT3U(lr->lr_common.lrc_reclen, >, sizeof (*lr));
txtype = (lr->lr_common.lrc_txtype & ~TX_CI);
if (byteswap) {
byteswap_uint64_array(lr, sizeof (*lr));
@ -613,6 +617,8 @@ zfs_replay_remove(void *arg1, void *arg2, boolean_t byteswap)
int error;
int vflg = 0;
ASSERT3U(lr->lr_common.lrc_reclen, >, sizeof (*lr));
if (byteswap)
byteswap_uint64_array(lr, sizeof (*lr));
@ -648,6 +654,8 @@ zfs_replay_link(void *arg1, void *arg2, boolean_t byteswap)
int error;
int vflg = 0;
ASSERT3U(lr->lr_common.lrc_reclen, >, sizeof (*lr));
if (byteswap)
byteswap_uint64_array(lr, sizeof (*lr));
@ -715,12 +723,14 @@ zfs_replay_rename(void *arg1, void *arg2, boolean_t byteswap)
{
zfsvfs_t *zfsvfs = arg1;
lr_rename_t *lr = arg2;
char *sname = (char *)(lr + 1); /* sname and tname follow lr_rename_t */
char *tname = sname + strlen(sname) + 1;
ASSERT3U(lr->lr_common.lrc_reclen, >, sizeof (*lr));
if (byteswap)
byteswap_uint64_array(lr, sizeof (*lr));
char *sname = (char *)(lr + 1); /* sname and tname follow lr_rename_t */
char *tname = sname + strlen(sname) + 1;
return (do_zfs_replay_rename(zfsvfs, lr, sname, tname, 0, NULL));
}
@ -730,12 +740,14 @@ zfs_replay_rename_exchange(void *arg1, void *arg2, boolean_t byteswap)
#ifdef __linux__
zfsvfs_t *zfsvfs = arg1;
lr_rename_t *lr = arg2;
char *sname = (char *)(lr + 1); /* sname and tname follow lr_rename_t */
char *tname = sname + strlen(sname) + 1;
ASSERT3U(lr->lr_common.lrc_reclen, >, sizeof (*lr));
if (byteswap)
byteswap_uint64_array(lr, sizeof (*lr));
char *sname = (char *)(lr + 1); /* sname and tname follow lr_rename_t */
char *tname = sname + strlen(sname) + 1;
return (do_zfs_replay_rename(zfsvfs, lr, sname, tname, RENAME_EXCHANGE,
NULL));
#else
@ -750,14 +762,13 @@ zfs_replay_rename_whiteout(void *arg1, void *arg2, boolean_t byteswap)
zfsvfs_t *zfsvfs = arg1;
lr_rename_whiteout_t *lr = arg2;
int error;
/* sname and tname follow lr_rename_whiteout_t */
char *sname = (char *)(lr + 1);
char *tname = sname + strlen(sname) + 1;
/* For the whiteout file. */
xvattr_t xva;
uint64_t objid;
uint64_t dnodesize;
ASSERT3U(lr->lr_rename.lr_common.lrc_reclen, >, sizeof (*lr));
if (byteswap)
byteswap_uint64_array(lr, sizeof (*lr));
@ -783,6 +794,9 @@ zfs_replay_rename_whiteout(void *arg1, void *arg2, boolean_t byteswap)
if (error)
return (error);
/* sname and tname follow lr_rename_whiteout_t */
char *sname = (char *)(lr + 1);
char *tname = sname + strlen(sname) + 1;
return (do_zfs_replay_rename(zfsvfs, &lr->lr_rename, sname, tname,
RENAME_WHITEOUT, &xva.xva_vattr));
#else
@ -800,6 +814,8 @@ zfs_replay_write(void *arg1, void *arg2, boolean_t byteswap)
int error;
uint64_t eod, offset, length;
ASSERT3U(lr->lr_common.lrc_reclen, >=, sizeof (*lr));
if (byteswap)
byteswap_uint64_array(lr, sizeof (*lr));
@ -863,6 +879,8 @@ zfs_replay_write2(void *arg1, void *arg2, boolean_t byteswap)
int error;
uint64_t end;
ASSERT3U(lr->lr_common.lrc_reclen, >=, sizeof (*lr));
if (byteswap)
byteswap_uint64_array(lr, sizeof (*lr));
@ -910,6 +928,8 @@ zfs_replay_truncate(void *arg1, void *arg2, boolean_t byteswap)
flock64_t fl = {0};
int error;
ASSERT3U(lr->lr_common.lrc_reclen, >=, sizeof (*lr));
if (byteswap)
byteswap_uint64_array(lr, sizeof (*lr));
@ -940,6 +960,8 @@ zfs_replay_setattr(void *arg1, void *arg2, boolean_t byteswap)
int error;
void *start;
ASSERT3U(lr->lr_common.lrc_reclen, >=, sizeof (*lr));
xva_init(&xva);
if (byteswap) {
byteswap_uint64_array(lr, sizeof (*lr));
@ -1002,6 +1024,9 @@ zfs_replay_setsaxattr(void *arg1, void *arg2, boolean_t byteswap)
size_t size;
int error = 0;
ASSERT3U(lr->lr_common.lrc_reclen, >=, sizeof (*lr));
ASSERT3U(lr->lr_common.lrc_reclen, >, sizeof (*lr) + lr->lr_size);
ASSERT(spa_feature_is_active(zfsvfs->z_os->os_spa,
SPA_FEATURE_ZILSAXATTR));
if (byteswap)
@ -1079,6 +1104,10 @@ zfs_replay_acl_v0(void *arg1, void *arg2, boolean_t byteswap)
znode_t *zp;
int error;
ASSERT3U(lr->lr_common.lrc_reclen, >=, sizeof (*lr));
ASSERT3U(lr->lr_common.lrc_reclen, >=, sizeof (*lr) +
sizeof (ace_t) * lr->lr_aclcnt);
if (byteswap) {
byteswap_uint64_array(lr, sizeof (*lr));
zfs_oldace_byteswap(ace, lr->lr_aclcnt);
@ -1124,6 +1153,9 @@ zfs_replay_acl(void *arg1, void *arg2, boolean_t byteswap)
znode_t *zp;
int error;
ASSERT3U(lr->lr_common.lrc_reclen, >=, sizeof (*lr));
ASSERT3U(lr->lr_common.lrc_reclen, >=, sizeof (*lr) + lr->lr_acl_bytes);
if (byteswap) {
byteswap_uint64_array(lr, sizeof (*lr));
zfs_ace_byteswap(ace, lr->lr_acl_bytes, B_FALSE);
@ -1171,6 +1203,10 @@ zfs_replay_clone_range(void *arg1, void *arg2, boolean_t byteswap)
znode_t *zp;
int error;
ASSERT3U(lr->lr_common.lrc_reclen, >=, sizeof (*lr));
ASSERT3U(lr->lr_common.lrc_reclen, >=, offsetof(lr_clone_range_t,
lr_bps[lr->lr_nbps]));
if (byteswap)
byteswap_uint64_array(lr, sizeof (*lr));

@ -514,6 +514,7 @@ zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func,
lr_t *lr = (lr_t *)lrp;
reclen = lr->lrc_reclen;
ASSERT3U(reclen, >=, sizeof (lr_t));
ASSERT3U(reclen, <=, end - lrp);
if (lr->lrc_seq > claim_lr_seq) {
arc_buf_destroy(abuf, &abuf);
goto done;
@ -596,7 +597,7 @@ zil_claim_write(zilog_t *zilog, const lr_t *lrc, void *tx, uint64_t first_txg)
lr_write_t *lr = (lr_write_t *)lrc;
int error;
ASSERT(lrc->lrc_txtype == TX_WRITE);
ASSERT3U(lrc->lrc_reclen, >=, sizeof (*lr));
/*
* If the block is not readable, don't claim it. This can happen
@ -623,7 +624,9 @@ zil_claim_clone_range(zilog_t *zilog, const lr_t *lrc, void *tx)
spa_t *spa;
uint_t ii;
ASSERT(lrc->lrc_txtype == TX_CLONE_RANGE);
ASSERT3U(lrc->lrc_reclen, >=, sizeof (*lr));
ASSERT3U(lrc->lrc_reclen, >=, offsetof(lr_clone_range_t,
lr_bps[lr->lr_nbps]));
if (tx == NULL) {
return (0);
@ -683,7 +686,7 @@ zil_free_write(zilog_t *zilog, const lr_t *lrc, void *tx, uint64_t claim_txg)
lr_write_t *lr = (lr_write_t *)lrc;
blkptr_t *bp = &lr->lr_blkptr;
ASSERT(lrc->lrc_txtype == TX_WRITE);
ASSERT3U(lrc->lrc_reclen, >=, sizeof (*lr));
/*
* If we previously claimed it, we need to free it.
@ -704,7 +707,9 @@ zil_free_clone_range(zilog_t *zilog, const lr_t *lrc, void *tx)
spa_t *spa;
uint_t ii;
ASSERT(lrc->lrc_txtype == TX_CLONE_RANGE);
ASSERT3U(lrc->lrc_reclen, >=, sizeof (*lr));
ASSERT3U(lrc->lrc_reclen, >=, offsetof(lr_clone_range_t,
lr_bps[lr->lr_nbps]));
if (tx == NULL) {
return (0);
@ -1786,6 +1791,7 @@ zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb)
itx = list_next(&lwb->lwb_itxs, itx))
zil_lwb_commit(zilog, lwb, itx);
lwb->lwb_nused = lwb->lwb_nfilled;
ASSERT3U(lwb->lwb_nused, <=, lwb->lwb_nmax);
lwb->lwb_root_zio = zio_root(spa, zil_lwb_flush_vdevs_done, lwb,
ZIO_FLAG_CANFAIL);
@ -2015,13 +2021,16 @@ zil_lwb_assign(zilog_t *zilog, lwb_t *lwb, itx_t *itx, list_t *ilwbs)
return (lwb);
}
reclen = lr->lrc_reclen;
if (lr->lrc_txtype == TX_WRITE && itx->itx_wr_state == WR_NEED_COPY) {
ASSERT3U(reclen, ==, sizeof (lr_write_t));
dlen = P2ROUNDUP_TYPED(
lrw->lr_length, sizeof (uint64_t), uint64_t);
} else {
ASSERT3U(reclen, >=, sizeof (lr_t));
dlen = 0;
}
reclen = lr->lrc_reclen;
ASSERT3U(reclen, <=, zil_max_log_data(zilog, 0));
zilog->zl_cur_used += (reclen + dlen);
cont:
@ -2040,18 +2049,18 @@ cont:
if (lwb == NULL)
return (NULL);
lwb_sp = lwb->lwb_nmax - lwb->lwb_nused;
}
/*
* There must be enough space in the new, empty log block to
* hold reclen. For WR_COPIED, we need to fit the whole
* record in one block, and reclen is the header size + the
* data size. For WR_NEED_COPY, we can create multiple
* records, splitting the data into multiple blocks, so we
* only need to fit one word of data per block; in this case
* reclen is just the header size (no data).
* There must be enough space in the log block to hold reclen.
* For WR_COPIED, we need to fit the whole record in one block,
* and reclen is the write record header size + the data size.
* For WR_NEED_COPY, we can create multiple records, splitting
* the data into multiple blocks, so we only need to fit one
* word of data per block; in this case reclen is just the header
* size (no data).
*/
ASSERT3U(reclen + MIN(dlen, sizeof (uint64_t)), <=, lwb_sp);
}
dnow = MIN(dlen, lwb_sp - reclen);
if (dlen > dnow) {
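
The rewritten comment in the hunk above distinguishes WR_COPIED records, whose reclen already includes the data and which must fit whole, from WR_NEED_COPY records, whose data can be split across blocks. A small sketch of that space check, with made-up names (block_space_check, avail) and the same meaning of reclen and dlen; it is not the commit's code:

#include <assert.h>
#include <stdint.h>

#define	MIN(a, b)	((a) < (b) ? (a) : (b))

/*
 * avail is the free space left in the log block.  For a WR_COPIED-style
 * record dlen is 0 and reclen (header + data) must fit entirely; for a
 * WR_NEED_COPY-style record reclen is only the header and at least one
 * 8-byte word of data must also fit.  Returns how many data bytes of
 * the record can go into this block.
 */
uint64_t
block_space_check(uint64_t avail, uint64_t reclen, uint64_t dlen)
{
	assert(reclen + MIN(dlen, sizeof (uint64_t)) <= avail);
	return (MIN(dlen, avail - reclen));
}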
@ -2228,7 +2237,9 @@ zil_itx_create(uint64_t txtype, size_t olrsize)
size_t itxsize, lrsize;
itx_t *itx;
ASSERT3U(olrsize, >=, sizeof (lr_t));
lrsize = P2ROUNDUP_TYPED(olrsize, sizeof (uint64_t), size_t);
ASSERT3U(lrsize, >=, olrsize);
itxsize = offsetof(itx_t, itx_lr) + lrsize;
itx = zio_data_buf_alloc(itxsize);
@ -2247,6 +2258,10 @@ zil_itx_create(uint64_t txtype, size_t olrsize)
static itx_t *
zil_itx_clone(itx_t *oitx)
{
ASSERT3U(oitx->itx_size, >=, sizeof (itx_t));
ASSERT3U(oitx->itx_size, ==,
offsetof(itx_t, itx_lr) + oitx->itx_lr.lrc_reclen);
itx_t *itx = zio_data_buf_alloc(oitx->itx_size);
memcpy(itx, oitx, oitx->itx_size);
itx->itx_callback = NULL;
@ -2257,6 +2272,9 @@ zil_itx_clone(itx_t *oitx)
void
zil_itx_destroy(itx_t *itx)
{
ASSERT3U(itx->itx_size, >=, sizeof (itx_t));
ASSERT3U(itx->itx_lr.lrc_reclen, ==,
itx->itx_size - offsetof(itx_t, itx_lr));
IMPLY(itx->itx_lr.lrc_txtype == TX_COMMIT, itx->itx_callback == NULL);
IMPLY(itx->itx_callback != NULL, itx->itx_lr.lrc_txtype != TX_COMMIT);
@ -2340,7 +2358,7 @@ void
zil_remove_async(zilog_t *zilog, uint64_t oid)
{
uint64_t otxg, txg;
itx_async_node_t *ian;
itx_async_node_t *ian, ian_search;
avl_tree_t *t;
avl_index_t where;
list_t clean_list;
@ -2367,7 +2385,8 @@ zil_remove_async(zilog_t *zilog, uint64_t oid)
* Locate the object node and append its list.
*/
t = &itxg->itxg_itxs->i_async_tree;
ian = avl_find(t, &oid, &where);
ian_search.ia_foid = oid;
ian = avl_find(t, &ian_search, &where);
if (ian != NULL)
list_move_tail(&clean_list, &ian->ia_list);
mutex_exit(&itxg->itxg_lock);
@ -2565,7 +2584,7 @@ void
zil_async_to_sync(zilog_t *zilog, uint64_t foid)
{
uint64_t otxg, txg;
itx_async_node_t *ian;
itx_async_node_t *ian, ian_search;
avl_tree_t *t;
avl_index_t where;
@ -2595,7 +2614,8 @@ zil_async_to_sync(zilog_t *zilog, uint64_t foid)
*/
t = &itxg->itxg_itxs->i_async_tree;
if (foid != 0) {
ian = avl_find(t, &foid, &where);
ian_search.ia_foid = foid;
ian = avl_find(t, &ian_search, &where);
if (ian != NULL) {
list_move_tail(&itxg->itxg_itxs->i_sync_list,
&ian->ia_list);

@ -363,11 +363,14 @@ zio_checksum_compute(zio_t *zio, enum zio_checksum checksum,
zil_chain_t zilc;
abd_copy_to_buf(&zilc, abd, sizeof (zil_chain_t));
size = P2ROUNDUP_TYPED(zilc.zc_nused, ZIL_MIN_BLKSZ,
uint64_t);
uint64_t nused = P2ROUNDUP_TYPED(zilc.zc_nused,
ZIL_MIN_BLKSZ, uint64_t);
ASSERT3U(size, >=, nused);
size = nused;
eck = zilc.zc_eck;
eck_offset = offsetof(zil_chain_t, zc_eck);
} else {
ASSERT3U(size, >=, sizeof (zio_eck_t));
eck_offset = size - sizeof (zio_eck_t);
abd_copy_to_buf_off(&eck, abd, eck_offset,
sizeof (zio_eck_t));
@ -448,12 +451,13 @@ zio_checksum_error_impl(spa_t *spa, const blkptr_t *bp,
return (SET_ERROR(ECKSUM));
}
if (nused > size) {
nused = P2ROUNDUP_TYPED(nused, ZIL_MIN_BLKSZ, uint64_t);
if (size < nused)
return (SET_ERROR(ECKSUM));
}
size = P2ROUNDUP_TYPED(nused, ZIL_MIN_BLKSZ, uint64_t);
size = nused;
} else {
if (size < sizeof (zio_eck_t))
return (SET_ERROR(ECKSUM));
eck_offset = size - sizeof (zio_eck_t);
abd_copy_to_buf_off(&eck, abd, eck_offset,
sizeof (zio_eck_t));

@ -446,6 +446,8 @@ zvol_replay_truncate(void *arg1, void *arg2, boolean_t byteswap)
lr_truncate_t *lr = arg2;
uint64_t offset, length;
ASSERT3U(lr->lr_common.lrc_reclen, >=, sizeof (*lr));
if (byteswap)
byteswap_uint64_array(lr, sizeof (*lr));
@ -482,6 +484,8 @@ zvol_replay_write(void *arg1, void *arg2, boolean_t byteswap)
dmu_tx_t *tx;
int error;
ASSERT3U(lr->lr_common.lrc_reclen, >=, sizeof (*lr));
if (byteswap)
byteswap_uint64_array(lr, sizeof (*lr));
@ -535,6 +539,10 @@ zvol_replay_clone_range(void *arg1, void *arg2, boolean_t byteswap)
uint_t ii;
int error;
ASSERT3U(lr->lr_common.lrc_reclen, >=, sizeof (*lr));
ASSERT3U(lr->lr_common.lrc_reclen, >=, offsetof(lr_clone_range_t,
lr_bps[lr->lr_nbps]));
dmu_objset_name(os, name);
cmn_err(CE_WARN, "ZFS dropping block cloning transaction for %s.",
name);
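
The clone-range assertions above (in zfs_replay_clone_range, zil_claim_clone_range, zil_free_clone_range, and zvol_replay_clone_range) bound a variable-length record by taking offsetof into its trailing array at index lr_nbps. A minimal sketch of that idiom with a hypothetical record type (range_rec_t, validate_range_rec); like the ZFS code, it relies on the common compiler extension that accepts a runtime array index inside offsetof:

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

/* Hypothetical variable-length record, analogous to lr_clone_range_t. */
typedef struct {
	uint64_t reclen;	/* total record length in bytes */
	uint64_t nbps;		/* number of trailing entries */
	uint64_t bps[];		/* flexible array with nbps entries */
} range_rec_t;

/*
 * offsetof(range_rec_t, bps[n]) is the fixed header size plus n array
 * elements, so a record claiming nbps entries is only valid if reclen
 * covers bps[0..nbps-1] entirely.
 */
void
validate_range_rec(const range_rec_t *rec)
{
	assert(rec->reclen >= sizeof (range_rec_t));
	assert(rec->reclen >= offsetof(range_rec_t, bps[rec->nbps]));
}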