Illumos 4370, 4371
4370 avoid transmitting holes during zfs send 4371 DMU code clean up Reviewed by: Matthew Ahrens <mahrens@delphix.com> Reviewed by: George Wilson <george.wilson@delphix.com> Reviewed by: Christopher Siden <christopher.siden@delphix.com> Reviewed by: Josef 'Jeff' Sipek <jeffpc@josefsipek.net> Approved by: Garrett D'Amore <garrett@damore.org> References: https://www.illumos.org/issues/4370 https://www.illumos.org/issues/4371 https://github.com/illumos/illumos-gate/commit/43466aa Ported by: Tim Chase <tim@chase2k.com> Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Closes #2529
This commit is contained in:
parent
fa86b5dbb6
commit
b0bc7a84d9
|
@ -767,7 +767,7 @@ dump_dde(const ddt_t *ddt, const ddt_entry_t *dde, uint64_t index)
|
|||
if (ddp->ddp_phys_birth == 0)
|
||||
continue;
|
||||
ddt_bp_create(ddt->ddt_checksum, ddk, ddp, &blk);
|
||||
sprintf_blkptr(blkbuf, &blk);
|
||||
snprintf_blkptr(blkbuf, sizeof (blkbuf), &blk);
|
||||
(void) printf("index %llx refcnt %llu %s %s\n",
|
||||
(u_longlong_t)index, (u_longlong_t)ddp->ddp_refcnt,
|
||||
types[p], blkbuf);
|
||||
|
@ -1036,32 +1036,40 @@ blkid2offset(const dnode_phys_t *dnp, const blkptr_t *bp, const zbookmark_t *zb)
|
|||
}
|
||||
|
||||
static void
|
||||
sprintf_blkptr_compact(char *blkbuf, const blkptr_t *bp)
|
||||
snprintf_blkptr_compact(char *blkbuf, size_t buflen, const blkptr_t *bp)
|
||||
{
|
||||
const dva_t *dva = bp->blk_dva;
|
||||
int ndvas = dump_opt['d'] > 5 ? BP_GET_NDVAS(bp) : 1;
|
||||
int i;
|
||||
|
||||
if (dump_opt['b'] >= 6) {
|
||||
sprintf_blkptr(blkbuf, bp);
|
||||
snprintf_blkptr(blkbuf, buflen, bp);
|
||||
return;
|
||||
}
|
||||
|
||||
blkbuf[0] = '\0';
|
||||
|
||||
for (i = 0; i < ndvas; i++)
|
||||
(void) sprintf(blkbuf + strlen(blkbuf), "%llu:%llx:%llx ",
|
||||
(void) snprintf(blkbuf + strlen(blkbuf),
|
||||
buflen - strlen(blkbuf), "%llu:%llx:%llx ",
|
||||
(u_longlong_t)DVA_GET_VDEV(&dva[i]),
|
||||
(u_longlong_t)DVA_GET_OFFSET(&dva[i]),
|
||||
(u_longlong_t)DVA_GET_ASIZE(&dva[i]));
|
||||
|
||||
(void) sprintf(blkbuf + strlen(blkbuf),
|
||||
"%llxL/%llxP F=%llu B=%llu/%llu",
|
||||
(u_longlong_t)BP_GET_LSIZE(bp),
|
||||
(u_longlong_t)BP_GET_PSIZE(bp),
|
||||
(u_longlong_t)bp->blk_fill,
|
||||
(u_longlong_t)bp->blk_birth,
|
||||
(u_longlong_t)BP_PHYSICAL_BIRTH(bp));
|
||||
if (BP_IS_HOLE(bp)) {
|
||||
(void) snprintf(blkbuf + strlen(blkbuf),
|
||||
buflen - strlen(blkbuf), "B=%llu",
|
||||
(u_longlong_t)bp->blk_birth);
|
||||
} else {
|
||||
(void) snprintf(blkbuf + strlen(blkbuf),
|
||||
buflen - strlen(blkbuf),
|
||||
"%llxL/%llxP F=%llu B=%llu/%llu",
|
||||
(u_longlong_t)BP_GET_LSIZE(bp),
|
||||
(u_longlong_t)BP_GET_PSIZE(bp),
|
||||
(u_longlong_t)bp->blk_fill,
|
||||
(u_longlong_t)bp->blk_birth,
|
||||
(u_longlong_t)BP_PHYSICAL_BIRTH(bp));
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -1086,7 +1094,7 @@ print_indirect(blkptr_t *bp, const zbookmark_t *zb,
|
|||
}
|
||||
}
|
||||
|
||||
sprintf_blkptr_compact(blkbuf, bp);
|
||||
snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), bp);
|
||||
(void) printf("%s\n", blkbuf);
|
||||
}
|
||||
|
||||
|
@ -1101,7 +1109,7 @@ visit_indirect(spa_t *spa, const dnode_phys_t *dnp,
|
|||
|
||||
print_indirect(bp, zb, dnp);
|
||||
|
||||
if (BP_GET_LEVEL(bp) > 0) {
|
||||
if (BP_GET_LEVEL(bp) > 0 && !BP_IS_HOLE(bp)) {
|
||||
uint32_t flags = ARC_WAIT;
|
||||
int i;
|
||||
blkptr_t *cbp;
|
||||
|
@ -1226,7 +1234,7 @@ dump_dsl_dataset(objset_t *os, uint64_t object, void *data, size_t size)
|
|||
zdb_nicenum(ds->ds_compressed_bytes, compressed);
|
||||
zdb_nicenum(ds->ds_uncompressed_bytes, uncompressed);
|
||||
zdb_nicenum(ds->ds_unique_bytes, unique);
|
||||
sprintf_blkptr(blkbuf, &ds->ds_bp);
|
||||
snprintf_blkptr(blkbuf, sizeof (blkbuf), &ds->ds_bp);
|
||||
|
||||
(void) printf("\t\tdir_obj = %llu\n",
|
||||
(u_longlong_t)ds->ds_dir_obj);
|
||||
|
@ -1271,7 +1279,7 @@ dump_bptree_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
|
|||
char blkbuf[BP_SPRINTF_LEN];
|
||||
|
||||
if (bp->blk_birth != 0) {
|
||||
sprintf_blkptr(blkbuf, bp);
|
||||
snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
|
||||
(void) printf("\t%s\n", blkbuf);
|
||||
}
|
||||
return (0);
|
||||
|
@ -1309,7 +1317,7 @@ dump_bpobj_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
|
|||
char blkbuf[BP_SPRINTF_LEN];
|
||||
|
||||
ASSERT(bp->blk_birth != 0);
|
||||
sprintf_blkptr_compact(blkbuf, bp);
|
||||
snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), bp);
|
||||
(void) printf("\t%s\n", blkbuf);
|
||||
return (0);
|
||||
}
|
||||
|
@ -1865,8 +1873,9 @@ dump_dir(objset_t *os)
|
|||
zdb_nicenum(refdbytes, numbuf);
|
||||
|
||||
if (verbosity >= 4) {
|
||||
(void) sprintf(blkbuf, ", rootbp ");
|
||||
(void) sprintf_blkptr(blkbuf + strlen(blkbuf), os->os_rootbp);
|
||||
(void) snprintf(blkbuf, sizeof (blkbuf), ", rootbp ");
|
||||
(void) snprintf_blkptr(blkbuf + strlen(blkbuf),
|
||||
sizeof (blkbuf) - strlen(blkbuf), os->os_rootbp);
|
||||
} else {
|
||||
blkbuf[0] = '\0';
|
||||
}
|
||||
|
@ -1896,7 +1905,7 @@ dump_dir(objset_t *os)
|
|||
if (verbosity < 2)
|
||||
return;
|
||||
|
||||
if (os->os_rootbp->blk_birth == 0)
|
||||
if (BP_IS_HOLE(os->os_rootbp))
|
||||
return;
|
||||
|
||||
dump_object(os, 0, verbosity, &print_header);
|
||||
|
@ -1937,7 +1946,7 @@ dump_uberblock(uberblock_t *ub, const char *header, const char *footer)
|
|||
(u_longlong_t)ub->ub_timestamp, asctime(localtime(&timestamp)));
|
||||
if (dump_opt['u'] >= 3) {
|
||||
char blkbuf[BP_SPRINTF_LEN];
|
||||
sprintf_blkptr(blkbuf, &ub->ub_rootbp);
|
||||
snprintf_blkptr(blkbuf, sizeof (blkbuf), &ub->ub_rootbp);
|
||||
(void) printf("\trootbp = %s\n", blkbuf);
|
||||
}
|
||||
(void) printf("%s", footer ? footer : "");
|
||||
|
@ -2245,7 +2254,7 @@ zdb_blkptr_done(zio_t *zio)
|
|||
zcb->zcb_errors[ioerr]++;
|
||||
|
||||
if (dump_opt['b'] >= 2)
|
||||
sprintf_blkptr(blkbuf, bp);
|
||||
snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
|
||||
else
|
||||
blkbuf[0] = '\0';
|
||||
|
||||
|
@ -2267,11 +2276,22 @@ zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
|
|||
const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
|
||||
{
|
||||
zdb_cb_t *zcb = arg;
|
||||
char blkbuf[BP_SPRINTF_LEN];
|
||||
dmu_object_type_t type;
|
||||
boolean_t is_metadata;
|
||||
|
||||
if (bp == NULL)
|
||||
if (dump_opt['b'] >= 5 && bp->blk_birth > 0) {
|
||||
char blkbuf[BP_SPRINTF_LEN];
|
||||
snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
|
||||
(void) printf("objset %llu object %llu "
|
||||
"level %lld offset 0x%llx %s\n",
|
||||
(u_longlong_t)zb->zb_objset,
|
||||
(u_longlong_t)zb->zb_object,
|
||||
(longlong_t)zb->zb_level,
|
||||
(u_longlong_t)blkid2offset(dnp, bp, zb),
|
||||
blkbuf);
|
||||
}
|
||||
|
||||
if (BP_IS_HOLE(bp))
|
||||
return (0);
|
||||
|
||||
type = BP_GET_TYPE(bp);
|
||||
|
@ -2302,17 +2322,6 @@ zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
|
|||
|
||||
zcb->zcb_readfails = 0;
|
||||
|
||||
if (dump_opt['b'] >= 5) {
|
||||
sprintf_blkptr(blkbuf, bp);
|
||||
(void) printf("objset %llu object %llu "
|
||||
"level %lld offset 0x%llx %s\n",
|
||||
(u_longlong_t)zb->zb_objset,
|
||||
(u_longlong_t)zb->zb_object,
|
||||
(longlong_t)zb->zb_level,
|
||||
(u_longlong_t)blkid2offset(dnp, bp, zb),
|
||||
blkbuf);
|
||||
}
|
||||
|
||||
if (dump_opt['b'] < 5 && isatty(STDERR_FILENO) &&
|
||||
gethrtime() > zcb->zcb_lastprint + NANOSEC) {
|
||||
uint64_t now = gethrtime();
|
||||
|
@ -2473,7 +2482,7 @@ count_block_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
|
|||
|
||||
if (dump_opt['b'] >= 5) {
|
||||
char blkbuf[BP_SPRINTF_LEN];
|
||||
sprintf_blkptr(blkbuf, bp);
|
||||
snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
|
||||
(void) printf("[%s] %s\n",
|
||||
"deferred free", blkbuf);
|
||||
}
|
||||
|
@ -2709,7 +2718,7 @@ zdb_ddt_add_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
|
|||
avl_index_t where;
|
||||
zdb_ddt_entry_t *zdde, zdde_search;
|
||||
|
||||
if (bp == NULL)
|
||||
if (BP_IS_HOLE(bp))
|
||||
return (0);
|
||||
|
||||
if (dump_opt['S'] > 1 && zb->zb_level == ZB_ROOT_LEVEL) {
|
||||
|
@ -2879,7 +2888,7 @@ zdb_print_blkptr(blkptr_t *bp, int flags)
|
|||
if (flags & ZDB_FLAG_BSWAP)
|
||||
byteswap_uint64_array((void *)bp, sizeof (blkptr_t));
|
||||
|
||||
sprintf_blkptr(blkbuf, bp);
|
||||
snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
|
||||
(void) printf("%s\n", blkbuf);
|
||||
}
|
||||
|
||||
|
|
|
@ -24,6 +24,10 @@
|
|||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2013 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Print intent log header and statistics.
|
||||
*/
|
||||
|
@ -48,7 +52,7 @@ print_log_bp(const blkptr_t *bp, const char *prefix)
|
|||
{
|
||||
char blkbuf[BP_SPRINTF_LEN];
|
||||
|
||||
sprintf_blkptr(blkbuf, bp);
|
||||
snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
|
||||
(void) printf("%s%s\n", prefix, blkbuf);
|
||||
}
|
||||
|
||||
|
@ -133,6 +137,7 @@ zil_prt_rec_write(zilog_t *zilog, int txtype, lr_write_t *lr)
|
|||
|
||||
if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) {
|
||||
(void) printf("%shas blkptr, %s\n", prefix,
|
||||
!BP_IS_HOLE(bp) &&
|
||||
bp->blk_birth >= spa_first_txg(zilog->zl_spa) ?
|
||||
"will claim" : "won't claim");
|
||||
print_log_bp(bp, prefix);
|
||||
|
@ -140,8 +145,6 @@ zil_prt_rec_write(zilog_t *zilog, int txtype, lr_write_t *lr)
|
|||
if (BP_IS_HOLE(bp)) {
|
||||
(void) printf("\t\t\tLSIZE 0x%llx\n",
|
||||
(u_longlong_t)BP_GET_LSIZE(bp));
|
||||
}
|
||||
if (bp->blk_birth == 0) {
|
||||
bzero(buf, sizeof (buf));
|
||||
(void) printf("%s<hole>\n", prefix);
|
||||
return;
|
||||
|
@ -314,7 +317,8 @@ print_log_block(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t claim_txg)
|
|||
|
||||
if (verbose >= 5) {
|
||||
(void) strcpy(blkbuf, ", ");
|
||||
sprintf_blkptr(blkbuf + strlen(blkbuf), bp);
|
||||
snprintf_blkptr(blkbuf + strlen(blkbuf),
|
||||
sizeof (blkbuf) - strlen(blkbuf), bp);
|
||||
} else {
|
||||
blkbuf[0] = '\0';
|
||||
}
|
||||
|
@ -362,7 +366,7 @@ dump_intent_log(zilog_t *zilog)
|
|||
int verbose = MAX(dump_opt['d'], dump_opt['i']);
|
||||
int i;
|
||||
|
||||
if (zh->zh_log.blk_birth == 0 || verbose < 1)
|
||||
if (BP_IS_HOLE(&zh->zh_log) || verbose < 1)
|
||||
return;
|
||||
|
||||
(void) printf("\n ZIL header: claim_txg %llu, "
|
||||
|
|
|
@ -277,6 +277,9 @@ zhack_do_feature_stat(int argc, char **argv)
|
|||
dump_obj(os, spa->spa_feat_for_read_obj, "for_read");
|
||||
dump_obj(os, spa->spa_feat_for_write_obj, "for_write");
|
||||
dump_obj(os, spa->spa_feat_desc_obj, "descriptions");
|
||||
if (spa_feature_is_active(spa, SPA_FEATURE_ENABLED_TXG)) {
|
||||
dump_obj(os, spa->spa_feat_enabled_txg_obj, "enabled_txg");
|
||||
}
|
||||
dump_mos(spa);
|
||||
|
||||
spa_close(spa, FTAG);
|
||||
|
@ -313,7 +316,9 @@ zhack_do_feature_enable(int argc, char **argv)
|
|||
feature.fi_uname = "zhack";
|
||||
feature.fi_mos = B_FALSE;
|
||||
feature.fi_can_readonly = B_FALSE;
|
||||
feature.fi_activate_on_enable = B_FALSE;
|
||||
feature.fi_depends = nodeps;
|
||||
feature.fi_feature = SPA_FEATURE_NONE;
|
||||
|
||||
optind = 1;
|
||||
while ((c = getopt(argc, argv, "rmd:")) != -1) {
|
||||
|
@ -371,7 +376,7 @@ feature_incr_sync(void *arg, dmu_tx_t *tx)
|
|||
zfeature_info_t *feature = arg;
|
||||
uint64_t refcount;
|
||||
|
||||
VERIFY0(feature_get_refcount(spa, feature, &refcount));
|
||||
VERIFY0(feature_get_refcount_from_disk(spa, feature, &refcount));
|
||||
feature_sync(spa, feature, refcount + 1, tx);
|
||||
spa_history_log_internal(spa, "zhack feature incr", tx,
|
||||
"name=%s", feature->fi_guid);
|
||||
|
@ -384,7 +389,7 @@ feature_decr_sync(void *arg, dmu_tx_t *tx)
|
|||
zfeature_info_t *feature = arg;
|
||||
uint64_t refcount;
|
||||
|
||||
VERIFY0(feature_get_refcount(spa, feature, &refcount));
|
||||
VERIFY0(feature_get_refcount_from_disk(spa, feature, &refcount));
|
||||
feature_sync(spa, feature, refcount - 1, tx);
|
||||
spa_history_log_internal(spa, "zhack feature decr", tx,
|
||||
"name=%s", feature->fi_guid);
|
||||
|
@ -411,6 +416,7 @@ zhack_do_feature_ref(int argc, char **argv)
|
|||
feature.fi_mos = B_FALSE;
|
||||
feature.fi_desc = NULL;
|
||||
feature.fi_depends = nodeps;
|
||||
feature.fi_feature = SPA_FEATURE_NONE;
|
||||
|
||||
optind = 1;
|
||||
while ((c = getopt(argc, argv, "md")) != -1) {
|
||||
|
@ -459,8 +465,8 @@ zhack_do_feature_ref(int argc, char **argv)
|
|||
|
||||
if (decr) {
|
||||
uint64_t count;
|
||||
if (feature_get_refcount(spa, &feature, &count) == 0 &&
|
||||
count != 0) {
|
||||
if (feature_get_refcount_from_disk(spa, &feature,
|
||||
&count) == 0 && count != 0) {
|
||||
fatal(spa, FTAG, "feature refcount already 0: %s",
|
||||
feature.fi_guid);
|
||||
}
|
||||
|
|
|
@ -266,8 +266,6 @@ void dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag);
|
|||
dmu_buf_impl_t *dbuf_find(struct dnode *dn, uint8_t level, uint64_t blkid);
|
||||
|
||||
int dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags);
|
||||
void dbuf_will_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
|
||||
void dbuf_fill_done(dmu_buf_impl_t *db, dmu_tx_t *tx);
|
||||
void dmu_buf_will_not_fill(dmu_buf_t *db, dmu_tx_t *tx);
|
||||
void dmu_buf_will_fill(dmu_buf_t *db, dmu_tx_t *tx);
|
||||
void dmu_buf_fill_done(dmu_buf_t *db, dmu_tx_t *tx);
|
||||
|
@ -295,20 +293,6 @@ void dbuf_stats_destroy(void);
|
|||
#define DB_DNODE_ENTER(_db) (zrl_add(&DB_DNODE_LOCK(_db)))
|
||||
#define DB_DNODE_EXIT(_db) (zrl_remove(&DB_DNODE_LOCK(_db)))
|
||||
#define DB_DNODE_HELD(_db) (!zrl_is_zero(&DB_DNODE_LOCK(_db)))
|
||||
#define DB_GET_SPA(_spa_p, _db) { \
|
||||
dnode_t *__dn; \
|
||||
DB_DNODE_ENTER(_db); \
|
||||
__dn = DB_DNODE(_db); \
|
||||
*(_spa_p) = __dn->dn_objset->os_spa; \
|
||||
DB_DNODE_EXIT(_db); \
|
||||
}
|
||||
#define DB_GET_OBJSET(_os_p, _db) { \
|
||||
dnode_t *__dn; \
|
||||
DB_DNODE_ENTER(_db); \
|
||||
__dn = DB_DNODE(_db); \
|
||||
*(_os_p) = __dn->dn_objset; \
|
||||
DB_DNODE_EXIT(_db); \
|
||||
}
|
||||
|
||||
void dbuf_init(void);
|
||||
void dbuf_fini(void);
|
||||
|
@ -358,7 +342,7 @@ _NOTE(CONSTCOND) } while (0)
|
|||
#define dprintf_dbuf_bp(db, bp, fmt, ...) do { \
|
||||
if (zfs_flags & ZFS_DEBUG_DPRINTF) { \
|
||||
char *__blkbuf = kmem_alloc(BP_SPRINTF_LEN, KM_PUSHPAGE); \
|
||||
sprintf_blkptr(__blkbuf, bp); \
|
||||
snprintf_blkptr(__blkbuf, BP_SPRINTF_LEN, bp); \
|
||||
dprintf_dbuf(db, fmt " %s\n", __VA_ARGS__, __blkbuf); \
|
||||
kmem_free(__blkbuf, BP_SPRINTF_LEN); \
|
||||
} \
|
||||
|
|
|
@ -290,6 +290,7 @@ typedef void dmu_buf_evict_func_t(struct dmu_buf *db, void *user_ptr);
|
|||
#define DMU_POOL_FEATURES_FOR_WRITE "features_for_write"
|
||||
#define DMU_POOL_FEATURES_FOR_READ "features_for_read"
|
||||
#define DMU_POOL_FEATURE_DESCRIPTIONS "feature_descriptions"
|
||||
#define DMU_POOL_FEATURE_ENABLED_TXG "feature_enabled_txg"
|
||||
#define DMU_POOL_ROOT_DATASET "root_dataset"
|
||||
#define DMU_POOL_SYNC_BPOBJ "sync_bplist"
|
||||
#define DMU_POOL_ERRLOG_SCRUB "errlog_scrub"
|
||||
|
|
|
@ -20,7 +20,7 @@
|
|||
*/
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2013 by Delphix. All rights reserved.
|
||||
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
|
@ -67,20 +67,33 @@ struct dsl_dataset;
|
|||
#define BF32_GET(x, low, len) BF32_DECODE(x, low, len)
|
||||
#define BF64_GET(x, low, len) BF64_DECODE(x, low, len)
|
||||
|
||||
#define BF32_SET(x, low, len, val) \
|
||||
((x) ^= BF32_ENCODE((x >> low) ^ (val), low, len))
|
||||
#define BF64_SET(x, low, len, val) \
|
||||
((x) ^= BF64_ENCODE((x >> low) ^ (val), low, len))
|
||||
#define BF32_SET(x, low, len, val) do { \
|
||||
ASSERT3U(val, <, 1U << (len)); \
|
||||
ASSERT3U(low + len, <=, 32); \
|
||||
(x) ^= BF32_ENCODE((x >> low) ^ (val), low, len); \
|
||||
_NOTE(CONSTCOND) } while (0)
|
||||
|
||||
#define BF64_SET(x, low, len, val) do { \
|
||||
ASSERT3U(val, <, 1ULL << (len)); \
|
||||
ASSERT3U(low + len, <=, 64); \
|
||||
((x) ^= BF64_ENCODE((x >> low) ^ (val), low, len)); \
|
||||
_NOTE(CONSTCOND) } while (0)
|
||||
|
||||
#define BF32_GET_SB(x, low, len, shift, bias) \
|
||||
((BF32_GET(x, low, len) + (bias)) << (shift))
|
||||
#define BF64_GET_SB(x, low, len, shift, bias) \
|
||||
((BF64_GET(x, low, len) + (bias)) << (shift))
|
||||
|
||||
#define BF32_SET_SB(x, low, len, shift, bias, val) \
|
||||
BF32_SET(x, low, len, ((val) >> (shift)) - (bias))
|
||||
#define BF64_SET_SB(x, low, len, shift, bias, val) \
|
||||
BF64_SET(x, low, len, ((val) >> (shift)) - (bias))
|
||||
#define BF32_SET_SB(x, low, len, shift, bias, val) do { \
|
||||
ASSERT(IS_P2ALIGNED(val, 1U << shift)); \
|
||||
ASSERT3S((val) >> (shift), >=, bias); \
|
||||
BF32_SET(x, low, len, ((val) >> (shift)) - (bias)); \
|
||||
_NOTE(CONSTCOND) } while (0)
|
||||
#define BF64_SET_SB(x, low, len, shift, bias, val) do { \
|
||||
ASSERT(IS_P2ALIGNED(val, 1ULL << shift)); \
|
||||
ASSERT3S((val) >> (shift), >=, bias); \
|
||||
BF64_SET(x, low, len, ((val) >> (shift)) - (bias)); \
|
||||
_NOTE(CONSTCOND) } while (0)
|
||||
|
||||
/*
|
||||
* We currently support nine block sizes, from 512 bytes to 128K.
|
||||
|
@ -188,6 +201,15 @@ typedef struct zio_cksum {
|
|||
#define SPA_BLKPTRSHIFT 7 /* blkptr_t is 128 bytes */
|
||||
#define SPA_DVAS_PER_BP 3 /* Number of DVAs in a bp */
|
||||
|
||||
/*
|
||||
* A block is a hole when it either 1) has never been written to, or
|
||||
* 2) is zero-filled. In both cases, ZFS can return all zeroes for all reads
|
||||
* without physically allocating disk space. Holes are represented in the
|
||||
* blkptr_t structure by zeroed blk_dva. Correct checking for holes is
|
||||
* done through the BP_IS_HOLE macro. The logical size, level,
|
||||
* DMU object type, and birth times are all also stored for holes that
|
||||
* were written to at some point (i.e. were punched after having been filled).
|
||||
*/
|
||||
typedef struct blkptr {
|
||||
dva_t blk_dva[SPA_DVAS_PER_BP]; /* Data Virtual Addresses */
|
||||
uint64_t blk_prop; /* size, compression, type, etc */
|
||||
|
@ -202,9 +224,10 @@ typedef struct blkptr {
|
|||
* Macros to get and set fields in a bp or DVA.
|
||||
*/
|
||||
#define DVA_GET_ASIZE(dva) \
|
||||
BF64_GET_SB((dva)->dva_word[0], 0, 24, SPA_MINBLOCKSHIFT, 0)
|
||||
BF64_GET_SB((dva)->dva_word[0], 0, SPA_ASIZEBITS, SPA_MINBLOCKSHIFT, 0)
|
||||
#define DVA_SET_ASIZE(dva, x) \
|
||||
BF64_SET_SB((dva)->dva_word[0], 0, 24, SPA_MINBLOCKSHIFT, 0, x)
|
||||
BF64_SET_SB((dva)->dva_word[0], 0, SPA_ASIZEBITS, \
|
||||
SPA_MINBLOCKSHIFT, 0, x)
|
||||
|
||||
#define DVA_GET_GRID(dva) BF64_GET((dva)->dva_word[0], 24, 8)
|
||||
#define DVA_SET_GRID(dva, x) BF64_SET((dva)->dva_word[0], 24, 8, x)
|
||||
|
@ -221,14 +244,14 @@ typedef struct blkptr {
|
|||
#define DVA_SET_GANG(dva, x) BF64_SET((dva)->dva_word[1], 63, 1, x)
|
||||
|
||||
#define BP_GET_LSIZE(bp) \
|
||||
BF64_GET_SB((bp)->blk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1)
|
||||
BF64_GET_SB((bp)->blk_prop, 0, SPA_LSIZEBITS, SPA_MINBLOCKSHIFT, 1)
|
||||
#define BP_SET_LSIZE(bp, x) \
|
||||
BF64_SET_SB((bp)->blk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1, x)
|
||||
BF64_SET_SB((bp)->blk_prop, 0, SPA_LSIZEBITS, SPA_MINBLOCKSHIFT, 1, x)
|
||||
|
||||
#define BP_GET_PSIZE(bp) \
|
||||
BF64_GET_SB((bp)->blk_prop, 16, 16, SPA_MINBLOCKSHIFT, 1)
|
||||
BF64_GET_SB((bp)->blk_prop, 16, SPA_PSIZEBITS, SPA_MINBLOCKSHIFT, 1)
|
||||
#define BP_SET_PSIZE(bp, x) \
|
||||
BF64_SET_SB((bp)->blk_prop, 16, 16, SPA_MINBLOCKSHIFT, 1, x)
|
||||
BF64_SET_SB((bp)->blk_prop, 16, SPA_PSIZEBITS, SPA_MINBLOCKSHIFT, 1, x)
|
||||
|
||||
#define BP_GET_COMPRESS(bp) BF64_GET((bp)->blk_prop, 32, 8)
|
||||
#define BP_SET_COMPRESS(bp, x) BF64_SET((bp)->blk_prop, 32, 8, x)
|
||||
|
@ -248,7 +271,7 @@ typedef struct blkptr {
|
|||
#define BP_GET_DEDUP(bp) BF64_GET((bp)->blk_prop, 62, 1)
|
||||
#define BP_SET_DEDUP(bp, x) BF64_SET((bp)->blk_prop, 62, 1, x)
|
||||
|
||||
#define BP_GET_BYTEORDER(bp) (0 - BF64_GET((bp)->blk_prop, 63, 1))
|
||||
#define BP_GET_BYTEORDER(bp) BF64_GET((bp)->blk_prop, 63, 1)
|
||||
#define BP_SET_BYTEORDER(bp, x) BF64_SET((bp)->blk_prop, 63, 1, x)
|
||||
|
||||
#define BP_PHYSICAL_BIRTH(bp) \
|
||||
|
@ -306,7 +329,9 @@ typedef struct blkptr {
|
|||
|
||||
#define BP_IDENTITY(bp) (&(bp)->blk_dva[0])
|
||||
#define BP_IS_GANG(bp) DVA_GET_GANG(BP_IDENTITY(bp))
|
||||
#define BP_IS_HOLE(bp) ((bp)->blk_birth == 0)
|
||||
#define DVA_IS_EMPTY(dva) ((dva)->dva_word[0] == 0ULL && \
|
||||
(dva)->dva_word[1] == 0ULL)
|
||||
#define BP_IS_HOLE(bp) DVA_IS_EMPTY(BP_IDENTITY(bp))
|
||||
|
||||
/* BP_IS_RAIDZ(bp) assumes no block compression */
|
||||
#define BP_IS_RAIDZ(bp) (DVA_GET_ASIZE(&(bp)->blk_dva[0]) > \
|
||||
|
@ -329,14 +354,10 @@ typedef struct blkptr {
|
|||
ZIO_SET_CHECKSUM(&(bp)->blk_cksum, 0, 0, 0, 0); \
|
||||
}
|
||||
|
||||
/*
|
||||
* Note: the byteorder is either 0 or -1, both of which are palindromes.
|
||||
* This simplifies the endianness handling a bit.
|
||||
*/
|
||||
#ifdef _BIG_ENDIAN
|
||||
#define ZFS_HOST_BYTEORDER (0ULL)
|
||||
#else
|
||||
#define ZFS_HOST_BYTEORDER (-1ULL)
|
||||
#define ZFS_HOST_BYTEORDER (1ULL)
|
||||
#endif
|
||||
|
||||
#define BP_SHOULD_BYTESWAP(bp) (BP_GET_BYTEORDER(bp) != ZFS_HOST_BYTEORDER)
|
||||
|
@ -348,19 +369,23 @@ typedef struct blkptr {
|
|||
* 'func' is either snprintf() or mdb_snprintf().
|
||||
* 'ws' (whitespace) can be ' ' for single-line format, '\n' for multi-line.
|
||||
*/
|
||||
#define SPRINTF_BLKPTR(func, ws, buf, bp, type, checksum, compress) \
|
||||
#define SNPRINTF_BLKPTR(func, ws, buf, size, bp, type, checksum, compress) \
|
||||
{ \
|
||||
static const char *copyname[] = \
|
||||
{ "zero", "single", "double", "triple" }; \
|
||||
int size = BP_SPRINTF_LEN; \
|
||||
int len = 0; \
|
||||
int copies = 0; \
|
||||
int d; \
|
||||
\
|
||||
if (bp == NULL) { \
|
||||
len = func(buf + len, size - len, "<NULL>"); \
|
||||
len += func(buf + len, size - len, "<NULL>"); \
|
||||
} else if (BP_IS_HOLE(bp)) { \
|
||||
len = func(buf + len, size - len, "<hole>"); \
|
||||
len += func(buf + len, size - len, "<hole>"); \
|
||||
if (bp->blk_birth > 0) { \
|
||||
len += func(buf + len, size - len, \
|
||||
" birth=%lluL", \
|
||||
(u_longlong_t)bp->blk_birth); \
|
||||
} \
|
||||
} else { \
|
||||
for (d = 0; d < BP_GET_NDVAS(bp); d++) { \
|
||||
const dva_t *dva = &bp->blk_dva[d]; \
|
||||
|
@ -642,7 +667,8 @@ extern objset_t *spa_meta_objset(spa_t *spa);
|
|||
extern uint64_t spa_deadman_synctime(spa_t *spa);
|
||||
|
||||
/* Miscellaneous support routines */
|
||||
extern void spa_activate_mos_feature(spa_t *spa, const char *feature);
|
||||
extern void spa_activate_mos_feature(spa_t *spa, const char *feature,
|
||||
dmu_tx_t *tx);
|
||||
extern void spa_deactivate_mos_feature(spa_t *spa, const char *feature);
|
||||
extern int spa_rename(const char *oldname, const char *newname);
|
||||
extern spa_t *spa_by_guid(uint64_t pool_guid, uint64_t device_guid);
|
||||
|
@ -651,7 +677,7 @@ extern char *spa_strdup(const char *);
|
|||
extern void spa_strfree(char *);
|
||||
extern uint64_t spa_get_random(uint64_t range);
|
||||
extern uint64_t spa_generate_guid(spa_t *spa);
|
||||
extern void sprintf_blkptr(char *buf, const blkptr_t *bp);
|
||||
extern void snprintf_blkptr(char *buf, size_t buflen, const blkptr_t *bp);
|
||||
extern void spa_freeze(spa_t *spa);
|
||||
extern int spa_change_guid(spa_t *spa);
|
||||
extern void spa_upgrade(spa_t *spa, uint64_t version);
|
||||
|
@ -721,7 +747,7 @@ extern void spa_event_notify(spa_t *spa, vdev_t *vdev, const char *name);
|
|||
#define dprintf_bp(bp, fmt, ...) do { \
|
||||
if (zfs_flags & ZFS_DEBUG_DPRINTF) { \
|
||||
char *__blkbuf = kmem_alloc(BP_SPRINTF_LEN, KM_PUSHPAGE); \
|
||||
sprintf_blkptr(__blkbuf, (bp)); \
|
||||
snprintf_blkptr(__blkbuf, BP_SPRINTF_LEN, (bp)); \
|
||||
dprintf(fmt " %s\n", __VA_ARGS__, __blkbuf); \
|
||||
kmem_free(__blkbuf, BP_SPRINTF_LEN); \
|
||||
} \
|
||||
|
|
|
@ -38,6 +38,7 @@
|
|||
#include <sys/refcount.h>
|
||||
#include <sys/bplist.h>
|
||||
#include <sys/bpobj.h>
|
||||
#include <zfeature_common.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
|
@ -232,6 +233,9 @@ struct spa {
|
|||
uint64_t spa_feat_for_write_obj; /* required to write to pool */
|
||||
uint64_t spa_feat_for_read_obj; /* required to read from pool */
|
||||
uint64_t spa_feat_desc_obj; /* Feature descriptions */
|
||||
uint64_t spa_feat_enabled_txg_obj; /* Feature enabled txg */
|
||||
/* cache feature refcounts */
|
||||
uint64_t spa_feat_refcount_cache[SPA_FEATURES];
|
||||
taskqid_t spa_deadman_tqid; /* Task id */
|
||||
uint64_t spa_deadman_calls; /* number of deadman calls */
|
||||
hrtime_t spa_sync_starttime; /* starting time of spa_sync */
|
||||
|
|
|
@ -21,7 +21,7 @@
|
|||
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2013 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_VDEV_H
|
||||
|
@ -109,7 +109,7 @@ extern boolean_t vdev_accessible(vdev_t *vd, zio_t *zio);
|
|||
|
||||
extern void vdev_cache_init(vdev_t *vd);
|
||||
extern void vdev_cache_fini(vdev_t *vd);
|
||||
extern int vdev_cache_read(zio_t *zio);
|
||||
extern boolean_t vdev_cache_read(zio_t *zio);
|
||||
extern void vdev_cache_write(zio_t *zio);
|
||||
extern void vdev_cache_purge(vdev_t *vd);
|
||||
|
||||
|
|
|
@ -34,6 +34,10 @@
|
|||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define VALID_FEATURE_FID(fid) ((fid) >= 0 && (fid) < SPA_FEATURES)
|
||||
#define VALID_FEATURE_OR_NONE(fid) ((fid) == SPA_FEATURE_NONE || \
|
||||
VALID_FEATURE_FID(fid))
|
||||
|
||||
struct spa;
|
||||
struct dmu_tx;
|
||||
struct objset;
|
||||
|
@ -45,6 +49,8 @@ extern void spa_feature_incr(struct spa *, spa_feature_t, struct dmu_tx *);
|
|||
extern void spa_feature_decr(struct spa *, spa_feature_t, struct dmu_tx *);
|
||||
extern boolean_t spa_feature_is_enabled(struct spa *, spa_feature_t);
|
||||
extern boolean_t spa_feature_is_active(struct spa *, spa_feature_t);
|
||||
extern boolean_t spa_feature_enabled_txg(spa_t *spa, spa_feature_t fid,
|
||||
uint64_t *txg);
|
||||
extern uint64_t spa_feature_refcount(spa_t *, spa_feature_t, uint64_t);
|
||||
extern boolean_t spa_features_check(spa_t *, boolean_t, nvlist_t *, nvlist_t *);
|
||||
|
||||
|
@ -53,6 +59,8 @@ extern boolean_t spa_features_check(spa_t *, boolean_t, nvlist_t *, nvlist_t *);
|
|||
* use the above interfaces.
|
||||
*/
|
||||
extern int feature_get_refcount(struct spa *, zfeature_info_t *, uint64_t *);
|
||||
extern int feature_get_refcount_from_disk(spa_t *spa, zfeature_info_t *feature,
|
||||
uint64_t *res);
|
||||
extern void feature_enable_sync(struct spa *, zfeature_info_t *,
|
||||
struct dmu_tx *);
|
||||
extern void feature_sync(struct spa *, zfeature_info_t *, uint64_t,
|
||||
|
|
|
@ -43,10 +43,14 @@ typedef enum spa_feature {
|
|||
SPA_FEATURE_EMPTY_BPOBJ,
|
||||
SPA_FEATURE_LZ4_COMPRESS,
|
||||
SPA_FEATURE_SPACEMAP_HISTOGRAM,
|
||||
SPA_FEATURE_ENABLED_TXG,
|
||||
SPA_FEATURE_HOLE_BIRTH,
|
||||
SPA_FEATURE_EXTENSIBLE_DATASET,
|
||||
SPA_FEATURES
|
||||
} spa_feature_t;
|
||||
|
||||
#define SPA_FEATURE_DISABLED (-1ULL)
|
||||
|
||||
typedef struct zfeature_info {
|
||||
spa_feature_t fi_feature;
|
||||
const char *fi_uname; /* User-facing feature name */
|
||||
|
@ -54,6 +58,8 @@ typedef struct zfeature_info {
|
|||
const char *fi_desc; /* Feature description */
|
||||
boolean_t fi_can_readonly; /* Can open pool readonly w/o support? */
|
||||
boolean_t fi_mos; /* Is the feature necessary to read the MOS? */
|
||||
/* Activate this feature at the same time it is enabled */
|
||||
boolean_t fi_activate_on_enable;
|
||||
/* array of dependencies, terminated by SPA_FEATURE_NONE */
|
||||
const spa_feature_t *fi_depends;
|
||||
} zfeature_info_t;
|
||||
|
@ -68,6 +74,7 @@ extern boolean_t zfeature_is_valid_guid(const char *);
|
|||
|
||||
extern boolean_t zfeature_is_supported(const char *);
|
||||
extern int zfeature_lookup_name(const char *name, spa_feature_t *res);
|
||||
extern boolean_t zfeature_depends_on(spa_feature_t fid, spa_feature_t check);
|
||||
|
||||
extern void zpool_feature_init(void);
|
||||
|
||||
|
|
|
@ -273,5 +273,70 @@ this feature are destroyed.
|
|||
|
||||
.RE
|
||||
|
||||
.sp
|
||||
.ne 2
|
||||
.na
|
||||
\fB\fBenabled_txg\fR\fR
|
||||
.ad
|
||||
.RS 4n
|
||||
.TS
|
||||
l l .
|
||||
GUID com.delphix:enabled_txg
|
||||
READ\-ONLY COMPATIBLE yes
|
||||
DEPENDENCIES none
|
||||
.TE
|
||||
|
||||
Once this feature is enabled ZFS records the transaction group number
|
||||
in which new features are enabled. This has no user-visible impact,
|
||||
but other features may depend on this feature.
|
||||
|
||||
This feature becomes \fBactive\fR as soon as it is enabled and will
|
||||
never return to being \fBenabled\fR.
|
||||
|
||||
.RE
|
||||
|
||||
.sp
|
||||
.ne 2
|
||||
.na
|
||||
\fB\fBhole_birth\fR\fR
|
||||
.ad
|
||||
.RS 4n
|
||||
.TS
|
||||
l l .
|
||||
GUID com.delphix:hole_birth
|
||||
READ\-ONLY COMPATIBLE no
|
||||
DEPENDENCIES enabled_txg
|
||||
.TE
|
||||
|
||||
This feature improves performance of incremental sends ("zfs send -i")
|
||||
and receives for objects with many holes. The most common case of
|
||||
hole-filled objects is zvols.
|
||||
|
||||
An incremental send stream from snapshot \fBA\fR to snapshot \fBB\fR
|
||||
contains information about every block that changed between \fBA\fR and
|
||||
\fBB\fR. Blocks which did not change between those snapshots can be
|
||||
identified and omitted from the stream using a piece of metadata called
|
||||
the 'block birth time', but birth times are not recorded for holes (blocks
|
||||
filled only with zeroes). Since holes created after \fBA\fR cannot be
|
||||
distinguished from holes created before \fBA\fR, information about every
|
||||
hole in the entire filesystem or zvol is included in the send stream.
|
||||
|
||||
For workloads where holes are rare this is not a problem. However, when
|
||||
incrementally replicating filesystems or zvols with many holes (for
|
||||
example a zvol formatted with another filesystem) a lot of time will
|
||||
be spent sending and receiving unnecessary information about holes that
|
||||
already exist on the receiving side.
|
||||
|
||||
Once the \fBhole_birth\fR feature has been enabled the block birth times
|
||||
of all new holes will be recorded. Incremental sends between snapshots
|
||||
created after this feature is enabled will use this new metadata to avoid
|
||||
sending information about holes that already exist on the receiving side.
|
||||
|
||||
This feature becomes \fBactive\fR as soon as it is enabled and will
|
||||
never return to being \fBenabled\fR.
|
||||
|
||||
.RE
|
||||
|
||||
|
||||
.SH "SEE ALSO"
|
||||
\fBzpool\fR(8)
|
||||
|
|
|
@ -795,7 +795,7 @@ buf_hash(uint64_t spa, const dva_t *dva, uint64_t birth)
|
|||
#define BUF_EMPTY(buf) \
|
||||
((buf)->b_dva.dva_word[0] == 0 && \
|
||||
(buf)->b_dva.dva_word[1] == 0 && \
|
||||
(buf)->b_birth == 0)
|
||||
(buf)->b_cksum0 == 0)
|
||||
|
||||
#define BUF_EQUAL(spa, dva, birth, buf) \
|
||||
((buf)->b_dva.dva_word[0] == (dva)->dva_word[0]) && \
|
||||
|
@ -3854,9 +3854,13 @@ arc_write_done(zio_t *zio)
|
|||
ASSERT(hdr->b_acb == NULL);
|
||||
|
||||
if (zio->io_error == 0) {
|
||||
hdr->b_dva = *BP_IDENTITY(zio->io_bp);
|
||||
hdr->b_birth = BP_PHYSICAL_BIRTH(zio->io_bp);
|
||||
hdr->b_cksum0 = zio->io_bp->blk_cksum.zc_word[0];
|
||||
if (BP_IS_HOLE(zio->io_bp)) {
|
||||
buf_discard_identity(hdr);
|
||||
} else {
|
||||
hdr->b_dva = *BP_IDENTITY(zio->io_bp);
|
||||
hdr->b_birth = BP_PHYSICAL_BIRTH(zio->io_bp);
|
||||
hdr->b_cksum0 = zio->io_bp->blk_cksum.zc_word[0];
|
||||
}
|
||||
} else {
|
||||
ASSERT(BUF_EMPTY(hdr));
|
||||
}
|
||||
|
|
|
@ -141,7 +141,7 @@ bptree_visit_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
|
|||
int err;
|
||||
struct bptree_args *ba = arg;
|
||||
|
||||
if (bp == NULL)
|
||||
if (BP_IS_HOLE(bp))
|
||||
return (0);
|
||||
|
||||
err = ba->ba_func(ba->ba_arg, bp, ba->ba_tx);
|
||||
|
|
|
@ -511,10 +511,9 @@ dbuf_loan_arcbuf(dmu_buf_impl_t *db)
|
|||
mutex_enter(&db->db_mtx);
|
||||
if (arc_released(db->db_buf) || refcount_count(&db->db_holds) > 1) {
|
||||
int blksz = db->db.db_size;
|
||||
spa_t *spa;
|
||||
spa_t *spa = db->db_objset->os_spa;
|
||||
|
||||
mutex_exit(&db->db_mtx);
|
||||
DB_GET_SPA(&spa, db);
|
||||
abuf = arc_loan_buf(spa, blksz);
|
||||
bcopy(db->db.db_data, abuf->b_data, blksz);
|
||||
} else {
|
||||
|
@ -575,7 +574,6 @@ static void
|
|||
dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t *flags)
|
||||
{
|
||||
dnode_t *dn;
|
||||
spa_t *spa;
|
||||
zbookmark_t zb;
|
||||
uint32_t aflags = ARC_NOWAIT;
|
||||
|
||||
|
@ -615,9 +613,9 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t *flags)
|
|||
BP_IS_HOLE(db->db_blkptr)))) {
|
||||
arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db);
|
||||
|
||||
dbuf_set_data(db, arc_buf_alloc(dn->dn_objset->os_spa,
|
||||
db->db.db_size, db, type));
|
||||
DB_DNODE_EXIT(db);
|
||||
dbuf_set_data(db, arc_buf_alloc(db->db_objset->os_spa,
|
||||
db->db.db_size, db, type));
|
||||
bzero(db->db.db_data, db->db.db_size);
|
||||
db->db_state = DB_CACHED;
|
||||
*flags |= DB_RF_CACHED;
|
||||
|
@ -625,7 +623,6 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t *flags)
|
|||
return;
|
||||
}
|
||||
|
||||
spa = dn->dn_objset->os_spa;
|
||||
DB_DNODE_EXIT(db);
|
||||
|
||||
db->db_state = DB_READ;
|
||||
|
@ -642,7 +639,7 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t *flags)
|
|||
|
||||
dbuf_add_ref(db, NULL);
|
||||
|
||||
(void) arc_read(zio, spa, db->db_blkptr,
|
||||
(void) arc_read(zio, db->db_objset->os_spa, db->db_blkptr,
|
||||
dbuf_read_done, db, ZIO_PRIORITY_SYNC_READ,
|
||||
(*flags & DB_RF_CANFAIL) ? ZIO_FLAG_CANFAIL : ZIO_FLAG_MUSTSUCCEED,
|
||||
&aflags, &zb);
|
||||
|
@ -654,8 +651,8 @@ int
|
|||
dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
|
||||
{
|
||||
int err = 0;
|
||||
int havepzio = (zio != NULL);
|
||||
int prefetch;
|
||||
boolean_t havepzio = (zio != NULL);
|
||||
boolean_t prefetch;
|
||||
dnode_t *dn;
|
||||
|
||||
/*
|
||||
|
@ -750,11 +747,10 @@ dbuf_noread(dmu_buf_impl_t *db)
|
|||
cv_wait(&db->db_changed, &db->db_mtx);
|
||||
if (db->db_state == DB_UNCACHED) {
|
||||
arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db);
|
||||
spa_t *spa;
|
||||
spa_t *spa = db->db_objset->os_spa;
|
||||
|
||||
ASSERT(db->db_buf == NULL);
|
||||
ASSERT(db->db.db_data == NULL);
|
||||
DB_GET_SPA(&spa, db);
|
||||
dbuf_set_data(db, arc_buf_alloc(spa, db->db.db_size, db, type));
|
||||
db->db_state = DB_FILL;
|
||||
} else if (db->db_state == DB_NOFILL) {
|
||||
|
@ -809,9 +805,8 @@ dbuf_fix_old_data(dmu_buf_impl_t *db, uint64_t txg)
|
|||
} else if (refcount_count(&db->db_holds) > db->db_dirtycnt) {
|
||||
int size = db->db.db_size;
|
||||
arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db);
|
||||
spa_t *spa;
|
||||
spa_t *spa = db->db_objset->os_spa;
|
||||
|
||||
DB_GET_SPA(&spa, db);
|
||||
dr->dt.dl.dr_data = arc_buf_alloc(spa, size, db, type);
|
||||
bcopy(db->db.db_data, dr->dt.dl.dr_data->b_data, size);
|
||||
} else {
|
||||
|
@ -837,12 +832,9 @@ dbuf_unoverride(dbuf_dirty_record_t *dr)
|
|||
ASSERT(db->db_data_pending != dr);
|
||||
|
||||
/* free this block */
|
||||
if (!BP_IS_HOLE(bp) && !dr->dt.dl.dr_nopwrite) {
|
||||
spa_t *spa;
|
||||
if (!BP_IS_HOLE(bp) && !dr->dt.dl.dr_nopwrite)
|
||||
zio_free(db->db_objset->os_spa, txg, bp);
|
||||
|
||||
DB_GET_SPA(&spa, db);
|
||||
zio_free(spa, txg, bp);
|
||||
}
|
||||
dr->dt.dl.dr_override_state = DR_NOT_OVERRIDDEN;
|
||||
dr->dt.dl.dr_nopwrite = B_FALSE;
|
||||
|
||||
|
@ -860,9 +852,7 @@ dbuf_unoverride(dbuf_dirty_record_t *dr)
|
|||
/*
|
||||
* Evict (if it's unreferenced) or clear (if it's referenced) any level-0
|
||||
* data blocks in the free range, so that any future readers will find
|
||||
* empty blocks. Also, if we happen across any level-1 dbufs in the
|
||||
* range that have not already been marked dirty, mark them dirty so
|
||||
* they stay in memory.
|
||||
* empty blocks.
|
||||
*
|
||||
* This is a no-op if the dataset is in the middle of an incremental
|
||||
* receive; see comment below for details.
|
||||
|
@ -872,14 +862,9 @@ dbuf_free_range(dnode_t *dn, uint64_t start, uint64_t end, dmu_tx_t *tx)
|
|||
{
|
||||
dmu_buf_impl_t *db, *db_next;
|
||||
uint64_t txg = tx->tx_txg;
|
||||
int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
|
||||
uint64_t first_l1 = start >> epbs;
|
||||
uint64_t last_l1 = end >> epbs;
|
||||
|
||||
if (end > dn->dn_maxblkid && (end != DMU_SPILL_BLKID)) {
|
||||
if (end > dn->dn_maxblkid && (end != DMU_SPILL_BLKID))
|
||||
end = dn->dn_maxblkid;
|
||||
last_l1 = end >> epbs;
|
||||
}
|
||||
dprintf_dnode(dn, "start=%llu end=%llu\n", start, end);
|
||||
|
||||
mutex_enter(&dn->dn_dbufs_mtx);
|
||||
|
@ -902,23 +887,8 @@ dbuf_free_range(dnode_t *dn, uint64_t start, uint64_t end, dmu_tx_t *tx)
|
|||
db_next = list_next(&dn->dn_dbufs, db);
|
||||
ASSERT(db->db_blkid != DMU_BONUS_BLKID);
|
||||
|
||||
if (db->db_level == 1 &&
|
||||
db->db_blkid >= first_l1 && db->db_blkid <= last_l1) {
|
||||
mutex_enter(&db->db_mtx);
|
||||
if (db->db_last_dirty &&
|
||||
db->db_last_dirty->dr_txg < txg) {
|
||||
dbuf_add_ref(db, FTAG);
|
||||
mutex_exit(&db->db_mtx);
|
||||
dbuf_will_dirty(db, tx);
|
||||
dbuf_rele(db, FTAG);
|
||||
} else {
|
||||
mutex_exit(&db->db_mtx);
|
||||
}
|
||||
}
|
||||
|
||||
if (db->db_level != 0)
|
||||
continue;
|
||||
dprintf_dbuf(db, "found buf %s\n", "");
|
||||
if (db->db_blkid < start || db->db_blkid > end)
|
||||
continue;
|
||||
|
||||
|
@ -995,24 +965,29 @@ dbuf_block_freeable(dmu_buf_impl_t *db)
|
|||
* We don't need any locking to protect db_blkptr:
|
||||
* If it's syncing, then db_last_dirty will be set
|
||||
* so we'll ignore db_blkptr.
|
||||
*
|
||||
* This logic ensures that only block births for
|
||||
* filled blocks are considered.
|
||||
*/
|
||||
ASSERT(MUTEX_HELD(&db->db_mtx));
|
||||
if (db->db_last_dirty)
|
||||
if (db->db_last_dirty && (db->db_blkptr == NULL ||
|
||||
!BP_IS_HOLE(db->db_blkptr))) {
|
||||
birth_txg = db->db_last_dirty->dr_txg;
|
||||
else if (db->db_blkptr)
|
||||
} else if (db->db_blkptr != NULL && !BP_IS_HOLE(db->db_blkptr)) {
|
||||
birth_txg = db->db_blkptr->blk_birth;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we don't exist or are in a snapshot, we can't be freed.
|
||||
* If this block doesn't exist or is in a snapshot, it can't be freed.
|
||||
* Don't pass the bp to dsl_dataset_block_freeable() since we
|
||||
* are holding the db_mtx lock and might deadlock if we are
|
||||
* prefetching a dedup-ed block.
|
||||
*/
|
||||
if (birth_txg)
|
||||
if (birth_txg != 0)
|
||||
return (ds == NULL ||
|
||||
dsl_dataset_block_freeable(ds, NULL, birth_txg));
|
||||
else
|
||||
return (FALSE);
|
||||
return (B_FALSE);
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -1032,7 +1007,7 @@ dbuf_new_size(dmu_buf_impl_t *db, int size, dmu_tx_t *tx)
|
|||
ASSERT(RW_WRITE_HELD(&dn->dn_struct_rwlock));
|
||||
|
||||
/*
|
||||
* This call to dbuf_will_dirty() with the dn_struct_rwlock held
|
||||
* This call to dmu_buf_will_dirty() with the dn_struct_rwlock held
|
||||
* is OK, because there can be no other references to the db
|
||||
* when we are changing its size, so no concurrent DB_FILL can
|
||||
* be happening.
|
||||
|
@ -1041,7 +1016,7 @@ dbuf_new_size(dmu_buf_impl_t *db, int size, dmu_tx_t *tx)
|
|||
* XXX we should be doing a dbuf_read, checking the return
|
||||
* value and returning that up to our callers
|
||||
*/
|
||||
dbuf_will_dirty(db, tx);
|
||||
dmu_buf_will_dirty(&db->db, tx);
|
||||
|
||||
/* create the data buffer for the new block */
|
||||
buf = arc_buf_alloc(dn->dn_objset->os_spa, size, db, type);
|
||||
|
@ -1071,9 +1046,8 @@ dbuf_new_size(dmu_buf_impl_t *db, int size, dmu_tx_t *tx)
|
|||
void
|
||||
dbuf_release_bp(dmu_buf_impl_t *db)
|
||||
{
|
||||
objset_t *os;
|
||||
ASSERTV(objset_t *os = db->db_objset);
|
||||
|
||||
DB_GET_OBJSET(&os, db);
|
||||
ASSERT(dsl_pool_sync_context(dmu_objset_pool(os)));
|
||||
ASSERT(arc_released(os->os_phys_buf) ||
|
||||
list_link_active(&os->os_dsl_dataset->ds_synced_link));
|
||||
|
@ -1448,10 +1422,10 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
|
|||
return (B_FALSE);
|
||||
}
|
||||
|
||||
#pragma weak dmu_buf_will_dirty = dbuf_will_dirty
|
||||
void
|
||||
dbuf_will_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
|
||||
dmu_buf_will_dirty(dmu_buf_t *db_fake, dmu_tx_t *tx)
|
||||
{
|
||||
dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
|
||||
int rf = DB_RF_MUST_SUCCEED | DB_RF_NOPREFETCH;
|
||||
|
||||
ASSERT(tx->tx_txg != 0);
|
||||
|
@ -1574,7 +1548,7 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx)
|
|||
db->db_state = DB_FILL;
|
||||
mutex_exit(&db->db_mtx);
|
||||
(void) dbuf_dirty(db, tx);
|
||||
dbuf_fill_done(db, tx);
|
||||
dmu_buf_fill_done(&db->db, tx);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -2132,7 +2106,6 @@ dbuf_add_ref(dmu_buf_impl_t *db, void *tag)
|
|||
* Without that, the dbuf_rele() could lead to a dnode_rele() followed by the
|
||||
* dnode's parent dbuf evicting its dnode handles.
|
||||
*/
|
||||
#pragma weak dmu_buf_rele = dbuf_rele
|
||||
void
|
||||
dbuf_rele(dmu_buf_impl_t *db, void *tag)
|
||||
{
|
||||
|
@ -2140,6 +2113,12 @@ dbuf_rele(dmu_buf_impl_t *db, void *tag)
|
|||
dbuf_rele_and_unlock(db, tag);
|
||||
}
|
||||
|
||||
void
|
||||
dmu_buf_rele(dmu_buf_t *db, void *tag)
|
||||
{
|
||||
dbuf_rele((dmu_buf_impl_t *)db, tag);
|
||||
}
|
||||
|
||||
/*
|
||||
* dbuf_rele() for an already-locked dbuf. This is necessary to allow
|
||||
* db_dirtycnt and db_holds to be updated atomically.
|
||||
|
@ -2600,18 +2579,14 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
|
|||
dnode_diduse_space(dn, delta - zio->io_prev_space_delta);
|
||||
zio->io_prev_space_delta = delta;
|
||||
|
||||
if (BP_IS_HOLE(bp)) {
|
||||
ASSERT(bp->blk_fill == 0);
|
||||
DB_DNODE_EXIT(db);
|
||||
return;
|
||||
if (bp->blk_birth != 0) {
|
||||
ASSERT((db->db_blkid != DMU_SPILL_BLKID &&
|
||||
BP_GET_TYPE(bp) == dn->dn_type) ||
|
||||
(db->db_blkid == DMU_SPILL_BLKID &&
|
||||
BP_GET_TYPE(bp) == dn->dn_bonustype));
|
||||
ASSERT(BP_GET_LEVEL(bp) == db->db_level);
|
||||
}
|
||||
|
||||
ASSERT((db->db_blkid != DMU_SPILL_BLKID &&
|
||||
BP_GET_TYPE(bp) == dn->dn_type) ||
|
||||
(db->db_blkid == DMU_SPILL_BLKID &&
|
||||
BP_GET_TYPE(bp) == dn->dn_bonustype));
|
||||
ASSERT(BP_GET_LEVEL(bp) == db->db_level);
|
||||
|
||||
mutex_enter(&db->db_mtx);
|
||||
|
||||
#ifdef ZFS_DEBUG
|
||||
|
@ -2637,7 +2612,11 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
|
|||
fill++;
|
||||
}
|
||||
} else {
|
||||
fill = 1;
|
||||
if (BP_IS_HOLE(bp)) {
|
||||
fill = 0;
|
||||
} else {
|
||||
fill = 1;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
blkptr_t *ibp = db->db.db_data;
|
||||
|
@ -2692,9 +2671,10 @@ static void
|
|||
dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb)
|
||||
{
|
||||
dmu_buf_impl_t *db = vdb;
|
||||
blkptr_t *bp = zio->io_bp;
|
||||
blkptr_t *bp_orig = &zio->io_bp_orig;
|
||||
uint64_t txg = zio->io_txg;
|
||||
blkptr_t *bp = db->db_blkptr;
|
||||
objset_t *os = db->db_objset;
|
||||
dmu_tx_t *tx = os->os_synctx;
|
||||
dbuf_dirty_record_t **drp, *dr;
|
||||
|
||||
ASSERT0(zio->io_error);
|
||||
|
@ -2707,14 +2687,7 @@ dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb)
|
|||
if (zio->io_flags & (ZIO_FLAG_IO_REWRITE | ZIO_FLAG_NOPWRITE)) {
|
||||
ASSERT(BP_EQUAL(bp, bp_orig));
|
||||
} else {
|
||||
objset_t *os;
|
||||
dsl_dataset_t *ds;
|
||||
dmu_tx_t *tx;
|
||||
|
||||
DB_GET_OBJSET(&os, db);
|
||||
ds = os->os_dsl_dataset;
|
||||
tx = os->os_synctx;
|
||||
|
||||
dsl_dataset_t *ds = os->os_dsl_dataset;
|
||||
(void) dsl_dataset_block_kill(ds, bp_orig, tx, B_TRUE);
|
||||
dsl_dataset_block_born(ds, bp, tx);
|
||||
}
|
||||
|
@ -2727,7 +2700,6 @@ dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb)
|
|||
while ((dr = *drp) != db->db_data_pending)
|
||||
drp = &dr->dr_next;
|
||||
ASSERT(!list_link_active(&dr->dr_dirty_node));
|
||||
ASSERT(dr->dr_txg == txg);
|
||||
ASSERT(dr->dr_dbuf == db);
|
||||
ASSERT(dr->dr_next == NULL);
|
||||
*drp = dr->dr_next;
|
||||
|
@ -2761,14 +2733,14 @@ dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb)
|
|||
DB_DNODE_ENTER(db);
|
||||
dn = DB_DNODE(db);
|
||||
ASSERT(list_head(&dr->dt.di.dr_children) == NULL);
|
||||
ASSERT3U(db->db.db_size, ==, 1<<dn->dn_phys->dn_indblkshift);
|
||||
ASSERT3U(db->db.db_size, ==, 1 << dn->dn_phys->dn_indblkshift);
|
||||
if (!BP_IS_HOLE(db->db_blkptr)) {
|
||||
ASSERTV(int epbs = dn->dn_phys->dn_indblkshift -
|
||||
SPA_BLKPTRSHIFT);
|
||||
ASSERT3U(db->db_blkid, <=,
|
||||
dn->dn_phys->dn_maxblkid >> (db->db_level * epbs));
|
||||
ASSERT3U(BP_GET_LSIZE(db->db_blkptr), ==,
|
||||
db->db.db_size);
|
||||
ASSERT3U(dn->dn_phys->dn_maxblkid
|
||||
>> (db->db_level * epbs), >=, db->db_blkid);
|
||||
arc_set_callback(db->db_buf, dbuf_do_evict, db);
|
||||
}
|
||||
DB_DNODE_EXIT(db);
|
||||
|
@ -2781,8 +2753,7 @@ dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb)
|
|||
ASSERT(db->db_dirtycnt > 0);
|
||||
db->db_dirtycnt -= 1;
|
||||
db->db_data_pending = NULL;
|
||||
|
||||
dbuf_rele_and_unlock(db, (void *)(uintptr_t)txg);
|
||||
dbuf_rele_and_unlock(db, (void *)(uintptr_t)tx->tx_txg);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
|
@ -116,12 +116,12 @@ ddt_object_load(ddt_t *ddt, enum ddt_type type, enum ddt_class class)
|
|||
error = zap_lookup(ddt->ddt_os, DMU_POOL_DIRECTORY_OBJECT, name,
|
||||
sizeof (uint64_t), 1, &ddt->ddt_object[type][class]);
|
||||
|
||||
if (error)
|
||||
if (error != 0)
|
||||
return (error);
|
||||
|
||||
error = zap_lookup(ddt->ddt_os, ddt->ddt_spa->spa_ddt_stat_object, name,
|
||||
VERIFY0(zap_lookup(ddt->ddt_os, ddt->ddt_spa->spa_ddt_stat_object, name,
|
||||
sizeof (uint64_t), sizeof (ddt_histogram_t) / sizeof (uint64_t),
|
||||
&ddt->ddt_histogram[type][class]);
|
||||
&ddt->ddt_histogram[type][class]));
|
||||
|
||||
/*
|
||||
* Seed the cached statistics.
|
||||
|
@ -138,8 +138,7 @@ ddt_object_load(ddt_t *ddt, enum ddt_type type, enum ddt_class class)
|
|||
ddo->ddo_dspace = doi.doi_physical_blocks_512 << 9;
|
||||
ddo->ddo_mspace = doi.doi_fill_count * doi.doi_data_block_size;
|
||||
|
||||
ASSERT(error == 0);
|
||||
return (error);
|
||||
return (0);
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -616,7 +615,10 @@ ddt_compress(void *src, uchar_t *dst, size_t s_len, size_t d_len)
|
|||
bcopy(src, dst, s_len);
|
||||
}
|
||||
|
||||
*version = (ZFS_HOST_BYTEORDER & DDT_COMPRESS_BYTEORDER_MASK) | cpfunc;
|
||||
*version = cpfunc;
|
||||
/* CONSTCOND */
|
||||
if (ZFS_HOST_BYTEORDER)
|
||||
*version |= DDT_COMPRESS_BYTEORDER_MASK;
|
||||
|
||||
return (c_len + 1);
|
||||
}
|
||||
|
@ -633,7 +635,8 @@ ddt_decompress(uchar_t *src, void *dst, size_t s_len, size_t d_len)
|
|||
else
|
||||
bcopy(src, dst, d_len);
|
||||
|
||||
if ((version ^ ZFS_HOST_BYTEORDER) & DDT_COMPRESS_BYTEORDER_MASK)
|
||||
if (((version & DDT_COMPRESS_BYTEORDER_MASK) != 0) !=
|
||||
(ZFS_HOST_BYTEORDER != 0))
|
||||
byteswap_uint64_array(dst, d_len);
|
||||
}
|
||||
|
||||
|
|
|
@ -684,7 +684,7 @@ dmu_free_long_range(objset_t *os, uint64_t object,
|
|||
* will take the fast path, and (b) dnode_reallocate() can verify
|
||||
* that the entire file has been freed.
|
||||
*/
|
||||
if (offset == 0 && length == DMU_OBJECT_END)
|
||||
if (err == 0 && offset == 0 && length == DMU_OBJECT_END)
|
||||
dn->dn_maxblkid = 0;
|
||||
|
||||
dnode_rele(dn, FTAG);
|
||||
|
@ -1314,10 +1314,8 @@ arc_buf_t *
|
|||
dmu_request_arcbuf(dmu_buf_t *handle, int size)
|
||||
{
|
||||
dmu_buf_impl_t *db = (dmu_buf_impl_t *)handle;
|
||||
spa_t *spa;
|
||||
|
||||
DB_GET_SPA(&spa, db);
|
||||
return (arc_loan_buf(spa, size));
|
||||
return (arc_loan_buf(db->db_objset->os_spa, size));
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
@ -118,7 +118,7 @@ diff_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
|
|||
if (zb->zb_object != DMU_META_DNODE_OBJECT)
|
||||
return (0);
|
||||
|
||||
if (bp == NULL) {
|
||||
if (BP_IS_HOLE(bp)) {
|
||||
uint64_t span = DBP_SPAN(dnp, zb->zb_level);
|
||||
uint64_t dnobj = (zb->zb_blkid * span) >> DNODE_SHIFT;
|
||||
|
||||
|
|
|
@ -385,11 +385,12 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
|
|||
if (zb->zb_object != DMU_META_DNODE_OBJECT &&
|
||||
DMU_OBJECT_IS_SPECIAL(zb->zb_object)) {
|
||||
return (0);
|
||||
} else if (bp == NULL && zb->zb_object == DMU_META_DNODE_OBJECT) {
|
||||
} else if (BP_IS_HOLE(bp) &&
|
||||
zb->zb_object == DMU_META_DNODE_OBJECT) {
|
||||
uint64_t span = BP_SPAN(dnp, zb->zb_level);
|
||||
uint64_t dnobj = (zb->zb_blkid * span) >> DNODE_SHIFT;
|
||||
err = dump_freeobjects(dsp, dnobj, span >> DNODE_SHIFT);
|
||||
} else if (bp == NULL) {
|
||||
} else if (BP_IS_HOLE(bp)) {
|
||||
uint64_t span = BP_SPAN(dnp, zb->zb_level);
|
||||
err = dump_free(dsp, zb->zb_object, zb->zb_blkid * span, span);
|
||||
} else if (zb->zb_level > 0 || type == DMU_OT_OBJSET) {
|
||||
|
|
|
@ -36,6 +36,7 @@
|
|||
#include <sys/sa.h>
|
||||
#include <sys/sa_impl.h>
|
||||
#include <sys/callb.h>
|
||||
#include <sys/zfeature.h>
|
||||
|
||||
int zfs_pd_blks_max = 100;
|
||||
|
||||
|
@ -74,7 +75,7 @@ traverse_zil_block(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t claim_txg)
|
|||
traverse_data_t *td = arg;
|
||||
zbookmark_t zb;
|
||||
|
||||
if (bp->blk_birth == 0)
|
||||
if (BP_IS_HOLE(bp))
|
||||
return (0);
|
||||
|
||||
if (claim_txg == 0 && bp->blk_birth >= spa_first_txg(td->td_spa))
|
||||
|
@ -98,7 +99,7 @@ traverse_zil_record(zilog_t *zilog, lr_t *lrc, void *arg, uint64_t claim_txg)
|
|||
blkptr_t *bp = &lr->lr_blkptr;
|
||||
zbookmark_t zb;
|
||||
|
||||
if (bp->blk_birth == 0)
|
||||
if (BP_IS_HOLE(bp))
|
||||
return (0);
|
||||
|
||||
if (claim_txg == 0 || bp->blk_birth < claim_txg)
|
||||
|
@ -225,13 +226,34 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
|
|||
ASSERT(0);
|
||||
}
|
||||
|
||||
if (BP_IS_HOLE(bp)) {
|
||||
err = td->td_func(td->td_spa, NULL, NULL, zb, dnp, td->td_arg);
|
||||
return (err);
|
||||
if (bp->blk_birth == 0) {
|
||||
if (spa_feature_is_active(td->td_spa, SPA_FEATURE_HOLE_BIRTH)) {
|
||||
/*
|
||||
* Since this block has a birth time of 0 it must be a
|
||||
* hole created before the SPA_FEATURE_HOLE_BIRTH
|
||||
* feature was enabled. If SPA_FEATURE_HOLE_BIRTH
|
||||
* was enabled before the min_txg for this traversal we
|
||||
* know the hole must have been created before the
|
||||
* min_txg for this traversal, so we can skip it. If
|
||||
* SPA_FEATURE_HOLE_BIRTH was enabled after the min_txg
|
||||
* for this traversal we cannot tell if the hole was
|
||||
* created before or after the min_txg for this
|
||||
* traversal, so we cannot skip it.
|
||||
*/
|
||||
uint64_t hole_birth_enabled_txg;
|
||||
VERIFY(spa_feature_enabled_txg(td->td_spa,
|
||||
SPA_FEATURE_HOLE_BIRTH, &hole_birth_enabled_txg));
|
||||
if (hole_birth_enabled_txg < td->td_min_txg)
|
||||
return (0);
|
||||
}
|
||||
} else if (bp->blk_birth <= td->td_min_txg) {
|
||||
return (0);
|
||||
}
|
||||
|
||||
if (bp->blk_birth <= td->td_min_txg)
|
||||
return (0);
|
||||
if (BP_IS_HOLE(bp)) {
|
||||
err = td->td_func(td->td_spa, NULL, bp, zb, dnp, td->td_arg);
|
||||
return (err);
|
||||
}
|
||||
|
||||
if (td->td_pfd && !td->td_pfd->pd_exited &&
|
||||
((td->td_pfd->pd_flags & TRAVERSE_PREFETCH_DATA) ||
|
||||
|
@ -441,7 +463,8 @@ traverse_prefetcher(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
|
|||
if (pfd->pd_cancel)
|
||||
return (SET_ERROR(EINTR));
|
||||
|
||||
if (bp == NULL || !((pfd->pd_flags & TRAVERSE_PREFETCH_DATA) ||
|
||||
if (BP_IS_HOLE(bp) ||
|
||||
!((pfd->pd_flags & TRAVERSE_PREFETCH_DATA) ||
|
||||
BP_GET_TYPE(bp) == DMU_OT_DNODE || BP_GET_LEVEL(bp) > 0) ||
|
||||
BP_GET_TYPE(bp) == DMU_OT_INTENT_LOG)
|
||||
return (0);
|
||||
|
|
|
@ -1545,7 +1545,13 @@ dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx)
|
|||
} else {
|
||||
ASSERT(dn->dn_maxblkid == 0);
|
||||
if (off == 0 && len >= blksz) {
|
||||
/* Freeing the whole block; fast-track this request */
|
||||
/*
|
||||
* Freeing the whole block; fast-track this request.
|
||||
* Note that we won't dirty any indirect blocks,
|
||||
* which is fine because we will be freeing the entire
|
||||
* file and thus all indirect blocks will be freed
|
||||
* by free_children().
|
||||
*/
|
||||
blkid = 0;
|
||||
nblks = 1;
|
||||
goto done;
|
||||
|
@ -1572,7 +1578,7 @@ dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx)
|
|||
if (db->db_last_dirty ||
|
||||
(db->db_blkptr && !BP_IS_HOLE(db->db_blkptr))) {
|
||||
rw_exit(&dn->dn_struct_rwlock);
|
||||
dbuf_will_dirty(db, tx);
|
||||
dmu_buf_will_dirty(&db->db, tx);
|
||||
rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
|
||||
data = db->db.db_data;
|
||||
bzero(data + blkoff, head);
|
||||
|
@ -1608,7 +1614,7 @@ dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx)
|
|||
if (db->db_last_dirty ||
|
||||
(db->db_blkptr && !BP_IS_HOLE(db->db_blkptr))) {
|
||||
rw_exit(&dn->dn_struct_rwlock);
|
||||
dbuf_will_dirty(db, tx);
|
||||
dmu_buf_will_dirty(&db->db, tx);
|
||||
rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
|
||||
bzero(db->db.db_data, tail);
|
||||
}
|
||||
|
@ -1629,18 +1635,18 @@ dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx)
|
|||
nblks += 1;
|
||||
|
||||
/*
|
||||
* Read in and mark all the level-1 indirects dirty,
|
||||
* so that they will stay in memory until syncing phase.
|
||||
* Always dirty the first and last indirect to make sure
|
||||
* we dirty all the partial indirects.
|
||||
* Dirty the first and last indirect blocks, as they (and/or their
|
||||
* parents) will need to be written out if they were only
|
||||
* partially freed. Interior indirect blocks will be themselves freed,
|
||||
* by free_children(), so they need not be dirtied. Note that these
|
||||
* interior blocks have already been prefetched by dmu_tx_hold_free().
|
||||
*/
|
||||
if (dn->dn_nlevels > 1) {
|
||||
uint64_t i, first, last;
|
||||
int shift = epbs + dn->dn_datablkshift;
|
||||
uint64_t first, last;
|
||||
|
||||
first = blkid >> epbs;
|
||||
if ((db = dbuf_hold_level(dn, 1, first, FTAG))) {
|
||||
dbuf_will_dirty(db, tx);
|
||||
dmu_buf_will_dirty(&db->db, tx);
|
||||
dbuf_rele(db, FTAG);
|
||||
}
|
||||
if (trunc)
|
||||
|
@ -1648,26 +1654,11 @@ dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx)
|
|||
else
|
||||
last = (blkid + nblks - 1) >> epbs;
|
||||
if (last > first && (db = dbuf_hold_level(dn, 1, last, FTAG))) {
|
||||
dbuf_will_dirty(db, tx);
|
||||
dmu_buf_will_dirty(&db->db, tx);
|
||||
dbuf_rele(db, FTAG);
|
||||
}
|
||||
for (i = first + 1; i < last; i++) {
|
||||
uint64_t ibyte = i << shift;
|
||||
int err;
|
||||
|
||||
err = dnode_next_offset(dn,
|
||||
DNODE_FIND_HAVELOCK, &ibyte, 1, 1, 0);
|
||||
i = ibyte >> shift;
|
||||
if (err == ESRCH || i >= last)
|
||||
break;
|
||||
ASSERT(err == 0);
|
||||
db = dbuf_hold_level(dn, 1, i, FTAG);
|
||||
if (db) {
|
||||
dbuf_will_dirty(db, tx);
|
||||
dbuf_rele(db, FTAG);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
done:
|
||||
/*
|
||||
* Add this range to the dnode range list.
|
||||
|
@ -1695,8 +1686,6 @@ done:
|
|||
dbuf_free_range(dn, blkid, blkid + nblks - 1, tx);
|
||||
dnode_setdirty(dn, tx);
|
||||
out:
|
||||
if (trunc && dn->dn_maxblkid >= (off >> blkshift))
|
||||
dn->dn_maxblkid = (off >> blkshift ? (off >> blkshift) - 1 : 0);
|
||||
|
||||
rw_exit(&dn->dn_struct_rwlock);
|
||||
}
|
||||
|
@ -1873,8 +1862,10 @@ dnode_next_offset_level(dnode_t *dn, int flags, uint64_t *offset,
|
|||
data = db->db.db_data;
|
||||
}
|
||||
|
||||
if (db && txg &&
|
||||
(db->db_blkptr == NULL || db->db_blkptr->blk_birth <= txg)) {
|
||||
|
||||
if (db != NULL && txg != 0 && (db->db_blkptr == NULL ||
|
||||
db->db_blkptr->blk_birth <= txg ||
|
||||
BP_IS_HOLE(db->db_blkptr))) {
|
||||
/*
|
||||
* This can only happen when we are searching up the tree
|
||||
* and these conditions mean that we need to keep climbing.
|
||||
|
|
|
@ -32,6 +32,7 @@
|
|||
#include <sys/dmu_objset.h>
|
||||
#include <sys/dsl_dataset.h>
|
||||
#include <sys/spa.h>
|
||||
#include <sys/zfeature.h>
|
||||
|
||||
static void
|
||||
dnode_increase_indirection(dnode_t *dn, dmu_tx_t *tx)
|
||||
|
@ -112,26 +113,48 @@ dnode_increase_indirection(dnode_t *dn, dmu_tx_t *tx)
|
|||
rw_exit(&dn->dn_struct_rwlock);
|
||||
}
|
||||
|
||||
static int
|
||||
static void
|
||||
free_blocks(dnode_t *dn, blkptr_t *bp, int num, dmu_tx_t *tx)
|
||||
{
|
||||
dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset;
|
||||
uint64_t bytesfreed = 0;
|
||||
int i, blocks_freed = 0;
|
||||
int i;
|
||||
|
||||
dprintf("ds=%p obj=%llx num=%d\n", ds, dn->dn_object, num);
|
||||
|
||||
for (i = 0; i < num; i++, bp++) {
|
||||
uint64_t lsize, lvl;
|
||||
dmu_object_type_t type;
|
||||
|
||||
if (BP_IS_HOLE(bp))
|
||||
continue;
|
||||
|
||||
bytesfreed += dsl_dataset_block_kill(ds, bp, tx, B_FALSE);
|
||||
ASSERT3U(bytesfreed, <=, DN_USED_BYTES(dn->dn_phys));
|
||||
|
||||
/*
|
||||
* Save some useful information on the holes being
|
||||
* punched, including logical size, type, and indirection
|
||||
* level. Retaining birth time enables detection of when
|
||||
* holes are punched for reducing the number of free
|
||||
* records transmitted during a zfs send.
|
||||
*/
|
||||
|
||||
lsize = BP_GET_LSIZE(bp);
|
||||
type = BP_GET_TYPE(bp);
|
||||
lvl = BP_GET_LEVEL(bp);
|
||||
|
||||
bzero(bp, sizeof (blkptr_t));
|
||||
blocks_freed += 1;
|
||||
|
||||
if (spa_feature_is_active(dn->dn_objset->os_spa,
|
||||
SPA_FEATURE_HOLE_BIRTH)) {
|
||||
BP_SET_LSIZE(bp, lsize);
|
||||
BP_SET_TYPE(bp, type);
|
||||
BP_SET_LEVEL(bp, lvl);
|
||||
BP_SET_BIRTH(bp, dmu_tx_get_txg(tx), 0);
|
||||
}
|
||||
}
|
||||
dnode_diduse_space(dn, -bytesfreed);
|
||||
return (blocks_freed);
|
||||
}
|
||||
|
||||
#ifdef ZFS_DEBUG
|
||||
|
@ -215,30 +238,27 @@ free_verify(dmu_buf_impl_t *db, uint64_t start, uint64_t end, dmu_tx_t *tx)
|
|||
|
||||
#define ALL -1
|
||||
|
||||
static int
|
||||
free_children(dmu_buf_impl_t *db, uint64_t blkid, uint64_t nblks, int trunc,
|
||||
static void
|
||||
free_children(dmu_buf_impl_t *db, uint64_t blkid, uint64_t nblks,
|
||||
dmu_tx_t *tx)
|
||||
{
|
||||
dnode_t *dn;
|
||||
blkptr_t *bp;
|
||||
dmu_buf_impl_t *subdb;
|
||||
uint64_t start, end, dbstart, dbend, i;
|
||||
int epbs, shift, err;
|
||||
int all = TRUE;
|
||||
int blocks_freed = 0;
|
||||
int epbs, shift;
|
||||
|
||||
/*
|
||||
* There is a small possibility that this block will not be cached:
|
||||
* 1 - if level > 1 and there are no children with level <= 1
|
||||
* 2 - if we didn't get a dirty hold (because this block had just
|
||||
* finished being written -- and so had no holds), and then this
|
||||
* block got evicted before we got here.
|
||||
* 2 - if this block was evicted since we read it from
|
||||
* dmu_tx_hold_free().
|
||||
*/
|
||||
if (db->db_state != DB_CACHED)
|
||||
(void) dbuf_read(db, NULL, DB_RF_MUST_SUCCEED);
|
||||
|
||||
dbuf_release_bp(db);
|
||||
bp = (blkptr_t *)db->db.db_data;
|
||||
bp = db->db.db_data;
|
||||
|
||||
DB_DNODE_ENTER(db);
|
||||
dn = DB_DNODE(db);
|
||||
|
@ -248,7 +268,6 @@ free_children(dmu_buf_impl_t *db, uint64_t blkid, uint64_t nblks, int trunc,
|
|||
start = blkid >> shift;
|
||||
if (dbstart < start) {
|
||||
bp += start - dbstart;
|
||||
all = FALSE;
|
||||
} else {
|
||||
start = dbstart;
|
||||
}
|
||||
|
@ -256,49 +275,46 @@ free_children(dmu_buf_impl_t *db, uint64_t blkid, uint64_t nblks, int trunc,
|
|||
end = (blkid + nblks - 1) >> shift;
|
||||
if (dbend <= end)
|
||||
end = dbend;
|
||||
else if (all)
|
||||
all = trunc;
|
||||
|
||||
ASSERT3U(start, <=, end);
|
||||
|
||||
if (db->db_level == 1) {
|
||||
FREE_VERIFY(db, start, end, tx);
|
||||
blocks_freed = free_blocks(dn, bp, end-start+1, tx);
|
||||
arc_buf_freeze(db->db_buf);
|
||||
ASSERT(all || blocks_freed == 0 || db->db_last_dirty);
|
||||
DB_DNODE_EXIT(db);
|
||||
return (all ? ALL : blocks_freed);
|
||||
}
|
||||
free_blocks(dn, bp, end-start+1, tx);
|
||||
} else {
|
||||
for (i = start; i <= end; i++, bp++) {
|
||||
if (BP_IS_HOLE(bp))
|
||||
continue;
|
||||
rw_enter(&dn->dn_struct_rwlock, RW_READER);
|
||||
VERIFY0(dbuf_hold_impl(dn, db->db_level - 1,
|
||||
i, B_TRUE, FTAG, &subdb));
|
||||
rw_exit(&dn->dn_struct_rwlock);
|
||||
ASSERT3P(bp, ==, subdb->db_blkptr);
|
||||
|
||||
for (i = start; i <= end; i++, bp++) {
|
||||
if (BP_IS_HOLE(bp))
|
||||
continue;
|
||||
rw_enter(&dn->dn_struct_rwlock, RW_READER);
|
||||
err = dbuf_hold_impl(dn, db->db_level-1, i, TRUE, FTAG, &subdb);
|
||||
ASSERT0(err);
|
||||
rw_exit(&dn->dn_struct_rwlock);
|
||||
|
||||
if (free_children(subdb, blkid, nblks, trunc, tx) == ALL) {
|
||||
ASSERT3P(subdb->db_blkptr, ==, bp);
|
||||
blocks_freed += free_blocks(dn, bp, 1, tx);
|
||||
} else {
|
||||
all = FALSE;
|
||||
free_children(subdb, blkid, nblks, tx);
|
||||
dbuf_rele(subdb, FTAG);
|
||||
}
|
||||
dbuf_rele(subdb, FTAG);
|
||||
}
|
||||
|
||||
/* If this whole block is free, free ourself too. */
|
||||
for (i = 0, bp = db->db.db_data; i < 1 << epbs; i++, bp++) {
|
||||
if (!BP_IS_HOLE(bp))
|
||||
break;
|
||||
}
|
||||
if (i == 1 << epbs) {
|
||||
/* didn't find any non-holes */
|
||||
bzero(db->db.db_data, db->db.db_size);
|
||||
free_blocks(dn, db->db_blkptr, 1, tx);
|
||||
} else {
|
||||
/*
|
||||
* Partial block free; must be marked dirty so that it
|
||||
* will be written out.
|
||||
*/
|
||||
ASSERT(db->db_dirtycnt > 0);
|
||||
}
|
||||
|
||||
DB_DNODE_EXIT(db);
|
||||
arc_buf_freeze(db->db_buf);
|
||||
#ifdef ZFS_DEBUG
|
||||
bp -= (end-start)+1;
|
||||
for (i = start; i <= end; i++, bp++) {
|
||||
if (i == start && blkid != 0)
|
||||
continue;
|
||||
else if (i == end && !trunc)
|
||||
continue;
|
||||
ASSERT0(bp->blk_birth);
|
||||
}
|
||||
#endif
|
||||
ASSERT(all || blocks_freed == 0 || db->db_last_dirty);
|
||||
return (all ? ALL : blocks_freed);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -306,20 +322,21 @@ free_children(dmu_buf_impl_t *db, uint64_t blkid, uint64_t nblks, int trunc,
|
|||
* and "free" all the blocks contained there.
|
||||
*/
|
||||
static void
|
||||
dnode_sync_free_range(dnode_t *dn, uint64_t blkid, uint64_t nblks, dmu_tx_t *tx)
|
||||
dnode_sync_free_range(dnode_t *dn, uint64_t blkid, uint64_t nblks,
|
||||
dmu_tx_t *tx)
|
||||
{
|
||||
blkptr_t *bp = dn->dn_phys->dn_blkptr;
|
||||
dmu_buf_impl_t *db;
|
||||
int trunc, start, end, shift, i, err;
|
||||
int dnlevel = dn->dn_phys->dn_nlevels;
|
||||
boolean_t trunc = B_FALSE;
|
||||
|
||||
if (blkid > dn->dn_phys->dn_maxblkid)
|
||||
return;
|
||||
|
||||
ASSERT(dn->dn_phys->dn_maxblkid < UINT64_MAX);
|
||||
trunc = blkid + nblks > dn->dn_phys->dn_maxblkid;
|
||||
if (trunc)
|
||||
if (blkid + nblks > dn->dn_phys->dn_maxblkid) {
|
||||
nblks = dn->dn_phys->dn_maxblkid - blkid + 1;
|
||||
trunc = B_TRUE;
|
||||
}
|
||||
|
||||
/* There are no indirect blocks in the object */
|
||||
if (dnlevel == 1) {
|
||||
|
@ -328,41 +345,36 @@ dnode_sync_free_range(dnode_t *dn, uint64_t blkid, uint64_t nblks, dmu_tx_t *tx)
|
|||
return;
|
||||
}
|
||||
ASSERT3U(blkid + nblks, <=, dn->dn_phys->dn_nblkptr);
|
||||
(void) free_blocks(dn, bp + blkid, nblks, tx);
|
||||
if (trunc) {
|
||||
ASSERTV(uint64_t off = (dn->dn_phys->dn_maxblkid + 1) *
|
||||
(dn->dn_phys->dn_datablkszsec<<SPA_MINBLOCKSHIFT));
|
||||
dn->dn_phys->dn_maxblkid = (blkid ? blkid - 1 : 0);
|
||||
ASSERT(off < dn->dn_phys->dn_maxblkid ||
|
||||
dn->dn_phys->dn_maxblkid == 0 ||
|
||||
dnode_next_offset(dn, 0, &off, 1, 1, 0) != 0);
|
||||
free_blocks(dn, bp + blkid, nblks, tx);
|
||||
} else {
|
||||
int shift = (dnlevel - 1) *
|
||||
(dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT);
|
||||
int start = blkid >> shift;
|
||||
int end = (blkid + nblks - 1) >> shift;
|
||||
dmu_buf_impl_t *db;
|
||||
int i;
|
||||
|
||||
ASSERT(start < dn->dn_phys->dn_nblkptr);
|
||||
bp += start;
|
||||
for (i = start; i <= end; i++, bp++) {
|
||||
if (BP_IS_HOLE(bp))
|
||||
continue;
|
||||
rw_enter(&dn->dn_struct_rwlock, RW_READER);
|
||||
VERIFY0(dbuf_hold_impl(dn, dnlevel - 1, i,
|
||||
TRUE, FTAG, &db));
|
||||
rw_exit(&dn->dn_struct_rwlock);
|
||||
|
||||
free_children(db, blkid, nblks, tx);
|
||||
dbuf_rele(db, FTAG);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
shift = (dnlevel - 1) * (dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT);
|
||||
start = blkid >> shift;
|
||||
ASSERT(start < dn->dn_phys->dn_nblkptr);
|
||||
end = (blkid + nblks - 1) >> shift;
|
||||
bp += start;
|
||||
for (i = start; i <= end; i++, bp++) {
|
||||
if (BP_IS_HOLE(bp))
|
||||
continue;
|
||||
rw_enter(&dn->dn_struct_rwlock, RW_READER);
|
||||
err = dbuf_hold_impl(dn, dnlevel-1, i, TRUE, FTAG, &db);
|
||||
ASSERT0(err);
|
||||
rw_exit(&dn->dn_struct_rwlock);
|
||||
|
||||
if (free_children(db, blkid, nblks, trunc, tx) == ALL) {
|
||||
ASSERT3P(db->db_blkptr, ==, bp);
|
||||
(void) free_blocks(dn, bp, 1, tx);
|
||||
}
|
||||
dbuf_rele(db, FTAG);
|
||||
}
|
||||
if (trunc) {
|
||||
ASSERTV(uint64_t off = (dn->dn_phys->dn_maxblkid + 1) *
|
||||
ASSERTV(uint64_t off);
|
||||
dn->dn_phys->dn_maxblkid = blkid == 0 ? 0 : blkid - 1;
|
||||
|
||||
ASSERTV(off = (dn->dn_phys->dn_maxblkid + 1) *
|
||||
(dn->dn_phys->dn_datablkszsec << SPA_MINBLOCKSHIFT));
|
||||
dn->dn_phys->dn_maxblkid = (blkid ? blkid - 1 : 0);
|
||||
ASSERT(off < dn->dn_phys->dn_maxblkid ||
|
||||
dn->dn_phys->dn_maxblkid == 0 ||
|
||||
dnode_next_offset(dn, 0, &off, 1, 1, 0) != 0);
|
||||
|
@ -504,7 +516,7 @@ dnode_sync_free(dnode_t *dn, dmu_tx_t *tx)
|
|||
|
||||
ASSERT(dn->dn_free_txg > 0);
|
||||
if (dn->dn_allocated_txg != dn->dn_free_txg)
|
||||
dbuf_will_dirty(dn->dn_dbuf, tx);
|
||||
dmu_buf_will_dirty(&dn->dn_dbuf->db, tx);
|
||||
bzero(dn->dn_phys, sizeof (dnode_phys_t));
|
||||
|
||||
mutex_enter(&dn->dn_mtx);
|
||||
|
@ -535,6 +547,7 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx)
|
|||
int txgoff = tx->tx_txg & TXG_MASK;
|
||||
list_t *list = &dn->dn_dirty_records[txgoff];
|
||||
boolean_t kill_spill = B_FALSE;
|
||||
boolean_t freeing_dnode;
|
||||
ASSERTV(static const dnode_phys_t zerodn = { 0 });
|
||||
|
||||
ASSERT(dmu_tx_is_syncing(tx));
|
||||
|
@ -611,13 +624,14 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx)
|
|||
dn->dn_next_bonustype[txgoff] = 0;
|
||||
}
|
||||
|
||||
freeing_dnode = dn->dn_free_txg > 0 && dn->dn_free_txg <= tx->tx_txg;
|
||||
|
||||
/*
|
||||
* We will either remove a spill block when a file is being removed
|
||||
* or we have been asked to remove it.
|
||||
*/
|
||||
if (dn->dn_rm_spillblk[txgoff] ||
|
||||
((dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) &&
|
||||
dn->dn_free_txg > 0 && dn->dn_free_txg <= tx->tx_txg)) {
|
||||
((dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) && freeing_dnode)) {
|
||||
if ((dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR))
|
||||
kill_spill = B_TRUE;
|
||||
dn->dn_rm_spillblk[txgoff] = 0;
|
||||
|
@ -640,7 +654,7 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx)
|
|||
mutex_exit(&dn->dn_mtx);
|
||||
|
||||
if (kill_spill) {
|
||||
(void) free_blocks(dn, &dn->dn_phys->dn_spill, 1, tx);
|
||||
free_blocks(dn, &dn->dn_phys->dn_spill, 1, tx);
|
||||
mutex_enter(&dn->dn_mtx);
|
||||
dnp->dn_flags &= ~DNODE_FLAG_SPILL_BLKPTR;
|
||||
mutex_exit(&dn->dn_mtx);
|
||||
|
@ -656,7 +670,7 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx)
|
|||
kmem_free(rp, sizeof (free_range_t));
|
||||
}
|
||||
|
||||
if (dn->dn_free_txg > 0 && dn->dn_free_txg <= tx->tx_txg) {
|
||||
if (freeing_dnode) {
|
||||
dnode_sync_free(dn, tx);
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -122,7 +122,9 @@ int
|
|||
dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx,
|
||||
boolean_t async)
|
||||
{
|
||||
int used, compressed, uncompressed;
|
||||
int used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp);
|
||||
int compressed = BP_GET_PSIZE(bp);
|
||||
int uncompressed = BP_GET_UCSIZE(bp);
|
||||
|
||||
if (BP_IS_HOLE(bp))
|
||||
return (0);
|
||||
|
@ -130,11 +132,6 @@ dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx,
|
|||
ASSERT(dmu_tx_is_syncing(tx));
|
||||
ASSERT(bp->blk_birth <= tx->tx_txg);
|
||||
|
||||
used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp);
|
||||
compressed = BP_GET_PSIZE(bp);
|
||||
uncompressed = BP_GET_UCSIZE(bp);
|
||||
|
||||
ASSERT(used > 0);
|
||||
if (ds == NULL) {
|
||||
dsl_free(tx->tx_pool, tx->tx_txg, bp);
|
||||
dsl_pool_mos_diduse_space(tx->tx_pool,
|
||||
|
@ -232,7 +229,8 @@ boolean_t
|
|||
dsl_dataset_block_freeable(dsl_dataset_t *ds, const blkptr_t *bp,
|
||||
uint64_t blk_birth)
|
||||
{
|
||||
if (blk_birth <= dsl_dataset_prev_snap_txg(ds))
|
||||
if (blk_birth <= dsl_dataset_prev_snap_txg(ds) ||
|
||||
(bp != NULL && BP_IS_HOLE(bp)))
|
||||
return (B_FALSE);
|
||||
|
||||
ddt_prefetch(dsl_dataset_get_spa(ds), bp);
|
||||
|
|
|
@ -144,6 +144,8 @@ process_old_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
|
|||
struct process_old_arg *poa = arg;
|
||||
dsl_pool_t *dp = poa->ds->ds_dir->dd_pool;
|
||||
|
||||
ASSERT(!BP_IS_HOLE(bp));
|
||||
|
||||
if (bp->blk_birth <= poa->ds->ds_phys->ds_prev_snap_txg) {
|
||||
dsl_deadlist_insert(&poa->ds->ds_deadlist, bp, tx);
|
||||
if (poa->ds_prev && !poa->after_branch_point &&
|
||||
|
@ -544,7 +546,7 @@ kill_blkptr(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
|
|||
struct killarg *ka = arg;
|
||||
dmu_tx_t *tx = ka->tx;
|
||||
|
||||
if (bp == NULL)
|
||||
if (BP_IS_HOLE(bp))
|
||||
return (0);
|
||||
|
||||
if (zb->zb_level == ZB_ZIL_LEVEL) {
|
||||
|
|
|
@ -488,7 +488,7 @@ dsl_scan_zil_block(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t claim_txg)
|
|||
zil_header_t *zh = zsa->zsa_zh;
|
||||
zbookmark_t zb;
|
||||
|
||||
if (bp->blk_birth <= scn->scn_phys.scn_cur_min_txg)
|
||||
if (BP_IS_HOLE(bp) || bp->blk_birth <= scn->scn_phys.scn_cur_min_txg)
|
||||
return (0);
|
||||
|
||||
/*
|
||||
|
@ -520,7 +520,8 @@ dsl_scan_zil_record(zilog_t *zilog, lr_t *lrc, void *arg, uint64_t claim_txg)
|
|||
blkptr_t *bp = &lr->lr_blkptr;
|
||||
zbookmark_t zb;
|
||||
|
||||
if (bp->blk_birth <= scn->scn_phys.scn_cur_min_txg)
|
||||
if (BP_IS_HOLE(bp) ||
|
||||
bp->blk_birth <= scn->scn_phys.scn_cur_min_txg)
|
||||
return (0);
|
||||
|
||||
/*
|
||||
|
@ -775,7 +776,7 @@ dsl_scan_visitbp(blkptr_t *bp, const zbookmark_t *zb,
|
|||
if (dsl_scan_check_resume(scn, dnp, zb))
|
||||
goto out;
|
||||
|
||||
if (bp->blk_birth == 0)
|
||||
if (BP_IS_HOLE(bp))
|
||||
goto out;
|
||||
|
||||
scn->scn_visited_this_txg++;
|
||||
|
|
|
@ -1872,7 +1872,7 @@ static int
|
|||
spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
|
||||
const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
|
||||
{
|
||||
if (bp != NULL) {
|
||||
if (!BP_IS_HOLE(bp)) {
|
||||
zio_t *rio = arg;
|
||||
size_t size = BP_GET_PSIZE(bp);
|
||||
void *data = zio_data_buf_alloc(size);
|
||||
|
@ -2328,6 +2328,7 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
|
|||
if (spa_version(spa) >= SPA_VERSION_FEATURES) {
|
||||
boolean_t missing_feat_read = B_FALSE;
|
||||
nvlist_t *unsup_feat, *enabled_feat;
|
||||
spa_feature_t i;
|
||||
|
||||
if (spa_dir_prop(spa, DMU_POOL_FEATURES_FOR_READ,
|
||||
&spa->spa_feat_for_read_obj) != 0) {
|
||||
|
@ -2398,6 +2399,33 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
|
|||
return (spa_vdev_err(rvd, VDEV_AUX_UNSUP_FEAT,
|
||||
ENOTSUP));
|
||||
}
|
||||
|
||||
/*
|
||||
* Load refcounts for ZFS features from disk into an in-memory
|
||||
* cache during SPA initialization.
|
||||
*/
|
||||
for (i = 0; i < SPA_FEATURES; i++) {
|
||||
uint64_t refcount;
|
||||
|
||||
error = feature_get_refcount_from_disk(spa,
|
||||
&spa_feature_table[i], &refcount);
|
||||
if (error == 0) {
|
||||
spa->spa_feat_refcount_cache[i] = refcount;
|
||||
} else if (error == ENOTSUP) {
|
||||
spa->spa_feat_refcount_cache[i] =
|
||||
SPA_FEATURE_DISABLED;
|
||||
} else {
|
||||
return (spa_vdev_err(rvd,
|
||||
VDEV_AUX_CORRUPT_DATA, EIO));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (spa_feature_is_active(spa, SPA_FEATURE_ENABLED_TXG)) {
|
||||
if (spa_dir_prop(spa, DMU_POOL_FEATURE_ENABLED_TXG,
|
||||
&spa->spa_feat_enabled_txg_obj) != 0) {
|
||||
return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
|
||||
}
|
||||
}
|
||||
|
||||
spa->spa_is_initializing = B_TRUE;
|
||||
|
@ -5820,7 +5848,7 @@ spa_sync_nvlist(spa_t *spa, uint64_t obj, nvlist_t *nv, dmu_tx_t *tx)
|
|||
|
||||
/*
|
||||
* Write full (SPA_CONFIG_BLOCKSIZE) blocks of configuration
|
||||
* information. This avoids the dbuf_will_dirty() path and
|
||||
* information. This avoids the dmu_buf_will_dirty() path and
|
||||
* saves us a pre-read to get data we don't actually care about.
|
||||
*/
|
||||
bufsize = P2ROUNDUP((uint64_t)nvsize, SPA_CONFIG_BLOCKSIZE);
|
||||
|
|
|
@ -469,6 +469,7 @@ spa_add(const char *name, nvlist_t *config, const char *altroot)
|
|||
spa_t *spa;
|
||||
spa_config_dirent_t *dp;
|
||||
int t;
|
||||
int i;
|
||||
|
||||
ASSERT(MUTEX_HELD(&spa_namespace_lock));
|
||||
|
||||
|
@ -548,6 +549,15 @@ spa_add(const char *name, nvlist_t *config, const char *altroot)
|
|||
|
||||
spa->spa_debug = ((zfs_flags & ZFS_DEBUG_SPA) != 0);
|
||||
|
||||
/*
|
||||
* As a pool is being created, treat all features as disabled by
|
||||
* setting SPA_FEATURE_DISABLED for all entries in the feature
|
||||
* refcount cache.
|
||||
*/
|
||||
for (i = 0; i < SPA_FEATURES; i++) {
|
||||
spa->spa_feat_refcount_cache[i] = SPA_FEATURE_DISABLED;
|
||||
}
|
||||
|
||||
return (spa);
|
||||
}
|
||||
|
||||
|
@ -1094,11 +1104,19 @@ spa_vdev_state_exit(spa_t *spa, vdev_t *vd, int error)
|
|||
*/
|
||||
|
||||
void
|
||||
spa_activate_mos_feature(spa_t *spa, const char *feature)
|
||||
spa_activate_mos_feature(spa_t *spa, const char *feature, dmu_tx_t *tx)
|
||||
{
|
||||
if (!nvlist_exists(spa->spa_label_features, feature)) {
|
||||
fnvlist_add_boolean(spa->spa_label_features, feature);
|
||||
vdev_config_dirty(spa->spa_root_vdev);
|
||||
/*
|
||||
* When we are creating the pool (tx_txg==TXG_INITIAL), we can't
|
||||
* dirty the vdev config because lock SCL_CONFIG is not held.
|
||||
* Thankfully, in this case we don't need to dirty the config
|
||||
* because it will be written out anyway when we finish
|
||||
* creating the pool.
|
||||
*/
|
||||
if (tx->tx_txg != TXG_INITIAL)
|
||||
vdev_config_dirty(spa->spa_root_vdev);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1257,7 +1275,7 @@ spa_generate_guid(spa_t *spa)
|
|||
}
|
||||
|
||||
void
|
||||
sprintf_blkptr(char *buf, const blkptr_t *bp)
|
||||
snprintf_blkptr(char *buf, size_t buflen, const blkptr_t *bp)
|
||||
{
|
||||
char type[256];
|
||||
char *checksum = NULL;
|
||||
|
@ -1279,7 +1297,8 @@ sprintf_blkptr(char *buf, const blkptr_t *bp)
|
|||
compress = zio_compress_table[BP_GET_COMPRESS(bp)].ci_name;
|
||||
}
|
||||
|
||||
SPRINTF_BLKPTR(snprintf, ' ', buf, bp, type, checksum, compress);
|
||||
SNPRINTF_BLKPTR(snprintf, ' ', buf, buflen, bp, type, checksum,
|
||||
compress);
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -1875,7 +1894,7 @@ EXPORT_SYMBOL(spa_strdup);
|
|||
EXPORT_SYMBOL(spa_strfree);
|
||||
EXPORT_SYMBOL(spa_get_random);
|
||||
EXPORT_SYMBOL(spa_generate_guid);
|
||||
EXPORT_SYMBOL(sprintf_blkptr);
|
||||
EXPORT_SYMBOL(snprintf_blkptr);
|
||||
EXPORT_SYMBOL(spa_freeze);
|
||||
EXPORT_SYMBOL(spa_upgrade);
|
||||
EXPORT_SYMBOL(spa_evict_all);
|
||||
|
|
|
@ -248,9 +248,9 @@ vdev_cache_fill(zio_t *fio)
|
|||
}
|
||||
|
||||
/*
|
||||
* Read data from the cache. Returns 0 on cache hit, errno on a miss.
|
||||
* Read data from the cache. Returns B_TRUE on a cache hit, B_FALSE on a miss.
|
||||
*/
|
||||
int
|
||||
boolean_t
|
||||
vdev_cache_read(zio_t *zio)
|
||||
{
|
||||
vdev_cache_t *vc = &zio->io_vd->vdev_cache;
|
||||
|
@ -262,16 +262,16 @@ vdev_cache_read(zio_t *zio)
|
|||
ASSERT(zio->io_type == ZIO_TYPE_READ);
|
||||
|
||||
if (zio->io_flags & ZIO_FLAG_DONT_CACHE)
|
||||
return (SET_ERROR(EINVAL));
|
||||
return (B_FALSE);
|
||||
|
||||
if (zio->io_size > zfs_vdev_cache_max)
|
||||
return (SET_ERROR(EOVERFLOW));
|
||||
return (B_FALSE);
|
||||
|
||||
/*
|
||||
* If the I/O straddles two or more cache blocks, don't cache it.
|
||||
*/
|
||||
if (P2BOUNDARY(zio->io_offset, zio->io_size, VCBS))
|
||||
return (SET_ERROR(EXDEV));
|
||||
return (B_FALSE);
|
||||
|
||||
ASSERT(cache_phase + zio->io_size <= VCBS);
|
||||
|
||||
|
@ -285,7 +285,7 @@ vdev_cache_read(zio_t *zio)
|
|||
if (ve != NULL) {
|
||||
if (ve->ve_missed_update) {
|
||||
mutex_exit(&vc->vc_lock);
|
||||
return (SET_ERROR(ESTALE));
|
||||
return (B_FALSE);
|
||||
}
|
||||
|
||||
if ((fio = ve->ve_fill_io) != NULL) {
|
||||
|
@ -293,7 +293,7 @@ vdev_cache_read(zio_t *zio)
|
|||
zio_add_child(zio, fio);
|
||||
mutex_exit(&vc->vc_lock);
|
||||
VDCSTAT_BUMP(vdc_stat_delegations);
|
||||
return (0);
|
||||
return (B_TRUE);
|
||||
}
|
||||
|
||||
vdev_cache_hit(vc, ve, zio);
|
||||
|
@ -301,14 +301,14 @@ vdev_cache_read(zio_t *zio)
|
|||
|
||||
mutex_exit(&vc->vc_lock);
|
||||
VDCSTAT_BUMP(vdc_stat_hits);
|
||||
return (0);
|
||||
return (B_TRUE);
|
||||
}
|
||||
|
||||
ve = vdev_cache_allocate(zio);
|
||||
|
||||
if (ve == NULL) {
|
||||
mutex_exit(&vc->vc_lock);
|
||||
return (SET_ERROR(ENOMEM));
|
||||
return (B_FALSE);
|
||||
}
|
||||
|
||||
fio = zio_vdev_delegated_io(zio->io_vd, cache_offset,
|
||||
|
@ -323,7 +323,7 @@ vdev_cache_read(zio_t *zio)
|
|||
zio_nowait(fio);
|
||||
VDCSTAT_BUMP(vdc_stat_misses);
|
||||
|
||||
return (0);
|
||||
return (B_TRUE);
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
@ -224,12 +224,32 @@ spa_features_check(spa_t *spa, boolean_t for_write,
|
|||
}
|
||||
|
||||
/*
|
||||
* Use an in-memory cache of feature refcounts for quick retrieval.
|
||||
*
|
||||
* Note: well-designed features will not need to use this; they should
|
||||
* use spa_feature_is_enabled() and spa_feature_is_active() instead.
|
||||
* However, this is non-static for zdb and zhack.
|
||||
*/
|
||||
int
|
||||
feature_get_refcount(spa_t *spa, zfeature_info_t *feature, uint64_t *res)
|
||||
{
|
||||
ASSERT(VALID_FEATURE_FID(feature->fi_feature));
|
||||
if (spa->spa_feat_refcount_cache[feature->fi_feature] ==
|
||||
SPA_FEATURE_DISABLED) {
|
||||
return (SET_ERROR(ENOTSUP));
|
||||
}
|
||||
*res = spa->spa_feat_refcount_cache[feature->fi_feature];
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Note: well-designed features will not need to use this; they should
|
||||
* use spa_feature_is_enabled() and spa_feature_is_active() instead.
|
||||
* However, this is non-static for zdb and zhack.
|
||||
*/
|
||||
int
|
||||
feature_get_refcount_from_disk(spa_t *spa, zfeature_info_t *feature,
|
||||
uint64_t *res)
|
||||
{
|
||||
int err;
|
||||
uint64_t refcount;
|
||||
|
@ -255,6 +275,26 @@ feature_get_refcount(spa_t *spa, zfeature_info_t *feature, uint64_t *res)
|
|||
return (0);
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
feature_get_enabled_txg(spa_t *spa, zfeature_info_t *feature, uint64_t *res) {
|
||||
ASSERTV(uint64_t enabled_txg_obj = spa->spa_feat_enabled_txg_obj);
|
||||
|
||||
ASSERT(zfeature_depends_on(feature->fi_feature,
|
||||
SPA_FEATURE_ENABLED_TXG));
|
||||
|
||||
if (!spa_feature_is_enabled(spa, feature->fi_feature)) {
|
||||
return (SET_ERROR(ENOTSUP));
|
||||
}
|
||||
|
||||
ASSERT(enabled_txg_obj != 0);
|
||||
|
||||
VERIFY0(zap_lookup(spa->spa_meta_objset, spa->spa_feat_enabled_txg_obj,
|
||||
feature->fi_guid, sizeof (uint64_t), 1, res));
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* This function is non-static for zhack; it should otherwise not be used
|
||||
* outside this file.
|
||||
|
@ -263,16 +303,32 @@ void
|
|||
feature_sync(spa_t *spa, zfeature_info_t *feature, uint64_t refcount,
|
||||
dmu_tx_t *tx)
|
||||
{
|
||||
uint64_t zapobj = feature->fi_can_readonly ?
|
||||
spa->spa_feat_for_write_obj : spa->spa_feat_for_read_obj;
|
||||
uint64_t zapobj;
|
||||
|
||||
ASSERT(VALID_FEATURE_OR_NONE(feature->fi_feature));
|
||||
zapobj = feature->fi_can_readonly ?
|
||||
spa->spa_feat_for_write_obj : spa->spa_feat_for_read_obj;
|
||||
VERIFY0(zap_update(spa->spa_meta_objset, zapobj, feature->fi_guid,
|
||||
sizeof (uint64_t), 1, &refcount, tx));
|
||||
|
||||
/*
|
||||
* feature_sync is called directly from zhack, allowing the
|
||||
* creation of arbitrary features whose fi_feature field may
|
||||
* be greater than SPA_FEATURES. When called from zhack, the
|
||||
* zfeature_info_t object's fi_feature field will be set to
|
||||
* SPA_FEATURE_NONE.
|
||||
*/
|
||||
if (feature->fi_feature != SPA_FEATURE_NONE) {
|
||||
uint64_t *refcount_cache =
|
||||
&spa->spa_feat_refcount_cache[feature->fi_feature];
|
||||
VERIFY3U(*refcount_cache, ==,
|
||||
atomic_swap_64(refcount_cache, refcount));
|
||||
}
|
||||
|
||||
if (refcount == 0)
|
||||
spa_deactivate_mos_feature(spa, feature->fi_guid);
|
||||
else if (feature->fi_mos)
|
||||
spa_activate_mos_feature(spa, feature->fi_guid);
|
||||
spa_activate_mos_feature(spa, feature->fi_guid, tx);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -282,6 +338,7 @@ feature_sync(spa_t *spa, zfeature_info_t *feature, uint64_t refcount,
|
|||
void
|
||||
feature_enable_sync(spa_t *spa, zfeature_info_t *feature, dmu_tx_t *tx)
|
||||
{
|
||||
uint64_t initial_refcount = feature->fi_activate_on_enable ? 1 : 0;
|
||||
uint64_t zapobj = feature->fi_can_readonly ?
|
||||
spa->spa_feat_for_write_obj : spa->spa_feat_for_read_obj;
|
||||
int i;
|
||||
|
@ -302,27 +359,43 @@ feature_enable_sync(spa_t *spa, zfeature_info_t *feature, dmu_tx_t *tx)
|
|||
VERIFY0(zap_update(spa->spa_meta_objset, spa->spa_feat_desc_obj,
|
||||
feature->fi_guid, 1, strlen(feature->fi_desc) + 1,
|
||||
feature->fi_desc, tx));
|
||||
feature_sync(spa, feature, 0, tx);
|
||||
|
||||
feature_sync(spa, feature, initial_refcount, tx);
|
||||
|
||||
if (spa_feature_is_enabled(spa, SPA_FEATURE_ENABLED_TXG)) {
|
||||
uint64_t enabling_txg = dmu_tx_get_txg(tx);
|
||||
|
||||
if (spa->spa_feat_enabled_txg_obj == 0ULL) {
|
||||
spa->spa_feat_enabled_txg_obj =
|
||||
zap_create_link(spa->spa_meta_objset,
|
||||
DMU_OTN_ZAP_METADATA, DMU_POOL_DIRECTORY_OBJECT,
|
||||
DMU_POOL_FEATURE_ENABLED_TXG, tx);
|
||||
}
|
||||
spa_feature_incr(spa, SPA_FEATURE_ENABLED_TXG, tx);
|
||||
|
||||
VERIFY0(zap_add(spa->spa_meta_objset,
|
||||
spa->spa_feat_enabled_txg_obj, feature->fi_guid,
|
||||
sizeof (uint64_t), 1, &enabling_txg, tx));
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
feature_do_action(spa_t *spa, spa_feature_t fid, feature_action_t action,
|
||||
dmu_tx_t *tx)
|
||||
{
|
||||
uint64_t refcount;
|
||||
uint64_t refcount = 0;
|
||||
zfeature_info_t *feature = &spa_feature_table[fid];
|
||||
uint64_t zapobj = feature->fi_can_readonly ?
|
||||
spa->spa_feat_for_write_obj : spa->spa_feat_for_read_obj;
|
||||
ASSERTV(uint64_t zapobj = feature->fi_can_readonly ?
|
||||
spa->spa_feat_for_write_obj : spa->spa_feat_for_read_obj);
|
||||
|
||||
ASSERT3U(fid, <, SPA_FEATURES);
|
||||
ASSERT(VALID_FEATURE_FID(fid));
|
||||
ASSERT(0 != zapobj);
|
||||
ASSERT(zfeature_is_valid_guid(feature->fi_guid));
|
||||
|
||||
ASSERT(dmu_tx_is_syncing(tx));
|
||||
ASSERT3U(spa_version(spa), >=, SPA_VERSION_FEATURES);
|
||||
|
||||
VERIFY0(zap_lookup(spa->spa_meta_objset, zapobj, feature->fi_guid,
|
||||
sizeof (uint64_t), 1, &refcount));
|
||||
VERIFY3U(feature_get_refcount(spa, feature, &refcount), !=, ENOTSUP);
|
||||
|
||||
switch (action) {
|
||||
case FEATURE_ACTION_INCR:
|
||||
|
@ -369,7 +442,7 @@ void
|
|||
spa_feature_enable(spa_t *spa, spa_feature_t fid, dmu_tx_t *tx)
|
||||
{
|
||||
ASSERT3U(spa_version(spa), >=, SPA_VERSION_FEATURES);
|
||||
ASSERT3U(fid, <, SPA_FEATURES);
|
||||
ASSERT(VALID_FEATURE_FID(fid));
|
||||
feature_enable_sync(spa, &spa_feature_table[fid], tx);
|
||||
}
|
||||
|
||||
|
@ -391,7 +464,7 @@ spa_feature_is_enabled(spa_t *spa, spa_feature_t fid)
|
|||
int err;
|
||||
uint64_t refcount = 0;
|
||||
|
||||
ASSERT3U(fid, <, SPA_FEATURES);
|
||||
ASSERT(VALID_FEATURE_FID(fid));
|
||||
if (spa_version(spa) < SPA_VERSION_FEATURES)
|
||||
return (B_FALSE);
|
||||
|
||||
|
@ -406,7 +479,7 @@ spa_feature_is_active(spa_t *spa, spa_feature_t fid)
|
|||
int err;
|
||||
uint64_t refcount = 0;
|
||||
|
||||
ASSERT3U(fid, <, SPA_FEATURES);
|
||||
ASSERT(VALID_FEATURE_FID(fid));
|
||||
if (spa_version(spa) < SPA_VERSION_FEATURES)
|
||||
return (B_FALSE);
|
||||
|
||||
|
@ -414,3 +487,26 @@ spa_feature_is_active(spa_t *spa, spa_feature_t fid)
|
|||
ASSERT(err == 0 || err == ENOTSUP);
|
||||
return (err == 0 && refcount > 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* For the feature specified by fid (which must depend on
|
||||
* SPA_FEATURE_ENABLED_TXG), return the TXG at which it was enabled in the
|
||||
* OUT txg argument.
|
||||
*
|
||||
* Returns B_TRUE if the feature is enabled, in which case txg will be filled
|
||||
* with the transaction group in which the specified feature was enabled.
|
||||
* Returns B_FALSE otherwise (i.e. if the feature is not enabled).
|
||||
*/
|
||||
boolean_t
|
||||
spa_feature_enabled_txg(spa_t *spa, spa_feature_t fid, uint64_t *txg) {
|
||||
int err;
|
||||
|
||||
ASSERT(VALID_FEATURE_FID(fid));
|
||||
if (spa_version(spa) < SPA_VERSION_FEATURES)
|
||||
return (B_FALSE);
|
||||
|
||||
err = feature_get_enabled_txg(spa, &spa_feature_table[fid], txg);
|
||||
ASSERT(err == 0 || err == ENOTSUP);
|
||||
|
||||
return (err == 0);
|
||||
}
|
||||
|
|
|
@ -119,10 +119,22 @@ zfeature_lookup_name(const char *name, spa_feature_t *res)
|
|||
return (ENOENT);
|
||||
}
|
||||
|
||||
boolean_t
|
||||
zfeature_depends_on(spa_feature_t fid, spa_feature_t check) {
|
||||
zfeature_info_t *feature = &spa_feature_table[fid];
|
||||
int i;
|
||||
|
||||
for (i = 0; feature->fi_depends[i] != SPA_FEATURE_NONE; i++) {
|
||||
if (feature->fi_depends[i] == check)
|
||||
return (B_TRUE);
|
||||
}
|
||||
return (B_FALSE);
|
||||
}
|
||||
|
||||
static void
|
||||
zfeature_register(spa_feature_t fid, const char *guid, const char *name,
|
||||
const char *desc, boolean_t readonly, boolean_t mos,
|
||||
const spa_feature_t *deps)
|
||||
boolean_t activate_on_enable, const spa_feature_t *deps)
|
||||
{
|
||||
zfeature_info_t *feature = &spa_feature_table[fid];
|
||||
static spa_feature_t nodeps[] = { SPA_FEATURE_NONE };
|
||||
|
@ -142,6 +154,7 @@ zfeature_register(spa_feature_t fid, const char *guid, const char *name,
|
|||
feature->fi_desc = desc;
|
||||
feature->fi_can_readonly = readonly;
|
||||
feature->fi_mos = mos;
|
||||
feature->fi_activate_on_enable = activate_on_enable;
|
||||
feature->fi_depends = deps;
|
||||
}
|
||||
|
||||
|
@ -150,18 +163,40 @@ zpool_feature_init(void)
|
|||
{
|
||||
zfeature_register(SPA_FEATURE_ASYNC_DESTROY,
|
||||
"com.delphix:async_destroy", "async_destroy",
|
||||
"Destroy filesystems asynchronously.", B_TRUE, B_FALSE, NULL);
|
||||
"Destroy filesystems asynchronously.", B_TRUE, B_FALSE,
|
||||
B_FALSE, NULL);
|
||||
|
||||
zfeature_register(SPA_FEATURE_EMPTY_BPOBJ,
|
||||
"com.delphix:empty_bpobj", "empty_bpobj",
|
||||
"Snapshots use less space.", B_TRUE, B_FALSE, NULL);
|
||||
"Snapshots use less space.", B_TRUE, B_FALSE,
|
||||
B_FALSE, NULL);
|
||||
|
||||
zfeature_register(SPA_FEATURE_LZ4_COMPRESS,
|
||||
"org.illumos:lz4_compress", "lz4_compress",
|
||||
"LZ4 compression algorithm support.", B_FALSE, B_FALSE, NULL);
|
||||
"LZ4 compression algorithm support.", B_FALSE, B_FALSE,
|
||||
B_FALSE, NULL);
|
||||
|
||||
zfeature_register(SPA_FEATURE_SPACEMAP_HISTOGRAM,
|
||||
"com.delphix:spacemap_histogram", "spacemap_histogram",
|
||||
"Spacemaps maintain space histograms.", B_TRUE, B_FALSE, NULL);
|
||||
"Spacemaps maintain space histograms.", B_TRUE, B_FALSE,
|
||||
B_FALSE, NULL);
|
||||
|
||||
zfeature_register(SPA_FEATURE_ENABLED_TXG,
|
||||
"com.delphix:enabled_txg", "enabled_txg",
|
||||
"Record txg at which a feature is enabled", B_TRUE, B_FALSE,
|
||||
B_FALSE, NULL);
|
||||
|
||||
{
|
||||
static spa_feature_t hole_birth_deps[] = { SPA_FEATURE_ENABLED_TXG,
|
||||
SPA_FEATURE_NONE };
|
||||
zfeature_register(SPA_FEATURE_HOLE_BIRTH,
|
||||
"com.delphix:hole_birth", "hole_birth",
|
||||
"Retain hole birth txg for more precise zfs send",
|
||||
B_FALSE, B_TRUE, B_TRUE, hole_birth_deps);
|
||||
}
|
||||
|
||||
zfeature_register(SPA_FEATURE_EXTENSIBLE_DATASET,
|
||||
"com.delphix:extensible_dataset", "extensible_dataset",
|
||||
"Enhanced dataset functionality, used by other features.",
|
||||
B_FALSE, B_FALSE, NULL);
|
||||
B_FALSE, B_FALSE, B_FALSE, NULL);
|
||||
}
|
||||
|
|
|
@ -558,7 +558,6 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
|
|||
dmu_buf_t *db;
|
||||
timestruc_t now;
|
||||
uint64_t gen, obj;
|
||||
int err;
|
||||
int bonuslen;
|
||||
sa_handle_t *sa_hdl;
|
||||
dmu_object_type_t obj_type;
|
||||
|
@ -591,10 +590,9 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
|
|||
*/
|
||||
if (S_ISDIR(vap->va_mode)) {
|
||||
if (zsb->z_replay) {
|
||||
err = zap_create_claim_norm(zsb->z_os, obj,
|
||||
VERIFY0(zap_create_claim_norm(zsb->z_os, obj,
|
||||
zsb->z_norm, DMU_OT_DIRECTORY_CONTENTS,
|
||||
obj_type, bonuslen, tx);
|
||||
ASSERT0(err);
|
||||
obj_type, bonuslen, tx));
|
||||
} else {
|
||||
obj = zap_create_norm(zsb->z_os,
|
||||
zsb->z_norm, DMU_OT_DIRECTORY_CONTENTS,
|
||||
|
@ -602,10 +600,9 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
|
|||
}
|
||||
} else {
|
||||
if (zsb->z_replay) {
|
||||
err = dmu_object_claim(zsb->z_os, obj,
|
||||
VERIFY0(dmu_object_claim(zsb->z_os, obj,
|
||||
DMU_OT_PLAIN_FILE_CONTENTS, 0,
|
||||
obj_type, bonuslen, tx);
|
||||
ASSERT0(err);
|
||||
obj_type, bonuslen, tx));
|
||||
} else {
|
||||
obj = dmu_object_alloc(zsb->z_os,
|
||||
DMU_OT_PLAIN_FILE_CONTENTS, 0,
|
||||
|
@ -784,8 +781,7 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
|
|||
|
||||
if (obj_type == DMU_OT_ZNODE ||
|
||||
acl_ids->z_aclp->z_version < ZFS_ACL_VERSION_FUID) {
|
||||
err = zfs_aclset_common(*zpp, acl_ids->z_aclp, cr, tx);
|
||||
ASSERT0(err);
|
||||
VERIFY0(zfs_aclset_common(*zpp, acl_ids->z_aclp, cr, tx));
|
||||
}
|
||||
kmem_free(sa_attrs, sizeof (sa_bulk_attr_t) * ZPL_END);
|
||||
ZFS_OBJ_HOLD_EXIT(zsb, obj);
|
||||
|
|
|
@ -395,7 +395,8 @@ zil_claim_log_block(zilog_t *zilog, blkptr_t *bp, void *tx, uint64_t first_txg)
|
|||
* Claim log block if not already committed and not already claimed.
|
||||
* If tx == NULL, just verify that the block is claimable.
|
||||
*/
|
||||
if (bp->blk_birth < first_txg || zil_bp_tree_add(zilog, bp) != 0)
|
||||
if (BP_IS_HOLE(bp) || bp->blk_birth < first_txg ||
|
||||
zil_bp_tree_add(zilog, bp) != 0)
|
||||
return (0);
|
||||
|
||||
return (zio_wait(zio_claim(NULL, zilog->zl_spa,
|
||||
|
@ -445,7 +446,8 @@ zil_free_log_record(zilog_t *zilog, lr_t *lrc, void *tx, uint64_t claim_txg)
|
|||
* If we previously claimed it, we need to free it.
|
||||
*/
|
||||
if (claim_txg != 0 && lrc->lrc_txtype == TX_WRITE &&
|
||||
bp->blk_birth >= claim_txg && zil_bp_tree_add(zilog, bp) == 0)
|
||||
bp->blk_birth >= claim_txg && zil_bp_tree_add(zilog, bp) == 0 &&
|
||||
!BP_IS_HOLE(bp))
|
||||
zio_free(zilog->zl_spa, dmu_tx_get_txg(tx), bp);
|
||||
|
||||
return (0);
|
||||
|
|
|
@ -36,6 +36,7 @@
|
|||
#include <sys/dmu_objset.h>
|
||||
#include <sys/arc.h>
|
||||
#include <sys/ddt.h>
|
||||
#include <sys/zfeature.h>
|
||||
|
||||
/*
|
||||
* ==========================================================================
|
||||
|
@ -1096,7 +1097,7 @@ zio_write_bp_init(zio_t *zio)
|
|||
BP_ZERO(bp);
|
||||
}
|
||||
|
||||
if (bp->blk_birth == zio->io_txg) {
|
||||
if (!BP_IS_HOLE(bp) && bp->blk_birth == zio->io_txg) {
|
||||
/*
|
||||
* We're rewriting an existing block, which means we're
|
||||
* working on behalf of spa_sync(). For spa_sync() to
|
||||
|
@ -1140,7 +1141,8 @@ zio_write_bp_init(zio_t *zio)
|
|||
* spa_sync() to allocate new blocks, but force rewrites after that.
|
||||
* There should only be a handful of blocks after pass 1 in any case.
|
||||
*/
|
||||
if (bp->blk_birth == zio->io_txg && BP_GET_PSIZE(bp) == psize &&
|
||||
if (!BP_IS_HOLE(bp) && bp->blk_birth == zio->io_txg &&
|
||||
BP_GET_PSIZE(bp) == psize &&
|
||||
pass >= zfs_sync_pass_rewrite) {
|
||||
enum zio_stage gang_stages = zio->io_pipeline & ZIO_GANG_STAGES;
|
||||
ASSERT(psize != 0);
|
||||
|
@ -1152,15 +1154,22 @@ zio_write_bp_init(zio_t *zio)
|
|||
}
|
||||
|
||||
if (psize == 0) {
|
||||
if (zio->io_bp_orig.blk_birth != 0 &&
|
||||
spa_feature_is_active(spa, SPA_FEATURE_HOLE_BIRTH)) {
|
||||
BP_SET_LSIZE(bp, lsize);
|
||||
BP_SET_TYPE(bp, zp->zp_type);
|
||||
BP_SET_LEVEL(bp, zp->zp_level);
|
||||
BP_SET_BIRTH(bp, zio->io_txg, 0);
|
||||
}
|
||||
zio->io_pipeline = ZIO_INTERLOCK_PIPELINE;
|
||||
} else {
|
||||
ASSERT(zp->zp_checksum != ZIO_CHECKSUM_GANG_HEADER);
|
||||
BP_SET_LSIZE(bp, lsize);
|
||||
BP_SET_TYPE(bp, zp->zp_type);
|
||||
BP_SET_LEVEL(bp, zp->zp_level);
|
||||
BP_SET_PSIZE(bp, psize);
|
||||
BP_SET_COMPRESS(bp, compress);
|
||||
BP_SET_CHECKSUM(bp, zp->zp_checksum);
|
||||
BP_SET_TYPE(bp, zp->zp_type);
|
||||
BP_SET_LEVEL(bp, zp->zp_level);
|
||||
BP_SET_DEDUP(bp, zp->zp_dedup);
|
||||
BP_SET_BYTEORDER(bp, ZFS_HOST_BYTEORDER);
|
||||
if (zp->zp_dedup) {
|
||||
|
@ -2613,7 +2622,7 @@ zio_vdev_io_start(zio_t *zio)
|
|||
if (vd->vdev_ops->vdev_op_leaf &&
|
||||
(zio->io_type == ZIO_TYPE_READ || zio->io_type == ZIO_TYPE_WRITE)) {
|
||||
|
||||
if (zio->io_type == ZIO_TYPE_READ && vdev_cache_read(zio) == 0)
|
||||
if (zio->io_type == ZIO_TYPE_READ && vdev_cache_read(zio))
|
||||
return (ZIO_PIPELINE_CONTINUE);
|
||||
|
||||
if ((zio = vdev_queue_io(zio)) == NULL)
|
||||
|
|
Loading…
Reference in New Issue