ZAP: Massively switch to _by_dnode() interfaces
Before this change ZAP called dnode_hold() for almost every block access, that was clearly visible in profiler under heavy load, such as BRT. This patch makes it always hold the dnode reference between zap_lockdir() and zap_unlockdir(). It allows to avoid most of dnode operations between those. It also adds several new _by_dnode() APIs to ZAP and uses them in BRT code. Also adds dmu_prefetch_by_dnode() variant and uses it in the ZAP code. After this there remains only one call to dmu_buf_dnode_enter(), which seems to be unneeded. So remove the call and the functions. Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Alexander Motin <mav@FreeBSD.org> Sponsored by: iXsystems, Inc. Closes #15951
This commit is contained in:
parent
bf8f72359d
commit
80cc516295
|
@ -752,8 +752,6 @@ void dmu_buf_sub_user_size(dmu_buf_t *db, uint64_t nsub);
|
|||
void *dmu_buf_get_user(dmu_buf_t *db);
|
||||
|
||||
objset_t *dmu_buf_get_objset(dmu_buf_t *db);
|
||||
dnode_t *dmu_buf_dnode_enter(dmu_buf_t *db);
|
||||
void dmu_buf_dnode_exit(dmu_buf_t *db);
|
||||
|
||||
/* Block until any in-progress dmu buf user evictions complete. */
|
||||
void dmu_buf_user_evict_wait(void);
|
||||
|
@ -902,6 +900,8 @@ extern uint_t zfs_max_recordsize;
|
|||
*/
|
||||
void dmu_prefetch(objset_t *os, uint64_t object, int64_t level, uint64_t offset,
|
||||
uint64_t len, enum zio_priority pri);
|
||||
void dmu_prefetch_by_dnode(dnode_t *dn, int64_t level, uint64_t offset,
|
||||
uint64_t len, enum zio_priority pri);
|
||||
void dmu_prefetch_dnode(objset_t *os, uint64_t object, enum zio_priority pri);
|
||||
|
||||
typedef struct dmu_object_info {
|
||||
|
|
|
@ -253,6 +253,9 @@ int zap_add_by_dnode(dnode_t *dn, const char *key,
|
|||
int zap_add_uint64(objset_t *ds, uint64_t zapobj, const uint64_t *key,
|
||||
int key_numints, int integer_size, uint64_t num_integers,
|
||||
const void *val, dmu_tx_t *tx);
|
||||
int zap_add_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
|
||||
int key_numints, int integer_size, uint64_t num_integers,
|
||||
const void *val, dmu_tx_t *tx);
|
||||
|
||||
/*
|
||||
* Set the attribute with the given name to the given value. If an
|
||||
|
@ -267,6 +270,9 @@ int zap_update(objset_t *ds, uint64_t zapobj, const char *name,
|
|||
int zap_update_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
|
||||
int key_numints,
|
||||
int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx);
|
||||
int zap_update_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
|
||||
int key_numints,
|
||||
int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx);
|
||||
|
||||
/*
|
||||
* Get the length (in integers) and the integer size of the specified
|
||||
|
@ -292,6 +298,8 @@ int zap_remove_norm(objset_t *ds, uint64_t zapobj, const char *name,
|
|||
int zap_remove_by_dnode(dnode_t *dn, const char *name, dmu_tx_t *tx);
|
||||
int zap_remove_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
|
||||
int key_numints, dmu_tx_t *tx);
|
||||
int zap_remove_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
|
||||
int key_numints, dmu_tx_t *tx);
|
||||
|
||||
/*
|
||||
* Returns (in *count) the number of attributes in the specified zap
|
||||
|
|
|
@ -145,6 +145,7 @@ typedef struct zap {
|
|||
dmu_buf_user_t zap_dbu;
|
||||
objset_t *zap_objset;
|
||||
uint64_t zap_object;
|
||||
dnode_t *zap_dnode;
|
||||
struct dmu_buf *zap_dbuf;
|
||||
krwlock_t zap_rwlock;
|
||||
boolean_t zap_ismicro;
|
||||
|
|
|
@ -955,52 +955,10 @@ brt_entry_prefetch(brt_t *brt, uint64_t vdevid, brt_entry_t *bre)
|
|||
if (mos_entries == 0)
|
||||
return;
|
||||
|
||||
BRT_DEBUG("ZAP prefetch: object=%llu vdev=%llu offset=%llu",
|
||||
(u_longlong_t)mos_entries, (u_longlong_t)vdevid,
|
||||
(u_longlong_t)bre->bre_offset);
|
||||
(void) zap_prefetch_uint64(brt->brt_mos, mos_entries,
|
||||
(uint64_t *)&bre->bre_offset, BRT_KEY_WORDS);
|
||||
}
|
||||
|
||||
static int
|
||||
brt_entry_update(brt_t *brt, brt_vdev_t *brtvd, brt_entry_t *bre, dmu_tx_t *tx)
|
||||
{
|
||||
int error;
|
||||
|
||||
ASSERT(RW_LOCK_HELD(&brt->brt_lock));
|
||||
ASSERT(brtvd->bv_mos_entries != 0);
|
||||
ASSERT(bre->bre_refcount > 0);
|
||||
|
||||
error = zap_update_uint64(brt->brt_mos, brtvd->bv_mos_entries,
|
||||
(uint64_t *)&bre->bre_offset, BRT_KEY_WORDS, 1,
|
||||
sizeof (bre->bre_refcount), &bre->bre_refcount, tx);
|
||||
BRT_DEBUG("ZAP update: object=%llu vdev=%llu offset=%llu count=%llu "
|
||||
"error=%d", (u_longlong_t)brtvd->bv_mos_entries,
|
||||
(u_longlong_t)brtvd->bv_vdevid, (u_longlong_t)bre->bre_offset,
|
||||
(u_longlong_t)bre->bre_refcount, error);
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
static int
|
||||
brt_entry_remove(brt_t *brt, brt_vdev_t *brtvd, brt_entry_t *bre, dmu_tx_t *tx)
|
||||
{
|
||||
int error;
|
||||
|
||||
ASSERT(RW_LOCK_HELD(&brt->brt_lock));
|
||||
ASSERT(brtvd->bv_mos_entries != 0);
|
||||
ASSERT0(bre->bre_refcount);
|
||||
|
||||
error = zap_remove_uint64(brt->brt_mos, brtvd->bv_mos_entries,
|
||||
(uint64_t *)&bre->bre_offset, BRT_KEY_WORDS, tx);
|
||||
BRT_DEBUG("ZAP remove: object=%llu vdev=%llu offset=%llu count=%llu "
|
||||
"error=%d", (u_longlong_t)brtvd->bv_mos_entries,
|
||||
(u_longlong_t)brtvd->bv_vdevid, (u_longlong_t)bre->bre_offset,
|
||||
(u_longlong_t)bre->bre_refcount, error);
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* Return TRUE if we _can_ have BRT entry for this bp. It might be false
|
||||
* positive, but gives us quick answer if we should look into BRT, which
|
||||
|
@ -1559,24 +1517,16 @@ brt_pending_apply(spa_t *spa, uint64_t txg)
|
|||
}
|
||||
|
||||
static void
|
||||
brt_sync_entry(brt_t *brt, brt_vdev_t *brtvd, brt_entry_t *bre, dmu_tx_t *tx)
|
||||
brt_sync_entry(dnode_t *dn, brt_entry_t *bre, dmu_tx_t *tx)
|
||||
{
|
||||
|
||||
ASSERT(RW_WRITE_HELD(&brt->brt_lock));
|
||||
ASSERT(brtvd->bv_mos_entries != 0);
|
||||
|
||||
if (bre->bre_refcount == 0) {
|
||||
int error;
|
||||
|
||||
error = brt_entry_remove(brt, brtvd, bre, tx);
|
||||
ASSERT(error == 0 || error == ENOENT);
|
||||
/*
|
||||
* If error == ENOENT then zfs_clone_range() was done from a
|
||||
* removed (but opened) file (open(), unlink()).
|
||||
*/
|
||||
ASSERT(brt_entry_lookup(brt, brtvd, bre) == ENOENT);
|
||||
int error = zap_remove_uint64_by_dnode(dn, &bre->bre_offset,
|
||||
BRT_KEY_WORDS, tx);
|
||||
VERIFY(error == 0 || error == ENOENT);
|
||||
} else {
|
||||
VERIFY0(brt_entry_update(brt, brtvd, bre, tx));
|
||||
VERIFY0(zap_update_uint64_by_dnode(dn, &bre->bre_offset,
|
||||
BRT_KEY_WORDS, 1, sizeof (bre->bre_refcount),
|
||||
&bre->bre_refcount, tx));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1585,6 +1535,7 @@ brt_sync_table(brt_t *brt, dmu_tx_t *tx)
|
|||
{
|
||||
brt_vdev_t *brtvd;
|
||||
brt_entry_t *bre;
|
||||
dnode_t *dn;
|
||||
uint64_t vdevid;
|
||||
void *c;
|
||||
|
||||
|
@ -1608,14 +1559,19 @@ brt_sync_table(brt_t *brt, dmu_tx_t *tx)
|
|||
if (brtvd->bv_mos_brtvdev == 0)
|
||||
brt_vdev_create(brt, brtvd, tx);
|
||||
|
||||
VERIFY0(dnode_hold(brt->brt_mos, brtvd->bv_mos_entries,
|
||||
FTAG, &dn));
|
||||
|
||||
c = NULL;
|
||||
while ((bre = avl_destroy_nodes(&brtvd->bv_tree, &c)) != NULL) {
|
||||
brt_sync_entry(brt, brtvd, bre, tx);
|
||||
brt_sync_entry(dn, bre, tx);
|
||||
brt_entry_free(bre);
|
||||
ASSERT(brt->brt_nentries > 0);
|
||||
brt->brt_nentries--;
|
||||
}
|
||||
|
||||
dnode_rele(dn, FTAG);
|
||||
|
||||
brt_vdev_sync(brt, brtvd, tx);
|
||||
|
||||
if (brtvd->bv_totalcount == 0)
|
||||
|
|
|
@ -4174,21 +4174,6 @@ dmu_buf_get_objset(dmu_buf_t *db)
|
|||
return (dbi->db_objset);
|
||||
}
|
||||
|
||||
dnode_t *
|
||||
dmu_buf_dnode_enter(dmu_buf_t *db)
|
||||
{
|
||||
dmu_buf_impl_t *dbi = (dmu_buf_impl_t *)db;
|
||||
DB_DNODE_ENTER(dbi);
|
||||
return (DB_DNODE(dbi));
|
||||
}
|
||||
|
||||
void
|
||||
dmu_buf_dnode_exit(dmu_buf_t *db)
|
||||
{
|
||||
dmu_buf_impl_t *dbi = (dmu_buf_impl_t *)db;
|
||||
DB_DNODE_EXIT(dbi);
|
||||
}
|
||||
|
||||
static void
|
||||
dbuf_check_blkptr(dnode_t *dn, dmu_buf_impl_t *db)
|
||||
{
|
||||
|
|
|
@ -712,8 +712,6 @@ dmu_prefetch(objset_t *os, uint64_t object, int64_t level, uint64_t offset,
|
|||
uint64_t len, zio_priority_t pri)
|
||||
{
|
||||
dnode_t *dn;
|
||||
int64_t level2 = level;
|
||||
uint64_t start, end, start2, end2;
|
||||
|
||||
if (dmu_prefetch_max == 0 || len == 0) {
|
||||
dmu_prefetch_dnode(os, object, pri);
|
||||
|
@ -723,6 +721,18 @@ dmu_prefetch(objset_t *os, uint64_t object, int64_t level, uint64_t offset,
|
|||
if (dnode_hold(os, object, FTAG, &dn) != 0)
|
||||
return;
|
||||
|
||||
dmu_prefetch_by_dnode(dn, level, offset, len, pri);
|
||||
|
||||
dnode_rele(dn, FTAG);
|
||||
}
|
||||
|
||||
void
|
||||
dmu_prefetch_by_dnode(dnode_t *dn, int64_t level, uint64_t offset,
|
||||
uint64_t len, zio_priority_t pri)
|
||||
{
|
||||
int64_t level2 = level;
|
||||
uint64_t start, end, start2, end2;
|
||||
|
||||
/*
|
||||
* Depending on len we may do two prefetches: blocks [start, end) at
|
||||
* level, and following blocks [start2, end2) at higher level2.
|
||||
|
@ -762,8 +772,6 @@ dmu_prefetch(objset_t *os, uint64_t object, int64_t level, uint64_t offset,
|
|||
for (uint64_t i = start2; i < end2; i++)
|
||||
dbuf_prefetch(dn, level2, i, pri, 0);
|
||||
rw_exit(&dn->dn_struct_rwlock);
|
||||
|
||||
dnode_rele(dn, FTAG);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -2563,6 +2571,8 @@ EXPORT_SYMBOL(dmu_bonus_hold_by_dnode);
|
|||
EXPORT_SYMBOL(dmu_buf_hold_array_by_bonus);
|
||||
EXPORT_SYMBOL(dmu_buf_rele_array);
|
||||
EXPORT_SYMBOL(dmu_prefetch);
|
||||
EXPORT_SYMBOL(dmu_prefetch_by_dnode);
|
||||
EXPORT_SYMBOL(dmu_prefetch_dnode);
|
||||
EXPORT_SYMBOL(dmu_free_range);
|
||||
EXPORT_SYMBOL(dmu_free_long_range);
|
||||
EXPORT_SYMBOL(dmu_free_long_object);
|
||||
|
|
|
@ -2353,7 +2353,6 @@ receive_process_write_record(struct receive_writer_arg *rwa,
|
|||
if (rwa->heal) {
|
||||
blkptr_t *bp;
|
||||
dmu_buf_t *dbp;
|
||||
dnode_t *dn;
|
||||
int flags = DB_RF_CANFAIL;
|
||||
|
||||
if (rwa->raw)
|
||||
|
@ -2385,19 +2384,15 @@ receive_process_write_record(struct receive_writer_arg *rwa,
|
|||
dmu_buf_rele(dbp, FTAG);
|
||||
return (err);
|
||||
}
|
||||
dn = dmu_buf_dnode_enter(dbp);
|
||||
/* Make sure the on-disk block and recv record sizes match */
|
||||
if (drrw->drr_logical_size !=
|
||||
dn->dn_datablkszsec << SPA_MINBLOCKSHIFT) {
|
||||
if (drrw->drr_logical_size != dbp->db_size) {
|
||||
err = ENOTSUP;
|
||||
dmu_buf_dnode_exit(dbp);
|
||||
dmu_buf_rele(dbp, FTAG);
|
||||
return (err);
|
||||
}
|
||||
/* Get the block pointer for the corrupted block */
|
||||
bp = dmu_buf_get_blkptr(dbp);
|
||||
err = do_corrective_recv(rwa, drrw, rrd, bp);
|
||||
dmu_buf_dnode_exit(dbp);
|
||||
dmu_buf_rele(dbp, FTAG);
|
||||
return (err);
|
||||
}
|
||||
|
|
|
@ -133,7 +133,7 @@ fzap_upgrade(zap_t *zap, dmu_tx_t *tx, zap_flags_t flags)
|
|||
* set up block 1 - the first leaf
|
||||
*/
|
||||
dmu_buf_t *db;
|
||||
VERIFY0(dmu_buf_hold(zap->zap_objset, zap->zap_object,
|
||||
VERIFY0(dmu_buf_hold_by_dnode(zap->zap_dnode,
|
||||
1<<FZAP_BLOCK_SHIFT(zap), FTAG, &db, DMU_READ_NO_PREFETCH));
|
||||
dmu_buf_will_dirty(db, tx);
|
||||
|
||||
|
@ -182,7 +182,7 @@ zap_table_grow(zap_t *zap, zap_table_phys_t *tbl,
|
|||
newblk = zap_allocate_blocks(zap, tbl->zt_numblks * 2);
|
||||
tbl->zt_nextblk = newblk;
|
||||
ASSERT0(tbl->zt_blks_copied);
|
||||
dmu_prefetch(zap->zap_objset, zap->zap_object, 0,
|
||||
dmu_prefetch_by_dnode(zap->zap_dnode, 0,
|
||||
tbl->zt_blk << bs, tbl->zt_numblks << bs,
|
||||
ZIO_PRIORITY_SYNC_READ);
|
||||
}
|
||||
|
@ -193,21 +193,21 @@ zap_table_grow(zap_t *zap, zap_table_phys_t *tbl,
|
|||
|
||||
uint64_t b = tbl->zt_blks_copied;
|
||||
dmu_buf_t *db_old;
|
||||
int err = dmu_buf_hold(zap->zap_objset, zap->zap_object,
|
||||
int err = dmu_buf_hold_by_dnode(zap->zap_dnode,
|
||||
(tbl->zt_blk + b) << bs, FTAG, &db_old, DMU_READ_NO_PREFETCH);
|
||||
if (err != 0)
|
||||
return (err);
|
||||
|
||||
/* first half of entries in old[b] go to new[2*b+0] */
|
||||
dmu_buf_t *db_new;
|
||||
VERIFY0(dmu_buf_hold(zap->zap_objset, zap->zap_object,
|
||||
VERIFY0(dmu_buf_hold_by_dnode(zap->zap_dnode,
|
||||
(newblk + 2*b+0) << bs, FTAG, &db_new, DMU_READ_NO_PREFETCH));
|
||||
dmu_buf_will_dirty(db_new, tx);
|
||||
transfer_func(db_old->db_data, db_new->db_data, hepb);
|
||||
dmu_buf_rele(db_new, FTAG);
|
||||
|
||||
/* second half of entries in old[b] go to new[2*b+1] */
|
||||
VERIFY0(dmu_buf_hold(zap->zap_objset, zap->zap_object,
|
||||
VERIFY0(dmu_buf_hold_by_dnode(zap->zap_dnode,
|
||||
(newblk + 2*b+1) << bs, FTAG, &db_new, DMU_READ_NO_PREFETCH));
|
||||
dmu_buf_will_dirty(db_new, tx);
|
||||
transfer_func((uint64_t *)db_old->db_data + hepb,
|
||||
|
@ -255,7 +255,7 @@ zap_table_store(zap_t *zap, zap_table_phys_t *tbl, uint64_t idx, uint64_t val,
|
|||
uint64_t off = idx & ((1<<(bs-3))-1);
|
||||
|
||||
dmu_buf_t *db;
|
||||
int err = dmu_buf_hold(zap->zap_objset, zap->zap_object,
|
||||
int err = dmu_buf_hold_by_dnode(zap->zap_dnode,
|
||||
(tbl->zt_blk + blk) << bs, FTAG, &db, DMU_READ_NO_PREFETCH);
|
||||
if (err != 0)
|
||||
return (err);
|
||||
|
@ -267,7 +267,7 @@ zap_table_store(zap_t *zap, zap_table_phys_t *tbl, uint64_t idx, uint64_t val,
|
|||
uint64_t off2 = idx2 & ((1<<(bs-3))-1);
|
||||
dmu_buf_t *db2;
|
||||
|
||||
err = dmu_buf_hold(zap->zap_objset, zap->zap_object,
|
||||
err = dmu_buf_hold_by_dnode(zap->zap_dnode,
|
||||
(tbl->zt_nextblk + blk2) << bs, FTAG, &db2,
|
||||
DMU_READ_NO_PREFETCH);
|
||||
if (err != 0) {
|
||||
|
@ -296,16 +296,9 @@ zap_table_load(zap_t *zap, zap_table_phys_t *tbl, uint64_t idx, uint64_t *valp)
|
|||
uint64_t blk = idx >> (bs-3);
|
||||
uint64_t off = idx & ((1<<(bs-3))-1);
|
||||
|
||||
/*
|
||||
* Note: this is equivalent to dmu_buf_hold(), but we use
|
||||
* _dnode_enter / _by_dnode because it's faster because we don't
|
||||
* have to hold the dnode.
|
||||
*/
|
||||
dnode_t *dn = dmu_buf_dnode_enter(zap->zap_dbuf);
|
||||
dmu_buf_t *db;
|
||||
int err = dmu_buf_hold_by_dnode(dn,
|
||||
int err = dmu_buf_hold_by_dnode(zap->zap_dnode,
|
||||
(tbl->zt_blk + blk) << bs, FTAG, &db, DMU_READ_NO_PREFETCH);
|
||||
dmu_buf_dnode_exit(zap->zap_dbuf);
|
||||
if (err != 0)
|
||||
return (err);
|
||||
*valp = ((uint64_t *)db->db_data)[off];
|
||||
|
@ -319,11 +312,9 @@ zap_table_load(zap_t *zap, zap_table_phys_t *tbl, uint64_t idx, uint64_t *valp)
|
|||
*/
|
||||
blk = (idx*2) >> (bs-3);
|
||||
|
||||
dn = dmu_buf_dnode_enter(zap->zap_dbuf);
|
||||
err = dmu_buf_hold_by_dnode(dn,
|
||||
err = dmu_buf_hold_by_dnode(zap->zap_dnode,
|
||||
(tbl->zt_nextblk + blk) << bs, FTAG, &db,
|
||||
DMU_READ_NO_PREFETCH);
|
||||
dmu_buf_dnode_exit(zap->zap_dbuf);
|
||||
if (err == 0)
|
||||
dmu_buf_rele(db, FTAG);
|
||||
}
|
||||
|
@ -368,7 +359,7 @@ zap_grow_ptrtbl(zap_t *zap, dmu_tx_t *tx)
|
|||
|
||||
uint64_t newblk = zap_allocate_blocks(zap, 1);
|
||||
dmu_buf_t *db_new;
|
||||
int err = dmu_buf_hold(zap->zap_objset, zap->zap_object,
|
||||
int err = dmu_buf_hold_by_dnode(zap->zap_dnode,
|
||||
newblk << FZAP_BLOCK_SHIFT(zap), FTAG, &db_new,
|
||||
DMU_READ_NO_PREFETCH);
|
||||
if (err != 0)
|
||||
|
@ -433,7 +424,7 @@ zap_create_leaf(zap_t *zap, dmu_tx_t *tx)
|
|||
l->l_blkid = zap_allocate_blocks(zap, 1);
|
||||
l->l_dbuf = NULL;
|
||||
|
||||
VERIFY0(dmu_buf_hold(zap->zap_objset, zap->zap_object,
|
||||
VERIFY0(dmu_buf_hold_by_dnode(zap->zap_dnode,
|
||||
l->l_blkid << FZAP_BLOCK_SHIFT(zap), NULL, &l->l_dbuf,
|
||||
DMU_READ_NO_PREFETCH));
|
||||
dmu_buf_init_user(&l->l_dbu, zap_leaf_evict_sync, NULL, &l->l_dbuf);
|
||||
|
@ -533,10 +524,8 @@ zap_get_leaf_byblk(zap_t *zap, uint64_t blkid, dmu_tx_t *tx, krw_t lt,
|
|||
return (SET_ERROR(ENOENT));
|
||||
|
||||
int bs = FZAP_BLOCK_SHIFT(zap);
|
||||
dnode_t *dn = dmu_buf_dnode_enter(zap->zap_dbuf);
|
||||
int err = dmu_buf_hold_by_dnode(dn,
|
||||
int err = dmu_buf_hold_by_dnode(zap->zap_dnode,
|
||||
blkid << bs, NULL, &db, DMU_READ_NO_PREFETCH);
|
||||
dmu_buf_dnode_exit(zap->zap_dbuf);
|
||||
if (err != 0)
|
||||
return (err);
|
||||
|
||||
|
@ -985,7 +974,7 @@ fzap_prefetch(zap_name_t *zn)
|
|||
if (zap_idx_to_blk(zap, idx, &blk) != 0)
|
||||
return;
|
||||
int bs = FZAP_BLOCK_SHIFT(zap);
|
||||
dmu_prefetch(zap->zap_objset, zap->zap_object, 0, blk << bs, 1 << bs,
|
||||
dmu_prefetch_by_dnode(zap->zap_dnode, 0, blk << bs, 1 << bs,
|
||||
ZIO_PRIORITY_SYNC_READ);
|
||||
}
|
||||
|
||||
|
@ -1228,7 +1217,7 @@ fzap_cursor_retrieve(zap_t *zap, zap_cursor_t *zc, zap_attribute_t *za)
|
|||
*/
|
||||
if (zc->zc_hash == 0 && zap_iterate_prefetch &&
|
||||
zc->zc_prefetch && zap_f_phys(zap)->zap_freeblk > 2) {
|
||||
dmu_prefetch(zc->zc_objset, zc->zc_zapobj, 0, 0,
|
||||
dmu_prefetch_by_dnode(zap->zap_dnode, 0, 0,
|
||||
zap_f_phys(zap)->zap_freeblk << FZAP_BLOCK_SHIFT(zap),
|
||||
ZIO_PRIORITY_ASYNC_READ);
|
||||
}
|
||||
|
@ -1356,7 +1345,7 @@ fzap_get_stats(zap_t *zap, zap_stats_t *zs)
|
|||
zap_stats_ptrtbl(zap, &ZAP_EMBEDDED_PTRTBL_ENT(zap, 0),
|
||||
1 << ZAP_EMBEDDED_PTRTBL_SHIFT(zap), zs);
|
||||
} else {
|
||||
dmu_prefetch(zap->zap_objset, zap->zap_object, 0,
|
||||
dmu_prefetch_by_dnode(zap->zap_dnode, 0,
|
||||
zap_f_phys(zap)->zap_ptrtbl.zt_blk << bs,
|
||||
zap_f_phys(zap)->zap_ptrtbl.zt_numblks << bs,
|
||||
ZIO_PRIORITY_SYNC_READ);
|
||||
|
@ -1366,7 +1355,7 @@ fzap_get_stats(zap_t *zap, zap_stats_t *zs)
|
|||
dmu_buf_t *db;
|
||||
int err;
|
||||
|
||||
err = dmu_buf_hold(zap->zap_objset, zap->zap_object,
|
||||
err = dmu_buf_hold_by_dnode(zap->zap_dnode,
|
||||
(zap_f_phys(zap)->zap_ptrtbl.zt_blk + b) << bs,
|
||||
FTAG, &db, DMU_READ_NO_PREFETCH);
|
||||
if (err == 0) {
|
||||
|
|
|
@ -415,7 +415,7 @@ mze_destroy(zap_t *zap)
|
|||
}
|
||||
|
||||
static zap_t *
|
||||
mzap_open(objset_t *os, uint64_t obj, dmu_buf_t *db)
|
||||
mzap_open(dmu_buf_t *db)
|
||||
{
|
||||
zap_t *winner;
|
||||
uint64_t *zap_hdr = (uint64_t *)db->db_data;
|
||||
|
@ -427,8 +427,8 @@ mzap_open(objset_t *os, uint64_t obj, dmu_buf_t *db)
|
|||
zap_t *zap = kmem_zalloc(sizeof (zap_t), KM_SLEEP);
|
||||
rw_init(&zap->zap_rwlock, NULL, RW_DEFAULT, NULL);
|
||||
rw_enter(&zap->zap_rwlock, RW_WRITER);
|
||||
zap->zap_objset = os;
|
||||
zap->zap_object = obj;
|
||||
zap->zap_objset = dmu_buf_get_objset(db);
|
||||
zap->zap_object = db->db_object;
|
||||
zap->zap_dbuf = db;
|
||||
|
||||
if (zap_block_type != ZBT_MICRO) {
|
||||
|
@ -518,7 +518,7 @@ handle_winner:
|
|||
* have the specified tag.
|
||||
*/
|
||||
static int
|
||||
zap_lockdir_impl(dmu_buf_t *db, const void *tag, dmu_tx_t *tx,
|
||||
zap_lockdir_impl(dnode_t *dn, dmu_buf_t *db, const void *tag, dmu_tx_t *tx,
|
||||
krw_t lti, boolean_t fatreader, boolean_t adding, zap_t **zapp)
|
||||
{
|
||||
ASSERT0(db->db_offset);
|
||||
|
@ -528,13 +528,13 @@ zap_lockdir_impl(dmu_buf_t *db, const void *tag, dmu_tx_t *tx,
|
|||
|
||||
*zapp = NULL;
|
||||
|
||||
dmu_object_info_from_db(db, &doi);
|
||||
dmu_object_info_from_dnode(dn, &doi);
|
||||
if (DMU_OT_BYTESWAP(doi.doi_type) != DMU_BSWAP_ZAP)
|
||||
return (SET_ERROR(EINVAL));
|
||||
|
||||
zap_t *zap = dmu_buf_get_user(db);
|
||||
if (zap == NULL) {
|
||||
zap = mzap_open(os, obj, db);
|
||||
zap = mzap_open(db);
|
||||
if (zap == NULL) {
|
||||
/*
|
||||
* mzap_open() didn't like what it saw on-disk.
|
||||
|
@ -563,6 +563,7 @@ zap_lockdir_impl(dmu_buf_t *db, const void *tag, dmu_tx_t *tx,
|
|||
}
|
||||
|
||||
zap->zap_objset = os;
|
||||
zap->zap_dnode = dn;
|
||||
|
||||
if (lt == RW_WRITER)
|
||||
dmu_buf_will_dirty(db, tx);
|
||||
|
@ -598,23 +599,16 @@ zap_lockdir_by_dnode(dnode_t *dn, dmu_tx_t *tx,
|
|||
zap_t **zapp)
|
||||
{
|
||||
dmu_buf_t *db;
|
||||
int err;
|
||||
|
||||
int err = dmu_buf_hold_by_dnode(dn, 0, tag, &db, DMU_READ_NO_PREFETCH);
|
||||
if (err != 0) {
|
||||
err = dmu_buf_hold_by_dnode(dn, 0, tag, &db, DMU_READ_NO_PREFETCH);
|
||||
if (err != 0)
|
||||
return (err);
|
||||
}
|
||||
#ifdef ZFS_DEBUG
|
||||
{
|
||||
dmu_object_info_t doi;
|
||||
dmu_object_info_from_db(db, &doi);
|
||||
ASSERT3U(DMU_OT_BYTESWAP(doi.doi_type), ==, DMU_BSWAP_ZAP);
|
||||
}
|
||||
#endif
|
||||
|
||||
err = zap_lockdir_impl(db, tag, tx, lti, fatreader, adding, zapp);
|
||||
if (err != 0) {
|
||||
err = zap_lockdir_impl(dn, db, tag, tx, lti, fatreader, adding, zapp);
|
||||
if (err != 0)
|
||||
dmu_buf_rele(db, tag);
|
||||
}
|
||||
else
|
||||
VERIFY(dnode_add_ref(dn, tag));
|
||||
return (err);
|
||||
}
|
||||
|
||||
|
@ -623,21 +617,23 @@ zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx,
|
|||
krw_t lti, boolean_t fatreader, boolean_t adding, const void *tag,
|
||||
zap_t **zapp)
|
||||
{
|
||||
dnode_t *dn;
|
||||
dmu_buf_t *db;
|
||||
int err;
|
||||
|
||||
int err = dmu_buf_hold(os, obj, 0, tag, &db, DMU_READ_NO_PREFETCH);
|
||||
err = dnode_hold(os, obj, tag, &dn);
|
||||
if (err != 0)
|
||||
return (err);
|
||||
#ifdef ZFS_DEBUG
|
||||
{
|
||||
dmu_object_info_t doi;
|
||||
dmu_object_info_from_db(db, &doi);
|
||||
ASSERT3U(DMU_OT_BYTESWAP(doi.doi_type), ==, DMU_BSWAP_ZAP);
|
||||
err = dmu_buf_hold_by_dnode(dn, 0, tag, &db, DMU_READ_NO_PREFETCH);
|
||||
if (err != 0) {
|
||||
dnode_rele(dn, tag);
|
||||
return (err);
|
||||
}
|
||||
#endif
|
||||
err = zap_lockdir_impl(db, tag, tx, lti, fatreader, adding, zapp);
|
||||
if (err != 0)
|
||||
err = zap_lockdir_impl(dn, db, tag, tx, lti, fatreader, adding, zapp);
|
||||
if (err != 0) {
|
||||
dmu_buf_rele(db, tag);
|
||||
dnode_rele(dn, tag);
|
||||
}
|
||||
return (err);
|
||||
}
|
||||
|
||||
|
@ -645,6 +641,7 @@ void
|
|||
zap_unlockdir(zap_t *zap, const void *tag)
|
||||
{
|
||||
rw_exit(&zap->zap_rwlock);
|
||||
dnode_rele(zap->zap_dnode, tag);
|
||||
dmu_buf_rele(zap->zap_dbuf, tag);
|
||||
}
|
||||
|
||||
|
@ -730,7 +727,8 @@ mzap_create_impl(dnode_t *dn, int normflags, zap_flags_t flags, dmu_tx_t *tx)
|
|||
if (flags != 0) {
|
||||
zap_t *zap;
|
||||
/* Only fat zap supports flags; upgrade immediately. */
|
||||
VERIFY0(zap_lockdir_impl(db, FTAG, tx, RW_WRITER,
|
||||
VERIFY(dnode_add_ref(dn, FTAG));
|
||||
VERIFY0(zap_lockdir_impl(dn, db, FTAG, tx, RW_WRITER,
|
||||
B_FALSE, B_FALSE, &zap));
|
||||
VERIFY0(mzap_upgrade(&zap, FTAG, tx, flags));
|
||||
zap_unlockdir(zap, FTAG);
|
||||
|
@ -1325,6 +1323,26 @@ zap_add_by_dnode(dnode_t *dn, const char *key,
|
|||
return (err);
|
||||
}
|
||||
|
||||
static int
|
||||
zap_add_uint64_impl(zap_t *zap, const uint64_t *key,
|
||||
int key_numints, int integer_size, uint64_t num_integers,
|
||||
const void *val, dmu_tx_t *tx, const void *tag)
|
||||
{
|
||||
int err;
|
||||
|
||||
zap_name_t *zn = zap_name_alloc_uint64(zap, key, key_numints);
|
||||
if (zn == NULL) {
|
||||
zap_unlockdir(zap, tag);
|
||||
return (SET_ERROR(ENOTSUP));
|
||||
}
|
||||
err = fzap_add(zn, integer_size, num_integers, val, tag, tx);
|
||||
zap = zn->zn_zap; /* fzap_add() may change zap */
|
||||
zap_name_free(zn);
|
||||
if (zap != NULL) /* may be NULL if fzap_add() failed */
|
||||
zap_unlockdir(zap, tag);
|
||||
return (err);
|
||||
}
|
||||
|
||||
int
|
||||
zap_add_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
|
||||
int key_numints, int integer_size, uint64_t num_integers,
|
||||
|
@ -1336,16 +1354,26 @@ zap_add_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
|
|||
zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap);
|
||||
if (err != 0)
|
||||
return (err);
|
||||
zap_name_t *zn = zap_name_alloc_uint64(zap, key, key_numints);
|
||||
if (zn == NULL) {
|
||||
zap_unlockdir(zap, FTAG);
|
||||
return (SET_ERROR(ENOTSUP));
|
||||
err = zap_add_uint64_impl(zap, key, key_numints,
|
||||
integer_size, num_integers, val, tx, FTAG);
|
||||
/* zap_add_uint64_impl() calls zap_unlockdir() */
|
||||
return (err);
|
||||
}
|
||||
err = fzap_add(zn, integer_size, num_integers, val, FTAG, tx);
|
||||
zap = zn->zn_zap; /* fzap_add() may change zap */
|
||||
zap_name_free(zn);
|
||||
if (zap != NULL) /* may be NULL if fzap_add() failed */
|
||||
zap_unlockdir(zap, FTAG);
|
||||
|
||||
int
|
||||
zap_add_uint64_by_dnode(dnode_t *dn, const uint64_t *key,
|
||||
int key_numints, int integer_size, uint64_t num_integers,
|
||||
const void *val, dmu_tx_t *tx)
|
||||
{
|
||||
zap_t *zap;
|
||||
|
||||
int err =
|
||||
zap_lockdir_by_dnode(dn, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap);
|
||||
if (err != 0)
|
||||
return (err);
|
||||
err = zap_add_uint64_impl(zap, key, key_numints,
|
||||
integer_size, num_integers, val, tx, FTAG);
|
||||
/* zap_add_uint64_impl() calls zap_unlockdir() */
|
||||
return (err);
|
||||
}
|
||||
|
||||
|
@ -1396,10 +1424,30 @@ zap_update(objset_t *os, uint64_t zapobj, const char *name,
|
|||
return (err);
|
||||
}
|
||||
|
||||
static int
|
||||
zap_update_uint64_impl(zap_t *zap, const uint64_t *key, int key_numints,
|
||||
int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx,
|
||||
const void *tag)
|
||||
{
|
||||
int err;
|
||||
|
||||
zap_name_t *zn = zap_name_alloc_uint64(zap, key, key_numints);
|
||||
if (zn == NULL) {
|
||||
zap_unlockdir(zap, tag);
|
||||
return (SET_ERROR(ENOTSUP));
|
||||
}
|
||||
err = fzap_update(zn, integer_size, num_integers, val, tag, tx);
|
||||
zap = zn->zn_zap; /* fzap_update() may change zap */
|
||||
zap_name_free(zn);
|
||||
if (zap != NULL) /* may be NULL if fzap_upgrade() failed */
|
||||
zap_unlockdir(zap, tag);
|
||||
return (err);
|
||||
}
|
||||
|
||||
int
|
||||
zap_update_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
|
||||
int key_numints,
|
||||
int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx)
|
||||
int key_numints, int integer_size, uint64_t num_integers, const void *val,
|
||||
dmu_tx_t *tx)
|
||||
{
|
||||
zap_t *zap;
|
||||
|
||||
|
@ -1407,16 +1455,25 @@ zap_update_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
|
|||
zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap);
|
||||
if (err != 0)
|
||||
return (err);
|
||||
zap_name_t *zn = zap_name_alloc_uint64(zap, key, key_numints);
|
||||
if (zn == NULL) {
|
||||
zap_unlockdir(zap, FTAG);
|
||||
return (SET_ERROR(ENOTSUP));
|
||||
err = zap_update_uint64_impl(zap, key, key_numints,
|
||||
integer_size, num_integers, val, tx, FTAG);
|
||||
/* zap_update_uint64_impl() calls zap_unlockdir() */
|
||||
return (err);
|
||||
}
|
||||
err = fzap_update(zn, integer_size, num_integers, val, FTAG, tx);
|
||||
zap = zn->zn_zap; /* fzap_update() may change zap */
|
||||
zap_name_free(zn);
|
||||
if (zap != NULL) /* may be NULL if fzap_upgrade() failed */
|
||||
zap_unlockdir(zap, FTAG);
|
||||
|
||||
int
|
||||
zap_update_uint64_by_dnode(dnode_t *dn, const uint64_t *key, int key_numints,
|
||||
int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx)
|
||||
{
|
||||
zap_t *zap;
|
||||
|
||||
int err =
|
||||
zap_lockdir_by_dnode(dn, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap);
|
||||
if (err != 0)
|
||||
return (err);
|
||||
err = zap_update_uint64_impl(zap, key, key_numints,
|
||||
integer_size, num_integers, val, tx, FTAG);
|
||||
/* zap_update_uint64_impl() calls zap_unlockdir() */
|
||||
return (err);
|
||||
}
|
||||
|
||||
|
@ -1481,6 +1538,23 @@ zap_remove_by_dnode(dnode_t *dn, const char *name, dmu_tx_t *tx)
|
|||
return (err);
|
||||
}
|
||||
|
||||
static int
|
||||
zap_remove_uint64_impl(zap_t *zap, const uint64_t *key, int key_numints,
|
||||
dmu_tx_t *tx, const void *tag)
|
||||
{
|
||||
int err;
|
||||
|
||||
zap_name_t *zn = zap_name_alloc_uint64(zap, key, key_numints);
|
||||
if (zn == NULL) {
|
||||
zap_unlockdir(zap, tag);
|
||||
return (SET_ERROR(ENOTSUP));
|
||||
}
|
||||
err = fzap_remove(zn, tx);
|
||||
zap_name_free(zn);
|
||||
zap_unlockdir(zap, tag);
|
||||
return (err);
|
||||
}
|
||||
|
||||
int
|
||||
zap_remove_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
|
||||
int key_numints, dmu_tx_t *tx)
|
||||
|
@ -1491,14 +1565,23 @@ zap_remove_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
|
|||
zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, FALSE, FTAG, &zap);
|
||||
if (err != 0)
|
||||
return (err);
|
||||
zap_name_t *zn = zap_name_alloc_uint64(zap, key, key_numints);
|
||||
if (zn == NULL) {
|
||||
zap_unlockdir(zap, FTAG);
|
||||
return (SET_ERROR(ENOTSUP));
|
||||
err = zap_remove_uint64_impl(zap, key, key_numints, tx, FTAG);
|
||||
/* zap_remove_uint64_impl() calls zap_unlockdir() */
|
||||
return (err);
|
||||
}
|
||||
err = fzap_remove(zn, tx);
|
||||
zap_name_free(zn);
|
||||
zap_unlockdir(zap, FTAG);
|
||||
|
||||
int
|
||||
zap_remove_uint64_by_dnode(dnode_t *dn, const uint64_t *key, int key_numints,
|
||||
dmu_tx_t *tx)
|
||||
{
|
||||
zap_t *zap;
|
||||
|
||||
int err =
|
||||
zap_lockdir_by_dnode(dn, tx, RW_WRITER, TRUE, FALSE, FTAG, &zap);
|
||||
if (err != 0)
|
||||
return (err);
|
||||
err = zap_remove_uint64_impl(zap, key, key_numints, tx, FTAG);
|
||||
/* zap_remove_uint64_impl() calls zap_unlockdir() */
|
||||
return (err);
|
||||
}
|
||||
|
||||
|
@ -1704,14 +1787,17 @@ EXPORT_SYMBOL(zap_prefetch_uint64);
|
|||
EXPORT_SYMBOL(zap_add);
|
||||
EXPORT_SYMBOL(zap_add_by_dnode);
|
||||
EXPORT_SYMBOL(zap_add_uint64);
|
||||
EXPORT_SYMBOL(zap_add_uint64_by_dnode);
|
||||
EXPORT_SYMBOL(zap_update);
|
||||
EXPORT_SYMBOL(zap_update_uint64);
|
||||
EXPORT_SYMBOL(zap_update_uint64_by_dnode);
|
||||
EXPORT_SYMBOL(zap_length);
|
||||
EXPORT_SYMBOL(zap_length_uint64);
|
||||
EXPORT_SYMBOL(zap_remove);
|
||||
EXPORT_SYMBOL(zap_remove_by_dnode);
|
||||
EXPORT_SYMBOL(zap_remove_norm);
|
||||
EXPORT_SYMBOL(zap_remove_uint64);
|
||||
EXPORT_SYMBOL(zap_remove_uint64_by_dnode);
|
||||
EXPORT_SYMBOL(zap_count);
|
||||
EXPORT_SYMBOL(zap_value_search);
|
||||
EXPORT_SYMBOL(zap_join);
|
||||
|
|
Loading…
Reference in New Issue