Merge commit 'refs/top-bases/gcc-missing-case' into gcc-missing-case
Conflicts: lib/libzfs/libzfs_dataset.c module/zfs/spa.c
This commit is contained in:
commit
a5a71d128d
|
@ -1 +1 @@
|
|||
http://dlc.sun.com/osol/on/downloads/b108/on-src.tar.bz2
|
||||
http://dlc.sun.com/osol/on/downloads/b117/on-src.tar.bz2
|
||||
|
|
138
cmd/zdb/zdb.c
138
cmd/zdb/zdb.c
|
@ -19,7 +19,7 @@
|
|||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
|
@ -102,7 +102,9 @@ usage(void)
|
|||
(void) fprintf(stderr, " -C cached pool configuration\n");
|
||||
(void) fprintf(stderr, " -i intent logs\n");
|
||||
(void) fprintf(stderr, " -b block statistics\n");
|
||||
(void) fprintf(stderr, " -c checksum all data blocks\n");
|
||||
(void) fprintf(stderr, " -m metaslabs\n");
|
||||
(void) fprintf(stderr, " -c checksum all metadata (twice for "
|
||||
"all data) blocks\n");
|
||||
(void) fprintf(stderr, " -s report stats on zdb's I/O\n");
|
||||
(void) fprintf(stderr, " -S <user|all>:<cksum_alg|all> -- "
|
||||
"dump blkptr signatures\n");
|
||||
|
@ -125,6 +127,11 @@ usage(void)
|
|||
exit(1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Called for usage errors that are discovered after a call to spa_open(),
|
||||
* dmu_bonus_hold(), or pool_match(). abort() is called for other errors.
|
||||
*/
|
||||
|
||||
static void
|
||||
fatal(const char *fmt, ...)
|
||||
{
|
||||
|
@ -136,7 +143,7 @@ fatal(const char *fmt, ...)
|
|||
va_end(ap);
|
||||
(void) fprintf(stderr, "\n");
|
||||
|
||||
abort();
|
||||
exit(1);
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -209,7 +216,7 @@ dump_packed_nvlist(objset_t *os, uint64_t object, void *data, size_t size)
|
|||
size_t nvsize = *(uint64_t *)data;
|
||||
char *packed = umem_alloc(nvsize, UMEM_NOFAIL);
|
||||
|
||||
VERIFY(0 == dmu_read(os, object, 0, nvsize, packed));
|
||||
VERIFY(0 == dmu_read(os, object, 0, nvsize, packed, DMU_READ_PREFETCH));
|
||||
|
||||
VERIFY(nvlist_unpack(packed, nvsize, &nv, 0) == 0);
|
||||
|
||||
|
@ -435,7 +442,7 @@ dump_spacemap(objset_t *os, space_map_obj_t *smo, space_map_t *sm)
|
|||
alloc = 0;
|
||||
for (offset = 0; offset < smo->smo_objsize; offset += sizeof (entry)) {
|
||||
VERIFY(0 == dmu_read(os, smo->smo_object, offset,
|
||||
sizeof (entry), &entry));
|
||||
sizeof (entry), &entry, DMU_READ_PREFETCH));
|
||||
if (SM_DEBUG_DECODE(entry)) {
|
||||
(void) printf("\t\t[%4llu] %s: txg %llu, pass %llu\n",
|
||||
(u_longlong_t)(offset / sizeof (entry)),
|
||||
|
@ -466,6 +473,21 @@ dump_spacemap(objset_t *os, space_map_obj_t *smo, space_map_t *sm)
|
|||
}
|
||||
}
|
||||
|
||||
static void
|
||||
dump_metaslab_stats(metaslab_t *msp)
|
||||
{
|
||||
char maxbuf[5];
|
||||
space_map_t *sm = &msp->ms_map;
|
||||
avl_tree_t *t = sm->sm_pp_root;
|
||||
int free_pct = sm->sm_space * 100 / sm->sm_size;
|
||||
|
||||
nicenum(space_map_maxsize(sm), maxbuf);
|
||||
|
||||
(void) printf("\t %20s %10lu %7s %6s %4s %4d%%\n",
|
||||
"segments", avl_numnodes(t), "maxsize", maxbuf,
|
||||
"freepct", free_pct);
|
||||
}
|
||||
|
||||
static void
|
||||
dump_metaslab(metaslab_t *msp)
|
||||
{
|
||||
|
@ -476,22 +498,28 @@ dump_metaslab(metaslab_t *msp)
|
|||
|
||||
nicenum(msp->ms_map.sm_size - smo->smo_alloc, freebuf);
|
||||
|
||||
if (dump_opt['d'] <= 5) {
|
||||
(void) printf("\t%10llx %10llu %5s\n",
|
||||
(u_longlong_t)msp->ms_map.sm_start,
|
||||
(u_longlong_t)smo->smo_object,
|
||||
freebuf);
|
||||
return;
|
||||
}
|
||||
|
||||
(void) printf(
|
||||
"\tvdev %llu offset %08llx spacemap %4llu free %5s\n",
|
||||
"\tvdev %5llu offset %12llx spacemap %6llu free %5s\n",
|
||||
(u_longlong_t)vd->vdev_id, (u_longlong_t)msp->ms_map.sm_start,
|
||||
(u_longlong_t)smo->smo_object, freebuf);
|
||||
|
||||
ASSERT(msp->ms_map.sm_size == (1ULL << vd->vdev_ms_shift));
|
||||
if (dump_opt['m'] > 1) {
|
||||
mutex_enter(&msp->ms_lock);
|
||||
VERIFY(space_map_load(&msp->ms_map, zfs_metaslab_ops,
|
||||
SM_FREE, &msp->ms_smo, spa->spa_meta_objset) == 0);
|
||||
dump_metaslab_stats(msp);
|
||||
space_map_unload(&msp->ms_map);
|
||||
mutex_exit(&msp->ms_lock);
|
||||
}
|
||||
|
||||
if (dump_opt['d'] > 5 || dump_opt['m'] > 2) {
|
||||
ASSERT(msp->ms_map.sm_size == (1ULL << vd->vdev_ms_shift));
|
||||
|
||||
mutex_enter(&msp->ms_lock);
|
||||
dump_spacemap(spa->spa_meta_objset, smo, &msp->ms_map);
|
||||
mutex_exit(&msp->ms_lock);
|
||||
}
|
||||
|
||||
dump_spacemap(spa->spa_meta_objset, smo, &msp->ms_map);
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -506,14 +534,12 @@ dump_metaslabs(spa_t *spa)
|
|||
for (c = 0; c < rvd->vdev_children; c++) {
|
||||
vd = rvd->vdev_child[c];
|
||||
|
||||
(void) printf("\n vdev %llu\n\n", (u_longlong_t)vd->vdev_id);
|
||||
(void) printf("\t%-10s %-19s %-15s %-10s\n",
|
||||
"vdev", "offset", "spacemap", "free");
|
||||
(void) printf("\t%10s %19s %15s %10s\n",
|
||||
"----------", "-------------------",
|
||||
"---------------", "-------------");
|
||||
|
||||
if (dump_opt['d'] <= 5) {
|
||||
(void) printf("\t%10s %10s %5s\n",
|
||||
"offset", "spacemap", "free");
|
||||
(void) printf("\t%10s %10s %5s\n",
|
||||
"------", "--------", "----");
|
||||
}
|
||||
for (m = 0; m < vd->vdev_ms_count; m++)
|
||||
dump_metaslab(vd->vdev_ms[m]);
|
||||
(void) printf("\n");
|
||||
|
@ -917,6 +943,7 @@ dump_uidgid(objset_t *os, znode_phys_t *zp)
|
|||
/* first find the fuid object. It lives in the master node */
|
||||
VERIFY(zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES,
|
||||
8, 1, &fuid_obj) == 0);
|
||||
zfs_fuid_avl_tree_create(&idx_tree, &domain_tree);
|
||||
(void) zfs_fuid_table_load(os, fuid_obj,
|
||||
&idx_tree, &domain_tree);
|
||||
fuid_table_loaded = B_TRUE;
|
||||
|
@ -1020,6 +1047,8 @@ static object_viewer_t *object_viewer[DMU_OT_NUMTYPES] = {
|
|||
dump_packed_nvlist, /* FUID nvlist size */
|
||||
dump_zap, /* DSL dataset next clones */
|
||||
dump_zap, /* DSL scrub queue */
|
||||
dump_zap, /* ZFS user/group used */
|
||||
dump_zap, /* ZFS user/group quota */
|
||||
};
|
||||
|
||||
static void
|
||||
|
@ -1083,6 +1112,14 @@ dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header)
|
|||
}
|
||||
|
||||
if (verbosity >= 4) {
|
||||
(void) printf("\tdnode flags: %s%s\n",
|
||||
(dn->dn_phys->dn_flags & DNODE_FLAG_USED_BYTES) ?
|
||||
"USED_BYTES " : "",
|
||||
(dn->dn_phys->dn_flags & DNODE_FLAG_USERUSED_ACCOUNTED) ?
|
||||
"USERUSED_ACCOUNTED " : "");
|
||||
(void) printf("\tdnode maxblkid: %llu\n",
|
||||
(longlong_t)dn->dn_phys->dn_maxblkid);
|
||||
|
||||
object_viewer[doi.doi_bonus_type](os, object, bonus, bsize);
|
||||
object_viewer[doi.doi_type](os, object, NULL, 0);
|
||||
*print_header = 1;
|
||||
|
@ -1137,7 +1174,7 @@ dump_dir(objset_t *os)
|
|||
uint64_t object, object_count;
|
||||
uint64_t refdbytes, usedobjs, scratch;
|
||||
char numbuf[8];
|
||||
char blkbuf[BP_SPRINTF_LEN];
|
||||
char blkbuf[BP_SPRINTF_LEN + 20];
|
||||
char osname[MAXNAMELEN];
|
||||
char *type = "UNKNOWN";
|
||||
int verbosity = dump_opt['d'];
|
||||
|
@ -1163,8 +1200,8 @@ dump_dir(objset_t *os)
|
|||
nicenum(refdbytes, numbuf);
|
||||
|
||||
if (verbosity >= 4) {
|
||||
(void) strcpy(blkbuf, ", rootbp ");
|
||||
sprintf_blkptr(blkbuf + strlen(blkbuf),
|
||||
(void) sprintf(blkbuf + strlen(blkbuf), ", rootbp ");
|
||||
(void) sprintf_blkptr(blkbuf + strlen(blkbuf),
|
||||
BP_SPRINTF_LEN - strlen(blkbuf), os->os->os_rootbp);
|
||||
} else {
|
||||
blkbuf[0] = '\0';
|
||||
|
@ -1199,7 +1236,12 @@ dump_dir(objset_t *os)
|
|||
}
|
||||
|
||||
dump_object(os, 0, verbosity, &print_header);
|
||||
object_count = 1;
|
||||
object_count = 0;
|
||||
if (os->os->os_userused_dnode &&
|
||||
os->os->os_userused_dnode->dn_type != 0) {
|
||||
dump_object(os, DMU_USERUSED_OBJECT, verbosity, &print_header);
|
||||
dump_object(os, DMU_GROUPUSED_OBJECT, verbosity, &print_header);
|
||||
}
|
||||
|
||||
object = 0;
|
||||
while ((error = dmu_object_next(os, &object, B_FALSE, 0)) == 0) {
|
||||
|
@ -1211,8 +1253,10 @@ dump_dir(objset_t *os)
|
|||
|
||||
(void) printf("\n");
|
||||
|
||||
if (error != ESRCH)
|
||||
fatal("dmu_object_next() = %d", error);
|
||||
if (error != ESRCH) {
|
||||
(void) fprintf(stderr, "dmu_object_next() = %d\n", error);
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -1395,7 +1439,8 @@ static space_map_ops_t zdb_space_map_ops = {
|
|||
zdb_space_map_unload,
|
||||
NULL, /* alloc */
|
||||
zdb_space_map_claim,
|
||||
NULL /* free */
|
||||
NULL, /* free */
|
||||
NULL /* maxsize */
|
||||
};
|
||||
|
||||
static void
|
||||
|
@ -1505,13 +1550,25 @@ zdb_blkptr_cb(spa_t *spa, blkptr_t *bp, const zbookmark_t *zb,
|
|||
{
|
||||
zdb_cb_t *zcb = arg;
|
||||
char blkbuf[BP_SPRINTF_LEN];
|
||||
dmu_object_type_t type;
|
||||
boolean_t is_l0_metadata;
|
||||
|
||||
if (bp == NULL)
|
||||
return (0);
|
||||
|
||||
zdb_count_block(spa, zcb, bp, BP_GET_TYPE(bp));
|
||||
type = BP_GET_TYPE(bp);
|
||||
|
||||
if (dump_opt['c'] || dump_opt['S']) {
|
||||
zdb_count_block(spa, zcb, bp, type);
|
||||
|
||||
/*
|
||||
* if we do metadata-only checksumming there's no need to checksum
|
||||
* indirect blocks here because it is done during traverse
|
||||
*/
|
||||
is_l0_metadata = (BP_GET_LEVEL(bp) == 0 && type < DMU_OT_NUMTYPES &&
|
||||
dmu_ot[type].ot_metadata);
|
||||
|
||||
if (dump_opt['c'] > 1 || dump_opt['S'] ||
|
||||
(dump_opt['c'] && is_l0_metadata)) {
|
||||
int ioerr, size;
|
||||
void *data;
|
||||
|
||||
|
@ -1523,7 +1580,7 @@ zdb_blkptr_cb(spa_t *spa, blkptr_t *bp, const zbookmark_t *zb,
|
|||
free(data);
|
||||
|
||||
/* We expect io errors on intent log */
|
||||
if (ioerr && BP_GET_TYPE(bp) != DMU_OT_INTENT_LOG) {
|
||||
if (ioerr && type != DMU_OT_INTENT_LOG) {
|
||||
zcb->zcb_haderrors = 1;
|
||||
zcb->zcb_errors[ioerr]++;
|
||||
|
||||
|
@ -1571,8 +1628,9 @@ dump_block_stats(spa_t *spa)
|
|||
int c, e;
|
||||
|
||||
if (!dump_opt['S']) {
|
||||
(void) printf("\nTraversing all blocks %s%s%s%s...\n",
|
||||
(void) printf("\nTraversing all blocks %s%s%s%s%s...\n",
|
||||
(dump_opt['c'] || !dump_opt['L']) ? "to verify " : "",
|
||||
(dump_opt['c'] == 1) ? "metadata " : "",
|
||||
dump_opt['c'] ? "checksums " : "",
|
||||
(dump_opt['c'] && !dump_opt['L']) ? "and verify " : "",
|
||||
!dump_opt['L'] ? "nothing leaked " : "");
|
||||
|
@ -1772,14 +1830,17 @@ dump_zpool(spa_t *spa)
|
|||
if (dump_opt['u'])
|
||||
dump_uberblock(&spa->spa_uberblock);
|
||||
|
||||
if (dump_opt['d'] || dump_opt['i']) {
|
||||
if (dump_opt['d'] || dump_opt['i'] || dump_opt['m']) {
|
||||
dump_dir(dp->dp_meta_objset);
|
||||
if (dump_opt['d'] >= 3) {
|
||||
dump_bplist(dp->dp_meta_objset,
|
||||
spa->spa_sync_bplist_obj, "Deferred frees");
|
||||
dump_dtl(spa->spa_root_vdev, 0);
|
||||
dump_metaslabs(spa);
|
||||
}
|
||||
|
||||
if (dump_opt['d'] >= 3 || dump_opt['m'])
|
||||
dump_metaslabs(spa);
|
||||
|
||||
(void) dmu_objset_find(spa_name(spa), dump_one_dir, NULL,
|
||||
DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN);
|
||||
}
|
||||
|
@ -2255,13 +2316,14 @@ main(int argc, char **argv)
|
|||
|
||||
dprintf_setup(&argc, argv);
|
||||
|
||||
while ((c = getopt(argc, argv, "udibcsvCLS:U:lRep:t:")) != -1) {
|
||||
while ((c = getopt(argc, argv, "udibcmsvCLS:U:lRep:t:")) != -1) {
|
||||
switch (c) {
|
||||
case 'u':
|
||||
case 'd':
|
||||
case 'i':
|
||||
case 'b':
|
||||
case 'c':
|
||||
case 'm':
|
||||
case 's':
|
||||
case 'C':
|
||||
case 'l':
|
||||
|
@ -2397,7 +2459,7 @@ main(int argc, char **argv)
|
|||
}
|
||||
|
||||
if (error == 0)
|
||||
error = spa_import_faulted(argv[0],
|
||||
error = spa_import_verbatim(argv[0],
|
||||
exported_conf, nvl);
|
||||
|
||||
nvlist_free(nvl);
|
||||
|
|
|
@ -19,12 +19,10 @@
|
|||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
/*
|
||||
* Print intent log header and statistics.
|
||||
*/
|
||||
|
@ -345,8 +343,10 @@ dump_intent_log(zilog_t *zilog)
|
|||
if (zh->zh_log.blk_birth == 0 || verbose < 2)
|
||||
return;
|
||||
|
||||
(void) printf("\n ZIL header: claim_txg %llu, seq %llu\n",
|
||||
(u_longlong_t)zh->zh_claim_txg, (u_longlong_t)zh->zh_replay_seq);
|
||||
(void) printf("\n ZIL header: claim_txg %llu, claim_seq %llu",
|
||||
(u_longlong_t)zh->zh_claim_txg, (u_longlong_t)zh->zh_claim_seq);
|
||||
(void) printf(" replay_seq %llu, flags 0x%llx\n",
|
||||
(u_longlong_t)zh->zh_replay_seq, (u_longlong_t)zh->zh_flags);
|
||||
|
||||
if (verbose >= 4)
|
||||
print_log_bp(&zh->zh_log, "\n\tfirst block: ");
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
|
@ -53,11 +53,14 @@ typedef struct zfs_node {
|
|||
} zfs_node_t;
|
||||
|
||||
typedef struct callback_data {
|
||||
uu_avl_t *cb_avl;
|
||||
int cb_flags;
|
||||
zfs_type_t cb_types;
|
||||
zfs_sort_column_t *cb_sortcol;
|
||||
zprop_list_t **cb_proplist;
|
||||
uu_avl_t *cb_avl;
|
||||
int cb_flags;
|
||||
zfs_type_t cb_types;
|
||||
zfs_sort_column_t *cb_sortcol;
|
||||
zprop_list_t **cb_proplist;
|
||||
int cb_depth_limit;
|
||||
int cb_depth;
|
||||
uint8_t cb_props_table[ZFS_NUM_PROPS];
|
||||
} callback_data_t;
|
||||
|
||||
uu_avl_pool_t *avl_pool;
|
||||
|
@ -98,10 +101,17 @@ zfs_callback(zfs_handle_t *zhp, void *data)
|
|||
uu_avl_node_init(node, &node->zn_avlnode, avl_pool);
|
||||
if (uu_avl_find(cb->cb_avl, node, cb->cb_sortcol,
|
||||
&idx) == NULL) {
|
||||
if (cb->cb_proplist &&
|
||||
zfs_expand_proplist(zhp, cb->cb_proplist) != 0) {
|
||||
free(node);
|
||||
return (-1);
|
||||
if (cb->cb_proplist) {
|
||||
if ((*cb->cb_proplist) &&
|
||||
!(*cb->cb_proplist)->pl_all)
|
||||
zfs_prune_proplist(zhp,
|
||||
cb->cb_props_table);
|
||||
|
||||
if (zfs_expand_proplist(zhp, cb->cb_proplist)
|
||||
!= 0) {
|
||||
free(node);
|
||||
return (-1);
|
||||
}
|
||||
}
|
||||
uu_avl_insert(cb->cb_avl, node, idx);
|
||||
dontclose = 1;
|
||||
|
@ -113,11 +123,15 @@ zfs_callback(zfs_handle_t *zhp, void *data)
|
|||
/*
|
||||
* Recurse if necessary.
|
||||
*/
|
||||
if (cb->cb_flags & ZFS_ITER_RECURSE) {
|
||||
if (cb->cb_flags & ZFS_ITER_RECURSE &&
|
||||
((cb->cb_flags & ZFS_ITER_DEPTH_LIMIT) == 0 ||
|
||||
cb->cb_depth < cb->cb_depth_limit)) {
|
||||
cb->cb_depth++;
|
||||
if (zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM)
|
||||
(void) zfs_iter_filesystems(zhp, zfs_callback, data);
|
||||
if ((zfs_get_type(zhp) != ZFS_TYPE_SNAPSHOT) && include_snaps)
|
||||
(void) zfs_iter_snapshots(zhp, zfs_callback, data);
|
||||
cb->cb_depth--;
|
||||
}
|
||||
|
||||
if (!dontclose)
|
||||
|
@ -325,10 +339,10 @@ zfs_sort(const void *larg, const void *rarg, void *data)
|
|||
|
||||
int
|
||||
zfs_for_each(int argc, char **argv, int flags, zfs_type_t types,
|
||||
zfs_sort_column_t *sortcol, zprop_list_t **proplist,
|
||||
zfs_sort_column_t *sortcol, zprop_list_t **proplist, int limit,
|
||||
zfs_iter_f callback, void *data)
|
||||
{
|
||||
callback_data_t cb;
|
||||
callback_data_t cb = {0};
|
||||
int ret = 0;
|
||||
zfs_node_t *node;
|
||||
uu_avl_walk_t *walk;
|
||||
|
@ -346,6 +360,45 @@ zfs_for_each(int argc, char **argv, int flags, zfs_type_t types,
|
|||
cb.cb_flags = flags;
|
||||
cb.cb_proplist = proplist;
|
||||
cb.cb_types = types;
|
||||
cb.cb_depth_limit = limit;
|
||||
/*
|
||||
* If cb_proplist is provided then in the zfs_handles created we
|
||||
* retain only those properties listed in cb_proplist and sortcol.
|
||||
* The rest are pruned. So, the caller should make sure that no other
|
||||
* properties other than those listed in cb_proplist/sortcol are
|
||||
* accessed.
|
||||
*
|
||||
* If cb_proplist is NULL then we retain all the properties. We
|
||||
* always retain the zoned property, which some other properties
|
||||
* need (userquota & friends), and the createtxg property, which
|
||||
* we need to sort snapshots.
|
||||
*/
|
||||
if (cb.cb_proplist && *cb.cb_proplist) {
|
||||
zprop_list_t *p = *cb.cb_proplist;
|
||||
|
||||
while (p) {
|
||||
if (p->pl_prop >= ZFS_PROP_TYPE &&
|
||||
p->pl_prop < ZFS_NUM_PROPS) {
|
||||
cb.cb_props_table[p->pl_prop] = B_TRUE;
|
||||
}
|
||||
p = p->pl_next;
|
||||
}
|
||||
|
||||
while (sortcol) {
|
||||
if (sortcol->sc_prop >= ZFS_PROP_TYPE &&
|
||||
sortcol->sc_prop < ZFS_NUM_PROPS) {
|
||||
cb.cb_props_table[sortcol->sc_prop] = B_TRUE;
|
||||
}
|
||||
sortcol = sortcol->sc_next;
|
||||
}
|
||||
|
||||
cb.cb_props_table[ZFS_PROP_ZONED] = B_TRUE;
|
||||
cb.cb_props_table[ZFS_PROP_CREATETXG] = B_TRUE;
|
||||
} else {
|
||||
(void) memset(cb.cb_props_table, B_TRUE,
|
||||
sizeof (cb.cb_props_table));
|
||||
}
|
||||
|
||||
if ((cb.cb_avl = uu_avl_create(avl_pool, NULL, UU_DEFAULT)) == NULL) {
|
||||
(void) fprintf(stderr,
|
||||
gettext("internal error: out of memory\n"));
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
|
@ -41,9 +41,10 @@ typedef struct zfs_sort_column {
|
|||
#define ZFS_ITER_RECURSE (1 << 0)
|
||||
#define ZFS_ITER_ARGS_CAN_BE_PATHS (1 << 1)
|
||||
#define ZFS_ITER_PROP_LISTSNAPS (1 << 2)
|
||||
#define ZFS_ITER_DEPTH_LIMIT (1 << 3)
|
||||
|
||||
int zfs_for_each(int, char **, int options, zfs_type_t,
|
||||
zfs_sort_column_t *, zprop_list_t **, zfs_iter_f, void *);
|
||||
zfs_sort_column_t *, zprop_list_t **, int, zfs_iter_f, void *);
|
||||
int zfs_add_sort_column(zfs_sort_column_t **, const char *, boolean_t);
|
||||
void zfs_free_sort_columns(zfs_sort_column_t *);
|
||||
|
||||
|
|
|
@ -39,12 +39,14 @@
|
|||
#include <unistd.h>
|
||||
#include <fcntl.h>
|
||||
#include <zone.h>
|
||||
#include <grp.h>
|
||||
#include <pwd.h>
|
||||
#include <sys/mkdev.h>
|
||||
#include <sys/mntent.h>
|
||||
#include <sys/mnttab.h>
|
||||
#include <sys/mount.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/avl.h>
|
||||
#include <sys/fs/zfs.h>
|
||||
|
||||
#include <libzfs.h>
|
||||
#include <libuutil.h>
|
||||
|
@ -56,6 +58,7 @@ libzfs_handle_t *g_zfs;
|
|||
|
||||
static FILE *mnttab_file;
|
||||
static char history_str[HIS_MAX_RECORD_LEN];
|
||||
const char *pypath = "/usr/lib/zfs/pyzfs.py";
|
||||
|
||||
static int zfs_do_clone(int argc, char **argv);
|
||||
static int zfs_do_create(int argc, char **argv);
|
||||
|
@ -75,8 +78,8 @@ static int zfs_do_unshare(int argc, char **argv);
|
|||
static int zfs_do_send(int argc, char **argv);
|
||||
static int zfs_do_receive(int argc, char **argv);
|
||||
static int zfs_do_promote(int argc, char **argv);
|
||||
static int zfs_do_allow(int argc, char **argv);
|
||||
static int zfs_do_unallow(int argc, char **argv);
|
||||
static int zfs_do_userspace(int argc, char **argv);
|
||||
static int zfs_do_python(int argc, char **argv);
|
||||
|
||||
/*
|
||||
* Enable a reasonable set of defaults for libumem debugging on DEBUG builds.
|
||||
|
@ -116,7 +119,9 @@ typedef enum {
|
|||
HELP_UNMOUNT,
|
||||
HELP_UNSHARE,
|
||||
HELP_ALLOW,
|
||||
HELP_UNALLOW
|
||||
HELP_UNALLOW,
|
||||
HELP_USERSPACE,
|
||||
HELP_GROUPSPACE
|
||||
} zfs_help_t;
|
||||
|
||||
typedef struct zfs_command {
|
||||
|
@ -150,6 +155,8 @@ static zfs_command_t command_table[] = {
|
|||
{ "get", zfs_do_get, HELP_GET },
|
||||
{ "inherit", zfs_do_inherit, HELP_INHERIT },
|
||||
{ "upgrade", zfs_do_upgrade, HELP_UPGRADE },
|
||||
{ "userspace", zfs_do_userspace, HELP_USERSPACE },
|
||||
{ "groupspace", zfs_do_userspace, HELP_GROUPSPACE },
|
||||
{ NULL },
|
||||
{ "mount", zfs_do_mount, HELP_MOUNT },
|
||||
{ "unmount", zfs_do_unmount, HELP_UNMOUNT },
|
||||
|
@ -159,9 +166,9 @@ static zfs_command_t command_table[] = {
|
|||
{ "send", zfs_do_send, HELP_SEND },
|
||||
{ "receive", zfs_do_receive, HELP_RECEIVE },
|
||||
{ NULL },
|
||||
{ "allow", zfs_do_allow, HELP_ALLOW },
|
||||
{ "allow", zfs_do_python, HELP_ALLOW },
|
||||
{ NULL },
|
||||
{ "unallow", zfs_do_unallow, HELP_UNALLOW },
|
||||
{ "unallow", zfs_do_python, HELP_UNALLOW },
|
||||
};
|
||||
|
||||
#define NCOMMAND (sizeof (command_table) / sizeof (command_table[0]))
|
||||
|
@ -184,8 +191,8 @@ get_usage(zfs_help_t idx)
|
|||
return (gettext("\tdestroy [-rRf] "
|
||||
"<filesystem|volume|snapshot>\n"));
|
||||
case HELP_GET:
|
||||
return (gettext("\tget [-rHp] [-o field[,...]] "
|
||||
"[-s source[,...]]\n"
|
||||
return (gettext("\tget [-rHp] [-d max] "
|
||||
"[-o field[,...]] [-s source[,...]]\n"
|
||||
"\t <\"all\" | property[,...]> "
|
||||
"[filesystem|volume|snapshot] ...\n"));
|
||||
case HELP_INHERIT:
|
||||
|
@ -195,8 +202,8 @@ get_usage(zfs_help_t idx)
|
|||
return (gettext("\tupgrade [-v]\n"
|
||||
"\tupgrade [-r] [-V version] <-a | filesystem ...>\n"));
|
||||
case HELP_LIST:
|
||||
return (gettext("\tlist [-rH] [-o property[,...]] "
|
||||
"[-t type[,...]] [-s property] ...\n"
|
||||
return (gettext("\tlist [-rH][-d max] "
|
||||
"[-o property[,...]] [-t type[,...]] [-s property] ...\n"
|
||||
"\t [-S property] ... "
|
||||
"[filesystem|volume|snapshot] ...\n"));
|
||||
case HELP_MOUNT:
|
||||
|
@ -232,7 +239,8 @@ get_usage(zfs_help_t idx)
|
|||
return (gettext("\tunshare [-f] "
|
||||
"<-a | filesystem|mountpoint>\n"));
|
||||
case HELP_ALLOW:
|
||||
return (gettext("\tallow [-ldug] "
|
||||
return (gettext("\tallow <filesystem|volume>\n"
|
||||
"\tallow [-ldug] "
|
||||
"<\"everyone\"|user|group>[,...] <perm|@setname>[,...]\n"
|
||||
"\t <filesystem|volume>\n"
|
||||
"\tallow [-ld] -e <perm|@setname>[,...] "
|
||||
|
@ -250,6 +258,14 @@ get_usage(zfs_help_t idx)
|
|||
"<filesystem|volume>\n"
|
||||
"\tunallow [-r] -s @setname [<perm|@setname>[,...]] "
|
||||
"<filesystem|volume>\n"));
|
||||
case HELP_USERSPACE:
|
||||
return (gettext("\tuserspace [-hniHp] [-o field[,...]] "
|
||||
"[-sS field] ... [-t type[,...]]\n"
|
||||
"\t <filesystem|snapshot>\n"));
|
||||
case HELP_GROUPSPACE:
|
||||
return (gettext("\tgroupspace [-hniHpU] [-o field[,...]] "
|
||||
"[-sS field] ... [-t type[,...]]\n"
|
||||
"\t <filesystem|snapshot>\n"));
|
||||
}
|
||||
|
||||
abort();
|
||||
|
@ -311,7 +327,6 @@ usage(boolean_t requested)
|
|||
{
|
||||
int i;
|
||||
boolean_t show_properties = B_FALSE;
|
||||
boolean_t show_permissions = B_FALSE;
|
||||
FILE *fp = requested ? stdout : stderr;
|
||||
|
||||
if (current_command == NULL) {
|
||||
|
@ -342,13 +357,7 @@ usage(boolean_t requested)
|
|||
strcmp(current_command->name, "list") == 0))
|
||||
show_properties = B_TRUE;
|
||||
|
||||
if (current_command != NULL &&
|
||||
(strcmp(current_command->name, "allow") == 0 ||
|
||||
strcmp(current_command->name, "unallow") == 0))
|
||||
show_permissions = B_TRUE;
|
||||
|
||||
if (show_properties) {
|
||||
|
||||
(void) fprintf(fp,
|
||||
gettext("\nThe following properties are supported:\n"));
|
||||
|
||||
|
@ -359,16 +368,26 @@ usage(boolean_t requested)
|
|||
(void) zprop_iter(usage_prop_cb, fp, B_FALSE, B_TRUE,
|
||||
ZFS_TYPE_DATASET);
|
||||
|
||||
(void) fprintf(fp, "\t%-15s ", "userused@...");
|
||||
(void) fprintf(fp, " NO NO <size>\n");
|
||||
(void) fprintf(fp, "\t%-15s ", "groupused@...");
|
||||
(void) fprintf(fp, " NO NO <size>\n");
|
||||
(void) fprintf(fp, "\t%-15s ", "userquota@...");
|
||||
(void) fprintf(fp, "YES NO <size> | none\n");
|
||||
(void) fprintf(fp, "\t%-15s ", "groupquota@...");
|
||||
(void) fprintf(fp, "YES NO <size> | none\n");
|
||||
|
||||
(void) fprintf(fp, gettext("\nSizes are specified in bytes "
|
||||
"with standard units such as K, M, G, etc.\n"));
|
||||
(void) fprintf(fp, gettext("\nUser-defined properties can "
|
||||
"be specified by using a name containing a colon (:).\n"));
|
||||
|
||||
} else if (show_permissions) {
|
||||
(void) fprintf(fp,
|
||||
gettext("\nThe following permissions are supported:\n"));
|
||||
|
||||
zfs_deleg_permissions();
|
||||
(void) fprintf(fp, gettext("\nThe {user|group}{used|quota}@ "
|
||||
"properties must be appended with\n"
|
||||
"a user or group specifier of one of these forms:\n"
|
||||
" POSIX name (eg: \"matt\")\n"
|
||||
" POSIX id (eg: \"126829\")\n"
|
||||
" SMB name@domain (eg: \"matt@sun\")\n"
|
||||
" SMB SID (eg: \"S-1-234-567-89\")\n"));
|
||||
} else {
|
||||
(void) fprintf(fp,
|
||||
gettext("\nFor the property list, run: %s\n"),
|
||||
|
@ -415,6 +434,27 @@ parseprop(nvlist_t *props)
|
|||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
parse_depth(char *opt, int *flags)
|
||||
{
|
||||
char *tmp;
|
||||
int depth;
|
||||
|
||||
depth = (int)strtol(opt, &tmp, 0);
|
||||
if (*tmp) {
|
||||
(void) fprintf(stderr,
|
||||
gettext("%s is not an integer\n"), optarg);
|
||||
usage(B_FALSE);
|
||||
}
|
||||
if (depth < 0) {
|
||||
(void) fprintf(stderr,
|
||||
gettext("Depth can not be negative.\n"));
|
||||
usage(B_FALSE);
|
||||
}
|
||||
*flags |= (ZFS_ITER_DEPTH_LIMIT|ZFS_ITER_RECURSE);
|
||||
return (depth);
|
||||
}
|
||||
|
||||
/*
|
||||
* zfs clone [-p] [-o prop=value] ... <snap> <fs | vol>
|
||||
*
|
||||
|
@ -1063,6 +1103,17 @@ get_callback(zfs_handle_t *zhp, void *data)
|
|||
zprop_print_one_property(zfs_get_name(zhp), cbp,
|
||||
zfs_prop_to_name(pl->pl_prop),
|
||||
buf, sourcetype, source);
|
||||
} else if (zfs_prop_userquota(pl->pl_user_prop)) {
|
||||
sourcetype = ZPROP_SRC_LOCAL;
|
||||
|
||||
if (zfs_prop_get_userquota(zhp, pl->pl_user_prop,
|
||||
buf, sizeof (buf), cbp->cb_literal) != 0) {
|
||||
sourcetype = ZPROP_SRC_NONE;
|
||||
(void) strlcpy(buf, "-", sizeof (buf));
|
||||
}
|
||||
|
||||
zprop_print_one_property(zfs_get_name(zhp), cbp,
|
||||
pl->pl_user_prop, buf, sourcetype, source);
|
||||
} else {
|
||||
if (nvlist_lookup_nvlist(userprop,
|
||||
pl->pl_user_prop, &propval) != 0) {
|
||||
|
@ -1102,6 +1153,7 @@ zfs_do_get(int argc, char **argv)
|
|||
int i, c, flags = 0;
|
||||
char *value, *fields;
|
||||
int ret;
|
||||
int limit = 0;
|
||||
zprop_list_t fake_name = { 0 };
|
||||
|
||||
/*
|
||||
|
@ -1115,11 +1167,14 @@ zfs_do_get(int argc, char **argv)
|
|||
cb.cb_type = ZFS_TYPE_DATASET;
|
||||
|
||||
/* check options */
|
||||
while ((c = getopt(argc, argv, ":o:s:rHp")) != -1) {
|
||||
while ((c = getopt(argc, argv, ":d:o:s:rHp")) != -1) {
|
||||
switch (c) {
|
||||
case 'p':
|
||||
cb.cb_literal = B_TRUE;
|
||||
break;
|
||||
case 'd':
|
||||
limit = parse_depth(optarg, &flags);
|
||||
break;
|
||||
case 'r':
|
||||
flags |= ZFS_ITER_RECURSE;
|
||||
break;
|
||||
|
@ -1250,7 +1305,7 @@ zfs_do_get(int argc, char **argv)
|
|||
|
||||
/* run for each object */
|
||||
ret = zfs_for_each(argc, argv, flags, ZFS_TYPE_DATASET, NULL,
|
||||
&cb.cb_proplist, get_callback, &cb);
|
||||
&cb.cb_proplist, limit, get_callback, &cb);
|
||||
|
||||
if (cb.cb_proplist == &fake_name)
|
||||
zprop_free_list(fake_name.pl_next);
|
||||
|
@ -1363,10 +1418,10 @@ zfs_do_inherit(int argc, char **argv)
|
|||
|
||||
if (flags & ZFS_ITER_RECURSE) {
|
||||
ret = zfs_for_each(argc, argv, flags, ZFS_TYPE_DATASET,
|
||||
NULL, NULL, inherit_recurse_cb, propname);
|
||||
NULL, NULL, 0, inherit_recurse_cb, propname);
|
||||
} else {
|
||||
ret = zfs_for_each(argc, argv, flags, ZFS_TYPE_DATASET,
|
||||
NULL, NULL, inherit_cb, propname);
|
||||
NULL, NULL, 0, inherit_cb, propname);
|
||||
}
|
||||
|
||||
return (ret);
|
||||
|
@ -1435,21 +1490,30 @@ upgrade_set_callback(zfs_handle_t *zhp, void *data)
|
|||
{
|
||||
upgrade_cbdata_t *cb = data;
|
||||
int version = zfs_prop_get_int(zhp, ZFS_PROP_VERSION);
|
||||
int i;
|
||||
static struct { int zplver; int spaver; } table[] = {
|
||||
{ZPL_VERSION_FUID, SPA_VERSION_FUID},
|
||||
{ZPL_VERSION_USERSPACE, SPA_VERSION_USERSPACE},
|
||||
{0, 0}
|
||||
};
|
||||
|
||||
if (cb->cb_version >= ZPL_VERSION_FUID) {
|
||||
int spa_version;
|
||||
|
||||
if (zfs_spa_version(zhp, &spa_version) < 0)
|
||||
return (-1);
|
||||
for (i = 0; table[i].zplver; i++) {
|
||||
if (cb->cb_version >= table[i].zplver) {
|
||||
int spa_version;
|
||||
|
||||
if (spa_version < SPA_VERSION_FUID) {
|
||||
/* can't upgrade */
|
||||
(void) printf(gettext("%s: can not be upgraded; "
|
||||
"the pool version needs to first be upgraded\nto "
|
||||
"version %d\n\n"),
|
||||
zfs_get_name(zhp), SPA_VERSION_FUID);
|
||||
cb->cb_numfailed++;
|
||||
return (0);
|
||||
if (zfs_spa_version(zhp, &spa_version) < 0)
|
||||
return (-1);
|
||||
|
||||
if (spa_version < table[i].spaver) {
|
||||
/* can't upgrade */
|
||||
(void) printf(gettext("%s: can not be "
|
||||
"upgraded; the pool version needs to first "
|
||||
"be upgraded\nto version %d\n\n"),
|
||||
zfs_get_name(zhp), table[i].spaver);
|
||||
cb->cb_numfailed++;
|
||||
return (0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1550,6 +1614,8 @@ zfs_do_upgrade(int argc, char **argv)
|
|||
(void) printf(gettext(" 2 Enhanced directory entries\n"));
|
||||
(void) printf(gettext(" 3 Case insensitive and File system "
|
||||
"unique identifer (FUID)\n"));
|
||||
(void) printf(gettext(" 4 userquota, groupquota "
|
||||
"properties\n"));
|
||||
(void) printf(gettext("\nFor more information on a particular "
|
||||
"version, including supported releases, see:\n\n"));
|
||||
(void) printf("http://www.opensolaris.org/os/community/zfs/"
|
||||
|
@ -1561,7 +1627,7 @@ zfs_do_upgrade(int argc, char **argv)
|
|||
if (cb.cb_version == 0)
|
||||
cb.cb_version = ZPL_VERSION;
|
||||
ret = zfs_for_each(argc, argv, flags, ZFS_TYPE_FILESYSTEM,
|
||||
NULL, NULL, upgrade_set_callback, &cb);
|
||||
NULL, NULL, 0, upgrade_set_callback, &cb);
|
||||
(void) printf(gettext("%llu filesystems upgraded\n"),
|
||||
cb.cb_numupgraded);
|
||||
if (cb.cb_numsamegraded) {
|
||||
|
@ -1579,14 +1645,14 @@ zfs_do_upgrade(int argc, char **argv)
|
|||
|
||||
flags |= ZFS_ITER_RECURSE;
|
||||
ret = zfs_for_each(0, NULL, flags, ZFS_TYPE_FILESYSTEM,
|
||||
NULL, NULL, upgrade_list_callback, &cb);
|
||||
NULL, NULL, 0, upgrade_list_callback, &cb);
|
||||
|
||||
found = cb.cb_foundone;
|
||||
cb.cb_foundone = B_FALSE;
|
||||
cb.cb_newer = B_TRUE;
|
||||
|
||||
ret = zfs_for_each(0, NULL, flags, ZFS_TYPE_FILESYSTEM,
|
||||
NULL, NULL, upgrade_list_callback, &cb);
|
||||
NULL, NULL, 0, upgrade_list_callback, &cb);
|
||||
|
||||
if (!cb.cb_foundone && !found) {
|
||||
(void) printf(gettext("All filesystems are "
|
||||
|
@ -1598,11 +1664,90 @@ zfs_do_upgrade(int argc, char **argv)
|
|||
}
|
||||
|
||||
/*
|
||||
* list [-rH] [-o property[,property]...] [-t type[,type]...]
|
||||
* zfs userspace
|
||||
*/
|
||||
static int
|
||||
userspace_cb(void *arg, const char *domain, uid_t rid, uint64_t space)
|
||||
{
|
||||
zfs_userquota_prop_t *typep = arg;
|
||||
zfs_userquota_prop_t p = *typep;
|
||||
char *name = NULL;
|
||||
char *ug, *propname;
|
||||
char namebuf[32];
|
||||
char sizebuf[32];
|
||||
|
||||
if (domain == NULL || domain[0] == '\0') {
|
||||
if (p == ZFS_PROP_GROUPUSED || p == ZFS_PROP_GROUPQUOTA) {
|
||||
struct group *g = getgrgid(rid);
|
||||
if (g)
|
||||
name = g->gr_name;
|
||||
} else {
|
||||
struct passwd *p = getpwuid(rid);
|
||||
if (p)
|
||||
name = p->pw_name;
|
||||
}
|
||||
}
|
||||
|
||||
if (p == ZFS_PROP_GROUPUSED || p == ZFS_PROP_GROUPQUOTA)
|
||||
ug = "group";
|
||||
else
|
||||
ug = "user";
|
||||
|
||||
if (p == ZFS_PROP_USERUSED || p == ZFS_PROP_GROUPUSED)
|
||||
propname = "used";
|
||||
else
|
||||
propname = "quota";
|
||||
|
||||
if (name == NULL) {
|
||||
(void) snprintf(namebuf, sizeof (namebuf),
|
||||
"%llu", (longlong_t)rid);
|
||||
name = namebuf;
|
||||
}
|
||||
zfs_nicenum(space, sizebuf, sizeof (sizebuf));
|
||||
|
||||
(void) printf("%s %s %s%c%s %s\n", propname, ug, domain,
|
||||
domain[0] ? '-' : ' ', name, sizebuf);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
zfs_do_userspace(int argc, char **argv)
|
||||
{
|
||||
zfs_handle_t *zhp;
|
||||
zfs_userquota_prop_t p;
|
||||
int error;
|
||||
|
||||
/*
|
||||
* Try the python version. If the execv fails, we'll continue
|
||||
* and do a simplistic implementation.
|
||||
*/
|
||||
(void) execv(pypath, argv-1);
|
||||
|
||||
(void) printf("internal error: %s not found\n"
|
||||
"falling back on built-in implementation, "
|
||||
"some features will not work\n", pypath);
|
||||
|
||||
if ((zhp = zfs_open(g_zfs, argv[argc-1], ZFS_TYPE_DATASET)) == NULL)
|
||||
return (1);
|
||||
|
||||
(void) printf("PROP TYPE NAME VALUE\n");
|
||||
|
||||
for (p = 0; p < ZFS_NUM_USERQUOTA_PROPS; p++) {
|
||||
error = zfs_userspace(zhp, p, userspace_cb, &p);
|
||||
if (error)
|
||||
break;
|
||||
}
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* list [-r][-d max] [-H] [-o property[,property]...] [-t type[,type]...]
|
||||
* [-s property [-s property]...] [-S property [-S property]...]
|
||||
* <dataset> ...
|
||||
*
|
||||
* -r Recurse over all children
|
||||
* -d Limit recursion by depth.
|
||||
* -H Scripted mode; elide headers and separate columns by tabs
|
||||
* -o Control which fields to display.
|
||||
* -t Control which object types to display.
|
||||
|
@ -1685,7 +1830,6 @@ print_dataset(zfs_handle_t *zhp, zprop_list_t *pl, boolean_t scripted)
|
|||
first = B_FALSE;
|
||||
}
|
||||
|
||||
right_justify = B_FALSE;
|
||||
if (pl->pl_prop != ZPROP_INVAL) {
|
||||
if (zfs_prop_get(zhp, pl->pl_prop, property,
|
||||
sizeof (property), NULL, NULL, 0, B_FALSE) != 0)
|
||||
|
@ -1694,6 +1838,13 @@ print_dataset(zfs_handle_t *zhp, zprop_list_t *pl, boolean_t scripted)
|
|||
propstr = property;
|
||||
|
||||
right_justify = zfs_prop_align_right(pl->pl_prop);
|
||||
} else if (zfs_prop_userquota(pl->pl_user_prop)) {
|
||||
if (zfs_prop_get_userquota(zhp, pl->pl_user_prop,
|
||||
property, sizeof (property), B_FALSE) != 0)
|
||||
propstr = "-";
|
||||
else
|
||||
propstr = property;
|
||||
right_justify = B_TRUE;
|
||||
} else {
|
||||
if (nvlist_lookup_nvlist(userprops,
|
||||
pl->pl_user_prop, &propval) != 0)
|
||||
|
@ -1701,6 +1852,7 @@ print_dataset(zfs_handle_t *zhp, zprop_list_t *pl, boolean_t scripted)
|
|||
else
|
||||
verify(nvlist_lookup_string(propval,
|
||||
ZPROP_VALUE, &propstr) == 0);
|
||||
right_justify = B_FALSE;
|
||||
}
|
||||
|
||||
width = pl->pl_width;
|
||||
|
@ -1752,16 +1904,20 @@ zfs_do_list(int argc, char **argv)
|
|||
char *fields = NULL;
|
||||
list_cbdata_t cb = { 0 };
|
||||
char *value;
|
||||
int limit = 0;
|
||||
int ret;
|
||||
zfs_sort_column_t *sortcol = NULL;
|
||||
int flags = ZFS_ITER_PROP_LISTSNAPS | ZFS_ITER_ARGS_CAN_BE_PATHS;
|
||||
|
||||
/* check options */
|
||||
while ((c = getopt(argc, argv, ":o:rt:Hs:S:")) != -1) {
|
||||
while ((c = getopt(argc, argv, ":d:o:rt:Hs:S:")) != -1) {
|
||||
switch (c) {
|
||||
case 'o':
|
||||
fields = optarg;
|
||||
break;
|
||||
case 'd':
|
||||
limit = parse_depth(optarg, &flags);
|
||||
break;
|
||||
case 'r':
|
||||
flags |= ZFS_ITER_RECURSE;
|
||||
break;
|
||||
|
@ -1852,7 +2008,7 @@ zfs_do_list(int argc, char **argv)
|
|||
cb.cb_first = B_TRUE;
|
||||
|
||||
ret = zfs_for_each(argc, argv, flags, types, sortcol, &cb.cb_proplist,
|
||||
list_callback, &cb);
|
||||
limit, list_callback, &cb);
|
||||
|
||||
zprop_free_list(cb.cb_proplist);
|
||||
zfs_free_sort_columns(sortcol);
|
||||
|
@ -2235,7 +2391,7 @@ zfs_do_set(int argc, char **argv)
|
|||
}
|
||||
|
||||
ret = zfs_for_each(argc - 2, argv + 2, NULL,
|
||||
ZFS_TYPE_DATASET, NULL, NULL, set_callback, &cb);
|
||||
ZFS_TYPE_DATASET, NULL, NULL, 0, set_callback, &cb);
|
||||
|
||||
return (ret);
|
||||
}
|
||||
|
@ -2495,390 +2651,6 @@ zfs_do_receive(int argc, char **argv)
|
|||
return (err != 0);
|
||||
}
|
||||
|
||||
/*
 * State shared between zfs_print_allows() and zfs_iter_perms().
 */
typedef struct allow_cb {
	/* entries printed so far; the banner is printed when this is 0 */
	int a_permcnt;
	/* byte offset of the permission AVL tree inside each list node */
	size_t a_treeoffset;
} allow_cb_t;
|
||||
|
||||
static void
|
||||
zfs_print_perms(avl_tree_t *tree)
|
||||
{
|
||||
zfs_perm_node_t *permnode;
|
||||
|
||||
permnode = avl_first(tree);
|
||||
while (permnode != NULL) {
|
||||
(void) printf("%s", permnode->z_pname);
|
||||
permnode = AVL_NEXT(tree, permnode);
|
||||
if (permnode)
|
||||
(void) printf(",");
|
||||
else
|
||||
(void) printf("\n");
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Iterate over user/groups/everyone/... and the call perm_iter
|
||||
* function to print actual permission when tree has >0 nodes.
|
||||
*/
|
||||
static void
|
||||
zfs_iter_perms(avl_tree_t *tree, const char *banner, allow_cb_t *cb)
|
||||
{
|
||||
zfs_allow_node_t *item;
|
||||
avl_tree_t *ptree;
|
||||
|
||||
item = avl_first(tree);
|
||||
while (item) {
|
||||
ptree = (void *)((char *)item + cb->a_treeoffset);
|
||||
if (avl_numnodes(ptree)) {
|
||||
if (cb->a_permcnt++ == 0)
|
||||
(void) printf("%s\n", banner);
|
||||
(void) printf("\t%s", item->z_key);
|
||||
/*
|
||||
* Avoid an extra space being printed
|
||||
* for "everyone" which is keyed with a null
|
||||
* string
|
||||
*/
|
||||
if (item->z_key[0] != '\0')
|
||||
(void) printf(" ");
|
||||
zfs_print_perms(ptree);
|
||||
}
|
||||
item = AVL_NEXT(tree, item);
|
||||
}
|
||||
}
|
||||
|
||||
#define LINES "-------------------------------------------------------------\n"
|
||||
static int
|
||||
zfs_print_allows(char *ds)
|
||||
{
|
||||
zfs_allow_t *curperms, *perms;
|
||||
zfs_handle_t *zhp;
|
||||
allow_cb_t allowcb = { 0 };
|
||||
char banner[MAXPATHLEN];
|
||||
|
||||
if (ds[0] == '-')
|
||||
usage(B_FALSE);
|
||||
|
||||
if (strrchr(ds, '@')) {
|
||||
(void) fprintf(stderr, gettext("Snapshots don't have 'allow'"
|
||||
" permissions\n"));
|
||||
return (1);
|
||||
}
|
||||
if ((zhp = zfs_open(g_zfs, ds, ZFS_TYPE_DATASET)) == NULL)
|
||||
return (1);
|
||||
|
||||
if (zfs_perm_get(zhp, &perms)) {
|
||||
(void) fprintf(stderr,
|
||||
gettext("Failed to retrieve 'allows' on %s\n"), ds);
|
||||
zfs_close(zhp);
|
||||
return (1);
|
||||
}
|
||||
|
||||
zfs_close(zhp);
|
||||
|
||||
if (perms != NULL)
|
||||
(void) printf("%s", LINES);
|
||||
for (curperms = perms; curperms; curperms = curperms->z_next) {
|
||||
|
||||
(void) snprintf(banner, sizeof (banner),
|
||||
gettext("Permission sets on (%s)"), curperms->z_setpoint);
|
||||
allowcb.a_treeoffset =
|
||||
offsetof(zfs_allow_node_t, z_localdescend);
|
||||
allowcb.a_permcnt = 0;
|
||||
zfs_iter_perms(&curperms->z_sets, banner, &allowcb);
|
||||
|
||||
(void) snprintf(banner, sizeof (banner),
|
||||
gettext("Create time permissions on (%s)"),
|
||||
curperms->z_setpoint);
|
||||
allowcb.a_treeoffset =
|
||||
offsetof(zfs_allow_node_t, z_localdescend);
|
||||
allowcb.a_permcnt = 0;
|
||||
zfs_iter_perms(&curperms->z_crperms, banner, &allowcb);
|
||||
|
||||
|
||||
(void) snprintf(banner, sizeof (banner),
|
||||
gettext("Local permissions on (%s)"), curperms->z_setpoint);
|
||||
allowcb.a_treeoffset = offsetof(zfs_allow_node_t, z_local);
|
||||
allowcb.a_permcnt = 0;
|
||||
zfs_iter_perms(&curperms->z_user, banner, &allowcb);
|
||||
zfs_iter_perms(&curperms->z_group, banner, &allowcb);
|
||||
zfs_iter_perms(&curperms->z_everyone, banner, &allowcb);
|
||||
|
||||
(void) snprintf(banner, sizeof (banner),
|
||||
gettext("Descendent permissions on (%s)"),
|
||||
curperms->z_setpoint);
|
||||
allowcb.a_treeoffset = offsetof(zfs_allow_node_t, z_descend);
|
||||
allowcb.a_permcnt = 0;
|
||||
zfs_iter_perms(&curperms->z_user, banner, &allowcb);
|
||||
zfs_iter_perms(&curperms->z_group, banner, &allowcb);
|
||||
zfs_iter_perms(&curperms->z_everyone, banner, &allowcb);
|
||||
|
||||
(void) snprintf(banner, sizeof (banner),
|
||||
gettext("Local+Descendent permissions on (%s)"),
|
||||
curperms->z_setpoint);
|
||||
allowcb.a_treeoffset =
|
||||
offsetof(zfs_allow_node_t, z_localdescend);
|
||||
allowcb.a_permcnt = 0;
|
||||
zfs_iter_perms(&curperms->z_user, banner, &allowcb);
|
||||
zfs_iter_perms(&curperms->z_group, banner, &allowcb);
|
||||
zfs_iter_perms(&curperms->z_everyone, banner, &allowcb);
|
||||
|
||||
(void) printf("%s", LINES);
|
||||
}
|
||||
zfs_free_allows(perms);
|
||||
return (0);
|
||||
}
|
||||
|
||||
#define ALLOWOPTIONS "ldcsu:g:e"
|
||||
#define UNALLOWOPTIONS "ldcsu:g:er"
|
||||
|
||||
/*
 * Validate options, and build necessary datastructure to display/remove/add
 * permissions.
 *
 *	argc, argv	in/out; advanced past the parsed options and, for
 *			unallow, past the who/perms operands as well
 *	unallow		B_TRUE when parsing for 'zfs unallow' (also accepts -r)
 *	ds		out; set to the dataset operand
 *	recurse		out; set to B_TRUE when -r is given
 *	zperms		out; passed through to zfs_build_perms()
 *
 * Returns 0 - If permissions should be added/removed
 * Returns 1 - If permissions should be displayed.
 * Returns -1 - on failure
 *
 * NOTE(review): the code relies on usage() not returning -- confirm.
 */
int
parse_allow_args(int *argc, char **argv[], boolean_t unallow,
    char **ds, int *recurse, nvlist_t **zperms)
{
	int c;
	char *options = unallow ? UNALLOWOPTIONS : ALLOWOPTIONS;
	zfs_deleg_inherit_t deleg_type = ZFS_DELEG_NONE;
	zfs_deleg_who_type_t who_type = ZFS_DELEG_WHO_UNKNOWN;
	char *who = NULL;
	char *perms = NULL;
	zfs_handle_t *zhp;

	while ((c = getopt(*argc, *argv, options)) != -1) {
		switch (c) {
		case 'l':
			/* -l cannot follow -c or -s */
			if (who_type == ZFS_DELEG_CREATE ||
			    who_type == ZFS_DELEG_NAMED_SET)
				usage(B_FALSE);

			deleg_type |= ZFS_DELEG_PERM_LOCAL;
			break;
		case 'd':
			/* -d cannot follow -c or -s */
			if (who_type == ZFS_DELEG_CREATE ||
			    who_type == ZFS_DELEG_NAMED_SET)
				usage(B_FALSE);

			deleg_type |= ZFS_DELEG_PERM_DESCENDENT;
			break;
		case 'r':
			*recurse = B_TRUE;
			break;
		case 'c':
			/* -c excludes any earlier who option and -l/-d */
			if (who_type != ZFS_DELEG_WHO_UNKNOWN)
				usage(B_FALSE);
			if (deleg_type)
				usage(B_FALSE);
			who_type = ZFS_DELEG_CREATE;
			break;
		case 's':
			/* -s excludes any earlier who option and -l/-d */
			if (who_type != ZFS_DELEG_WHO_UNKNOWN)
				usage(B_FALSE);
			if (deleg_type)
				usage(B_FALSE);
			who_type = ZFS_DELEG_NAMED_SET;
			break;
		case 'u':
			if (who_type != ZFS_DELEG_WHO_UNKNOWN)
				usage(B_FALSE);
			who_type = ZFS_DELEG_USER;
			who = optarg;
			break;
		case 'g':
			if (who_type != ZFS_DELEG_WHO_UNKNOWN)
				usage(B_FALSE);
			who_type = ZFS_DELEG_GROUP;
			who = optarg;
			break;
		case 'e':
			if (who_type != ZFS_DELEG_WHO_UNKNOWN)
				usage(B_FALSE);
			who_type = ZFS_DELEG_EVERYONE;
			break;
		default:
			usage(B_FALSE);
			break;
		}
	}

	/* Neither -l nor -d given: apply to both local and descendent. */
	if (deleg_type == 0)
		deleg_type = ZFS_DELEG_PERM_LOCALDESCENDENT;

	*argc -= optind;
	*argv += optind;

	if (unallow == B_FALSE && *argc == 1) {
		/*
		 * Only print permissions if no options were processed
		 */
		if (optind == 1)
			return (1);
		else
			usage(B_FALSE);
	}

	/*
	 * initialize variables for zfs_build_perms based on number
	 * of arguments.
	 * 3 arguments ==> zfs [un]allow joe perm,perm,perm <dataset> or
	 *		   zfs [un]allow -s @set1 perm,perm <dataset>
	 * 2 arguments ==> zfs [un]allow -c perm,perm <dataset> or
	 *		   zfs [un]allow -u|-g <name> perm <dataset> or
	 *		   zfs [un]allow -e perm,perm <dataset>
	 *		   zfs unallow joe <dataset>
	 *		   zfs unallow -s @set1 <dataset>
	 * 1 argument  ==> zfs [un]allow -e <dataset> or
	 *		   zfs [un]allow -c <dataset>
	 */

	switch (*argc) {
	case 3:
		perms = (*argv)[1];
		who = (*argv)[0];
		*ds = (*argv)[2];

		/*
		 * For unallow, advance argc/argv past who and perms so
		 * that only the dataset operand remains.  For allow, three
		 * operands are only accepted when no who-type option was
		 * given or when the who type is a permission set (-s).
		 */
		if (unallow == B_TRUE) {
			*argc -= 2;
			*argv += 2;
		} else if (who_type != ZFS_DELEG_WHO_UNKNOWN &&
		    who_type != ZFS_DELEG_NAMED_SET)
			usage(B_FALSE);
		break;

	case 2:
		/*
		 * unallow with -e, -c, or an explicit -u/-g name: the
		 * first operand is the permission list.
		 */
		if (unallow == B_TRUE && (who_type == ZFS_DELEG_EVERYONE ||
		    who_type == ZFS_DELEG_CREATE || who != NULL)) {
			perms = (*argv)[0];
			*ds = (*argv)[1];
		} else {
			/*
			 * allow requires a who option other than -s here;
			 * unallow without one (or with -s) treats the first
			 * operand as 'who'; otherwise it is the perm list.
			 */
			if (unallow == B_FALSE &&
			    (who_type == ZFS_DELEG_WHO_UNKNOWN ||
			    who_type == ZFS_DELEG_NAMED_SET))
				usage(B_FALSE);
			else if (who_type == ZFS_DELEG_WHO_UNKNOWN ||
			    who_type == ZFS_DELEG_NAMED_SET)
				who = (*argv)[0];
			else if (who_type != ZFS_DELEG_NAMED_SET)
				perms = (*argv)[0];
			*ds = (*argv)[1];
		}
		/* For unallow, leave only the dataset operand in argv. */
		if (unallow == B_TRUE) {
			(*argc)--;
			(*argv)++;
		}
		break;

	case 1:
		/* A lone dataset is only valid for unallow with a who. */
		if (unallow == B_FALSE)
			usage(B_FALSE);
		if (who == NULL && who_type != ZFS_DELEG_CREATE &&
		    who_type != ZFS_DELEG_EVERYONE)
			usage(B_FALSE);
		*ds = (*argv)[0];
		break;

	default:
		usage(B_FALSE);
	}

	if (strrchr(*ds, '@')) {
		(void) fprintf(stderr,
		    gettext("Can't set or remove 'allow' permissions "
		    "on snapshots.\n"));
		return (-1);
	}

	if ((zhp = zfs_open(g_zfs, *ds, ZFS_TYPE_DATASET)) == NULL)
		return (-1);

	/* The handle is only needed while building the permission list. */
	if ((zfs_build_perms(zhp, who, perms,
	    who_type, deleg_type, zperms)) != 0) {
		zfs_close(zhp);
		return (-1);
	}
	zfs_close(zhp);
	return (0);
}
|
||||
|
||||
static int
|
||||
zfs_do_allow(int argc, char **argv)
|
||||
{
|
||||
char *ds;
|
||||
nvlist_t *zperms = NULL;
|
||||
zfs_handle_t *zhp;
|
||||
int unused;
|
||||
int ret;
|
||||
|
||||
if ((ret = parse_allow_args(&argc, &argv, B_FALSE, &ds,
|
||||
&unused, &zperms)) == -1)
|
||||
return (1);
|
||||
|
||||
if (ret == 1)
|
||||
return (zfs_print_allows(argv[0]));
|
||||
|
||||
if ((zhp = zfs_open(g_zfs, ds, ZFS_TYPE_DATASET)) == NULL)
|
||||
return (1);
|
||||
|
||||
if (zfs_perm_set(zhp, zperms)) {
|
||||
zfs_close(zhp);
|
||||
nvlist_free(zperms);
|
||||
return (1);
|
||||
}
|
||||
nvlist_free(zperms);
|
||||
zfs_close(zhp);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
unallow_callback(zfs_handle_t *zhp, void *data)
|
||||
{
|
||||
nvlist_t *nvp = (nvlist_t *)data;
|
||||
int error;
|
||||
|
||||
error = zfs_perm_remove(zhp, nvp);
|
||||
if (error) {
|
||||
(void) fprintf(stderr, gettext("Failed to remove permissions "
|
||||
"on %s\n"), zfs_get_name(zhp));
|
||||
}
|
||||
return (error);
|
||||
}
|
||||
|
||||
static int
|
||||
zfs_do_unallow(int argc, char **argv)
|
||||
{
|
||||
int recurse = B_FALSE;
|
||||
char *ds;
|
||||
int error;
|
||||
nvlist_t *zperms = NULL;
|
||||
int flags = 0;
|
||||
|
||||
if (parse_allow_args(&argc, &argv, B_TRUE,
|
||||
&ds, &recurse, &zperms) == -1)
|
||||
return (1);
|
||||
|
||||
if (recurse)
|
||||
flags |= ZFS_ITER_RECURSE;
|
||||
error = zfs_for_each(argc, argv, flags,
|
||||
ZFS_TYPE_FILESYSTEM|ZFS_TYPE_VOLUME, NULL,
|
||||
NULL, unallow_callback, (void *)zperms);
|
||||
|
||||
if (zperms)
|
||||
nvlist_free(zperms);
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
typedef struct get_all_cbdata {
|
||||
zfs_handle_t **cb_handles;
|
||||
size_t cb_alloc;
|
||||
|
@ -3944,6 +3716,15 @@ zfs_do_unshare(int argc, char **argv)
|
|||
return (unshare_unmount(OP_SHARE, argc, argv));
|
||||
}
|
||||
|
||||
/* ARGSUSED */
|
||||
static int
|
||||
zfs_do_python(int argc, char **argv)
|
||||
{
|
||||
(void) execv(pypath, argv-1);
|
||||
(void) printf("internal error: %s not found\n", pypath);
|
||||
return (-1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Called when invoked as /etc/fs/zfs/mount. Do the mount if the mountpoint is
|
||||
* 'legacy'. Otherwise, complain that the user should be using 'zfs mount'.
|
||||
|
@ -4197,6 +3978,7 @@ main(int argc, char **argv)
|
|||
/*
|
||||
* Run the appropriate command.
|
||||
*/
|
||||
libzfs_mnttab_cache(g_zfs, B_TRUE);
|
||||
if (find_command_idx(cmdname, &i) == 0) {
|
||||
current_command = &command_table[i];
|
||||
ret = command_table[i].func(argc - 1, argv + 1);
|
||||
|
@ -4209,6 +3991,7 @@ main(int argc, char **argv)
|
|||
"command '%s'\n"), cmdname);
|
||||
usage(B_FALSE);
|
||||
}
|
||||
libzfs_mnttab_cache(g_zfs, B_FALSE);
|
||||
}
|
||||
|
||||
(void) fclose(mnttab_file);
|
||||
|
|
|
@ -19,12 +19,10 @@
|
|||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
/*
|
||||
* ZFS Fault Injector
|
||||
*
|
||||
|
@ -224,7 +222,7 @@ usage(void)
|
|||
"\t\tClear the particular record (if given a numeric ID), or\n"
|
||||
"\t\tall records if 'all' is specificed.\n"
|
||||
"\n"
|
||||
"\tzinject -d device [-e errno] [-L <nvlist|uber>] pool\n"
|
||||
"\tzinject -d device [-e errno] [-L <nvlist|uber>] [-F] pool\n"
|
||||
"\t\tInject a fault into a particular device or the device's\n"
|
||||
"\t\tlabel. Label injection can either be 'nvlist' or 'uber'.\n"
|
||||
"\t\t'errno' can either be 'nxio' (the default) or 'io'.\n"
|
||||
|
@ -516,7 +514,7 @@ main(int argc, char **argv)
|
|||
return (0);
|
||||
}
|
||||
|
||||
while ((c = getopt(argc, argv, ":ab:d:f:qhc:t:l:mr:e:uL:")) != -1) {
|
||||
while ((c = getopt(argc, argv, ":ab:d:f:Fqhc:t:l:mr:e:uL:")) != -1) {
|
||||
switch (c) {
|
||||
case 'a':
|
||||
flags |= ZINJECT_FLUSH_ARC;
|
||||
|
@ -553,6 +551,9 @@ main(int argc, char **argv)
|
|||
return (1);
|
||||
}
|
||||
break;
|
||||
case 'F':
|
||||
record.zi_failfast = B_TRUE;
|
||||
break;
|
||||
case 'h':
|
||||
usage();
|
||||
return (0);
|
||||
|
|
|
@ -376,12 +376,11 @@ add_prop_list(const char *propname, char *propval, nvlist_t **props,
|
|||
}
|
||||
normnm = zpool_prop_to_name(prop);
|
||||
} else {
|
||||
if ((fprop = zfs_name_to_prop(propname)) == ZPROP_INVAL) {
|
||||
(void) fprintf(stderr, gettext("property '%s' is "
|
||||
"not a valid file system property\n"), propname);
|
||||
return (2);
|
||||
if ((fprop = zfs_name_to_prop(propname)) != ZPROP_INVAL) {
|
||||
normnm = zfs_prop_to_name(fprop);
|
||||
} else {
|
||||
normnm = propname;
|
||||
}
|
||||
normnm = zfs_prop_to_name(fprop);
|
||||
}
|
||||
|
||||
if (nvlist_lookup_string(proplist, normnm, &strval) == 0 &&
|
||||
|
@ -979,14 +978,189 @@ max_width(zpool_handle_t *zhp, nvlist_t *nv, int depth, int max)
|
|||
return (max);
|
||||
}
|
||||
|
||||
/*
 * Search state for find_spare(), used while iterating over pools.
 */
typedef struct spare_cbdata {
	uint64_t cb_guid;		/* in: guid of the vdev to look for */
	zpool_handle_t *cb_zhp;		/* out: pool containing that vdev */
} spare_cbdata_t;
|
||||
|
||||
static boolean_t
|
||||
find_vdev(nvlist_t *nv, uint64_t search)
|
||||
{
|
||||
uint64_t guid;
|
||||
nvlist_t **child;
|
||||
uint_t c, children;
|
||||
|
||||
if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0 &&
|
||||
search == guid)
|
||||
return (B_TRUE);
|
||||
|
||||
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
|
||||
&child, &children) == 0) {
|
||||
for (c = 0; c < children; c++)
|
||||
if (find_vdev(child[c], search))
|
||||
return (B_TRUE);
|
||||
}
|
||||
|
||||
return (B_FALSE);
|
||||
}
|
||||
|
||||
static int
|
||||
find_spare(zpool_handle_t *zhp, void *data)
|
||||
{
|
||||
spare_cbdata_t *cbp = data;
|
||||
nvlist_t *config, *nvroot;
|
||||
|
||||
config = zpool_get_config(zhp, NULL);
|
||||
verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
|
||||
&nvroot) == 0);
|
||||
|
||||
if (find_vdev(nvroot, cbp->cb_guid)) {
|
||||
cbp->cb_zhp = zhp;
|
||||
return (1);
|
||||
}
|
||||
|
||||
zpool_close(zhp);
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Print out configuration state as requested by status_callback.
|
||||
*/
|
||||
void
|
||||
print_status_config(zpool_handle_t *zhp, const char *name, nvlist_t *nv,
|
||||
int namewidth, int depth, boolean_t isspare)
|
||||
{
|
||||
nvlist_t **child;
|
||||
uint_t c, children;
|
||||
vdev_stat_t *vs;
|
||||
char rbuf[6], wbuf[6], cbuf[6], repaired[7];
|
||||
char *vname;
|
||||
uint64_t notpresent;
|
||||
spare_cbdata_t cb;
|
||||
char *state;
|
||||
|
||||
verify(nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_STATS,
|
||||
(uint64_t **)&vs, &c) == 0);
|
||||
|
||||
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
|
||||
&child, &children) != 0)
|
||||
children = 0;
|
||||
|
||||
state = zpool_state_to_name(vs->vs_state, vs->vs_aux);
|
||||
if (isspare) {
|
||||
/*
|
||||
* For hot spares, we use the terms 'INUSE' and 'AVAILABLE' for
|
||||
* online drives.
|
||||
*/
|
||||
if (vs->vs_aux == VDEV_AUX_SPARED)
|
||||
state = "INUSE";
|
||||
else if (vs->vs_state == VDEV_STATE_HEALTHY)
|
||||
state = "AVAIL";
|
||||
}
|
||||
|
||||
(void) printf("\t%*s%-*s %-8s", depth, "", namewidth - depth,
|
||||
name, state);
|
||||
|
||||
if (!isspare) {
|
||||
zfs_nicenum(vs->vs_read_errors, rbuf, sizeof (rbuf));
|
||||
zfs_nicenum(vs->vs_write_errors, wbuf, sizeof (wbuf));
|
||||
zfs_nicenum(vs->vs_checksum_errors, cbuf, sizeof (cbuf));
|
||||
(void) printf(" %5s %5s %5s", rbuf, wbuf, cbuf);
|
||||
}
|
||||
|
||||
if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT,
|
||||
¬present) == 0) {
|
||||
char *path;
|
||||
verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0);
|
||||
(void) printf(" was %s", path);
|
||||
} else if (vs->vs_aux != 0) {
|
||||
(void) printf(" ");
|
||||
|
||||
switch (vs->vs_aux) {
|
||||
case VDEV_AUX_OPEN_FAILED:
|
||||
(void) printf(gettext("cannot open"));
|
||||
break;
|
||||
|
||||
case VDEV_AUX_BAD_GUID_SUM:
|
||||
(void) printf(gettext("missing device"));
|
||||
break;
|
||||
|
||||
case VDEV_AUX_NO_REPLICAS:
|
||||
(void) printf(gettext("insufficient replicas"));
|
||||
break;
|
||||
|
||||
case VDEV_AUX_VERSION_NEWER:
|
||||
(void) printf(gettext("newer version"));
|
||||
break;
|
||||
|
||||
case VDEV_AUX_SPARED:
|
||||
verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
|
||||
&cb.cb_guid) == 0);
|
||||
if (zpool_iter(g_zfs, find_spare, &cb) == 1) {
|
||||
if (strcmp(zpool_get_name(cb.cb_zhp),
|
||||
zpool_get_name(zhp)) == 0)
|
||||
(void) printf(gettext("currently in "
|
||||
"use"));
|
||||
else
|
||||
(void) printf(gettext("in use by "
|
||||
"pool '%s'"),
|
||||
zpool_get_name(cb.cb_zhp));
|
||||
zpool_close(cb.cb_zhp);
|
||||
} else {
|
||||
(void) printf(gettext("currently in use"));
|
||||
}
|
||||
break;
|
||||
|
||||
case VDEV_AUX_ERR_EXCEEDED:
|
||||
(void) printf(gettext("too many errors"));
|
||||
break;
|
||||
|
||||
case VDEV_AUX_IO_FAILURE:
|
||||
(void) printf(gettext("experienced I/O failures"));
|
||||
break;
|
||||
|
||||
case VDEV_AUX_BAD_LOG:
|
||||
(void) printf(gettext("bad intent log"));
|
||||
break;
|
||||
|
||||
default:
|
||||
(void) printf(gettext("corrupted data"));
|
||||
break;
|
||||
}
|
||||
} else if (vs->vs_scrub_repaired != 0 && children == 0) {
|
||||
/*
|
||||
* Report bytes resilvered/repaired on leaf devices.
|
||||
*/
|
||||
zfs_nicenum(vs->vs_scrub_repaired, repaired, sizeof (repaired));
|
||||
(void) printf(gettext(" %s %s"), repaired,
|
||||
(vs->vs_scrub_type == POOL_SCRUB_RESILVER) ?
|
||||
"resilvered" : "repaired");
|
||||
}
|
||||
|
||||
(void) printf("\n");
|
||||
|
||||
for (c = 0; c < children; c++) {
|
||||
uint64_t is_log = B_FALSE;
|
||||
|
||||
/* Don't print logs here */
|
||||
(void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
|
||||
&is_log);
|
||||
if (is_log)
|
||||
continue;
|
||||
vname = zpool_vdev_name(g_zfs, zhp, child[c]);
|
||||
print_status_config(zhp, vname, child[c],
|
||||
namewidth, depth + 2, isspare);
|
||||
free(vname);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Print the configuration of an exported pool. Iterate over all vdevs in the
|
||||
* pool, printing out the name and status for each one.
|
||||
*/
|
||||
void
|
||||
print_import_config(const char *name, nvlist_t *nv, int namewidth, int depth,
|
||||
boolean_t print_logs)
|
||||
print_import_config(const char *name, nvlist_t *nv, int namewidth, int depth)
|
||||
{
|
||||
nvlist_t **child;
|
||||
uint_t c, children;
|
||||
|
@ -1043,12 +1217,11 @@ print_import_config(const char *name, nvlist_t *nv, int namewidth, int depth,
|
|||
|
||||
(void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
|
||||
&is_log);
|
||||
if ((is_log && !print_logs) || (!is_log && print_logs))
|
||||
if (is_log)
|
||||
continue;
|
||||
|
||||
vname = zpool_vdev_name(g_zfs, NULL, child[c]);
|
||||
print_import_config(vname, child[c],
|
||||
namewidth, depth + 2, B_FALSE);
|
||||
print_import_config(vname, child[c], namewidth, depth + 2);
|
||||
free(vname);
|
||||
}
|
||||
|
||||
|
@ -1073,6 +1246,43 @@ print_import_config(const char *name, nvlist_t *nv, int namewidth, int depth,
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Print log vdevs.
|
||||
* Logs are recorded as top level vdevs in the main pool child array
|
||||
* but with "is_log" set to 1. We use either print_status_config() or
|
||||
* print_import_config() to print the top level logs then any log
|
||||
* children (eg mirrored slogs) are printed recursively - which
|
||||
* works because only the top level vdev is marked "is_log"
|
||||
*/
|
||||
static void
|
||||
print_logs(zpool_handle_t *zhp, nvlist_t *nv, int namewidth, boolean_t verbose)
|
||||
{
|
||||
uint_t c, children;
|
||||
nvlist_t **child;
|
||||
|
||||
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, &child,
|
||||
&children) != 0)
|
||||
return;
|
||||
|
||||
(void) printf(gettext("\tlogs\n"));
|
||||
|
||||
for (c = 0; c < children; c++) {
|
||||
uint64_t is_log = B_FALSE;
|
||||
char *name;
|
||||
|
||||
(void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
|
||||
&is_log);
|
||||
if (!is_log)
|
||||
continue;
|
||||
name = zpool_vdev_name(g_zfs, zhp, child[c]);
|
||||
if (verbose)
|
||||
print_status_config(zhp, name, child[c], namewidth,
|
||||
2, B_FALSE);
|
||||
else
|
||||
print_import_config(name, child[c], namewidth, 2);
|
||||
free(name);
|
||||
}
|
||||
}
|
||||
/*
|
||||
* Display the status for the given pool.
|
||||
*/
|
||||
|
@ -1241,11 +1451,9 @@ show_import(nvlist_t *config)
|
|||
if (namewidth < 10)
|
||||
namewidth = 10;
|
||||
|
||||
print_import_config(name, nvroot, namewidth, 0, B_FALSE);
|
||||
if (num_logs(nvroot) > 0) {
|
||||
(void) printf(gettext("\tlogs\n"));
|
||||
print_import_config(name, nvroot, namewidth, 0, B_TRUE);
|
||||
}
|
||||
print_import_config(name, nvroot, namewidth, 0);
|
||||
if (num_logs(nvroot) > 0)
|
||||
print_logs(NULL, nvroot, namewidth, B_FALSE);
|
||||
|
||||
if (reason == ZPOOL_STATUS_BAD_GUID_SUM) {
|
||||
(void) printf(gettext("\n\tAdditional devices are known to "
|
||||
|
@ -2427,10 +2635,14 @@ zpool_do_online(int argc, char **argv)
|
|||
zpool_handle_t *zhp;
|
||||
int ret = 0;
|
||||
vdev_state_t newstate;
|
||||
int flags = 0;
|
||||
|
||||
/* check options */
|
||||
while ((c = getopt(argc, argv, "t")) != -1) {
|
||||
while ((c = getopt(argc, argv, "et")) != -1) {
|
||||
switch (c) {
|
||||
case 'e':
|
||||
flags |= ZFS_ONLINE_EXPAND;
|
||||
break;
|
||||
case 't':
|
||||
case '?':
|
||||
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
|
||||
|
@ -2458,7 +2670,7 @@ zpool_do_online(int argc, char **argv)
|
|||
return (1);
|
||||
|
||||
for (i = 1; i < argc; i++) {
|
||||
if (zpool_vdev_online(zhp, argv[i], 0, &newstate) == 0) {
|
||||
if (zpool_vdev_online(zhp, argv[i], flags, &newstate) == 0) {
|
||||
if (newstate != VDEV_STATE_HEALTHY) {
|
||||
(void) printf(gettext("warning: device '%s' "
|
||||
"onlined, but remains in faulted state\n"),
|
||||
|
@ -2715,181 +2927,6 @@ print_scrub_status(nvlist_t *nvroot)
|
|||
(u_longlong_t)(minutes_left / 60), (uint_t)(minutes_left % 60));
|
||||
}
|
||||
|
||||
/*
 * Search state for find_spare(), used while iterating over pools.
 */
typedef struct spare_cbdata {
	uint64_t cb_guid;		/* in: guid of the vdev to look for */
	zpool_handle_t *cb_zhp;		/* out: pool containing that vdev */
} spare_cbdata_t;
|
||||
|
||||
static boolean_t
|
||||
find_vdev(nvlist_t *nv, uint64_t search)
|
||||
{
|
||||
uint64_t guid;
|
||||
nvlist_t **child;
|
||||
uint_t c, children;
|
||||
|
||||
if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0 &&
|
||||
search == guid)
|
||||
return (B_TRUE);
|
||||
|
||||
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
|
||||
&child, &children) == 0) {
|
||||
for (c = 0; c < children; c++)
|
||||
if (find_vdev(child[c], search))
|
||||
return (B_TRUE);
|
||||
}
|
||||
|
||||
return (B_FALSE);
|
||||
}
|
||||
|
||||
static int
|
||||
find_spare(zpool_handle_t *zhp, void *data)
|
||||
{
|
||||
spare_cbdata_t *cbp = data;
|
||||
nvlist_t *config, *nvroot;
|
||||
|
||||
config = zpool_get_config(zhp, NULL);
|
||||
verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
|
||||
&nvroot) == 0);
|
||||
|
||||
if (find_vdev(nvroot, cbp->cb_guid)) {
|
||||
cbp->cb_zhp = zhp;
|
||||
return (1);
|
||||
}
|
||||
|
||||
zpool_close(zhp);
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Print out configuration state as requested by status_callback.
|
||||
*/
|
||||
void
|
||||
print_status_config(zpool_handle_t *zhp, const char *name, nvlist_t *nv,
|
||||
int namewidth, int depth, boolean_t isspare, boolean_t print_logs)
|
||||
{
|
||||
nvlist_t **child;
|
||||
uint_t c, children;
|
||||
vdev_stat_t *vs;
|
||||
char rbuf[6], wbuf[6], cbuf[6], repaired[7];
|
||||
char *vname;
|
||||
uint64_t notpresent;
|
||||
spare_cbdata_t cb;
|
||||
char *state;
|
||||
|
||||
verify(nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_STATS,
|
||||
(uint64_t **)&vs, &c) == 0);
|
||||
|
||||
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
|
||||
&child, &children) != 0)
|
||||
children = 0;
|
||||
|
||||
state = zpool_state_to_name(vs->vs_state, vs->vs_aux);
|
||||
if (isspare) {
|
||||
/*
|
||||
* For hot spares, we use the terms 'INUSE' and 'AVAILABLE' for
|
||||
* online drives.
|
||||
*/
|
||||
if (vs->vs_aux == VDEV_AUX_SPARED)
|
||||
state = "INUSE";
|
||||
else if (vs->vs_state == VDEV_STATE_HEALTHY)
|
||||
state = "AVAIL";
|
||||
}
|
||||
|
||||
(void) printf("\t%*s%-*s %-8s", depth, "", namewidth - depth,
|
||||
name, state);
|
||||
|
||||
if (!isspare) {
|
||||
zfs_nicenum(vs->vs_read_errors, rbuf, sizeof (rbuf));
|
||||
zfs_nicenum(vs->vs_write_errors, wbuf, sizeof (wbuf));
|
||||
zfs_nicenum(vs->vs_checksum_errors, cbuf, sizeof (cbuf));
|
||||
(void) printf(" %5s %5s %5s", rbuf, wbuf, cbuf);
|
||||
}
|
||||
|
||||
if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT,
|
||||
¬present) == 0) {
|
||||
char *path;
|
||||
verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0);
|
||||
(void) printf(" was %s", path);
|
||||
} else if (vs->vs_aux != 0) {
|
||||
(void) printf(" ");
|
||||
|
||||
switch (vs->vs_aux) {
|
||||
case VDEV_AUX_OPEN_FAILED:
|
||||
(void) printf(gettext("cannot open"));
|
||||
break;
|
||||
|
||||
case VDEV_AUX_BAD_GUID_SUM:
|
||||
(void) printf(gettext("missing device"));
|
||||
break;
|
||||
|
||||
case VDEV_AUX_NO_REPLICAS:
|
||||
(void) printf(gettext("insufficient replicas"));
|
||||
break;
|
||||
|
||||
case VDEV_AUX_VERSION_NEWER:
|
||||
(void) printf(gettext("newer version"));
|
||||
break;
|
||||
|
||||
case VDEV_AUX_SPARED:
|
||||
verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
|
||||
&cb.cb_guid) == 0);
|
||||
if (zpool_iter(g_zfs, find_spare, &cb) == 1) {
|
||||
if (strcmp(zpool_get_name(cb.cb_zhp),
|
||||
zpool_get_name(zhp)) == 0)
|
||||
(void) printf(gettext("currently in "
|
||||
"use"));
|
||||
else
|
||||
(void) printf(gettext("in use by "
|
||||
"pool '%s'"),
|
||||
zpool_get_name(cb.cb_zhp));
|
||||
zpool_close(cb.cb_zhp);
|
||||
} else {
|
||||
(void) printf(gettext("currently in use"));
|
||||
}
|
||||
break;
|
||||
|
||||
case VDEV_AUX_ERR_EXCEEDED:
|
||||
(void) printf(gettext("too many errors"));
|
||||
break;
|
||||
|
||||
case VDEV_AUX_IO_FAILURE:
|
||||
(void) printf(gettext("experienced I/O failures"));
|
||||
break;
|
||||
|
||||
case VDEV_AUX_BAD_LOG:
|
||||
(void) printf(gettext("bad intent log"));
|
||||
break;
|
||||
|
||||
default:
|
||||
(void) printf(gettext("corrupted data"));
|
||||
break;
|
||||
}
|
||||
} else if (vs->vs_scrub_repaired != 0 && children == 0) {
|
||||
/*
|
||||
* Report bytes resilvered/repaired on leaf devices.
|
||||
*/
|
||||
zfs_nicenum(vs->vs_scrub_repaired, repaired, sizeof (repaired));
|
||||
(void) printf(gettext(" %s %s"), repaired,
|
||||
(vs->vs_scrub_type == POOL_SCRUB_RESILVER) ?
|
||||
"resilvered" : "repaired");
|
||||
}
|
||||
|
||||
(void) printf("\n");
|
||||
|
||||
for (c = 0; c < children; c++) {
|
||||
uint64_t is_log = B_FALSE;
|
||||
|
||||
(void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
|
||||
&is_log);
|
||||
if ((is_log && !print_logs) || (!is_log && print_logs))
|
||||
continue;
|
||||
vname = zpool_vdev_name(g_zfs, zhp, child[c]);
|
||||
print_status_config(zhp, vname, child[c],
|
||||
namewidth, depth + 2, isspare, B_FALSE);
|
||||
free(vname);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
print_error_log(zpool_handle_t *zhp)
|
||||
{
|
||||
|
@ -2940,7 +2977,7 @@ print_spares(zpool_handle_t *zhp, nvlist_t **spares, uint_t nspares,
|
|||
for (i = 0; i < nspares; i++) {
|
||||
name = zpool_vdev_name(g_zfs, zhp, spares[i]);
|
||||
print_status_config(zhp, name, spares[i],
|
||||
namewidth, 2, B_TRUE, B_FALSE);
|
||||
namewidth, 2, B_TRUE);
|
||||
free(name);
|
||||
}
|
||||
}
|
||||
|
@ -2960,7 +2997,7 @@ print_l2cache(zpool_handle_t *zhp, nvlist_t **l2cache, uint_t nl2cache,
|
|||
for (i = 0; i < nl2cache; i++) {
|
||||
name = zpool_vdev_name(g_zfs, zhp, l2cache[i]);
|
||||
print_status_config(zhp, name, l2cache[i],
|
||||
namewidth, 2, B_FALSE, B_FALSE);
|
||||
namewidth, 2, B_FALSE);
|
||||
free(name);
|
||||
}
|
||||
}
|
||||
|
@ -3190,11 +3227,10 @@ status_callback(zpool_handle_t *zhp, void *data)
|
|||
(void) printf(gettext("\t%-*s %-8s %5s %5s %5s\n"), namewidth,
|
||||
"NAME", "STATE", "READ", "WRITE", "CKSUM");
|
||||
print_status_config(zhp, zpool_get_name(zhp), nvroot,
|
||||
namewidth, 0, B_FALSE, B_FALSE);
|
||||
if (num_logs(nvroot) > 0)
|
||||
print_status_config(zhp, "logs", nvroot, namewidth, 0,
|
||||
B_FALSE, B_TRUE);
|
||||
namewidth, 0, B_FALSE);
|
||||
|
||||
if (num_logs(nvroot) > 0)
|
||||
print_logs(zhp, nvroot, namewidth, B_TRUE);
|
||||
if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
|
||||
&l2cache, &nl2cache) == 0)
|
||||
print_l2cache(zhp, l2cache, nl2cache, namewidth);
|
||||
|
@ -3418,7 +3454,7 @@ zpool_do_upgrade(int argc, char **argv)
|
|||
|
||||
|
||||
/* check options */
|
||||
while ((c = getopt(argc, argv, "avV:")) != -1) {
|
||||
while ((c = getopt(argc, argv, ":avV:")) != -1) {
|
||||
switch (c) {
|
||||
case 'a':
|
||||
cb.cb_all = B_TRUE;
|
||||
|
@ -3435,6 +3471,11 @@ zpool_do_upgrade(int argc, char **argv)
|
|||
usage(B_FALSE);
|
||||
}
|
||||
break;
|
||||
case ':':
|
||||
(void) fprintf(stderr, gettext("missing argument for "
|
||||
"'%c' option\n"), optopt);
|
||||
usage(B_FALSE);
|
||||
break;
|
||||
case '?':
|
||||
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
|
||||
optopt);
|
||||
|
@ -3495,8 +3536,9 @@ zpool_do_upgrade(int argc, char **argv)
|
|||
(void) printf(gettext(" 11 Improved scrub performance\n"));
|
||||
(void) printf(gettext(" 12 Snapshot properties\n"));
|
||||
(void) printf(gettext(" 13 snapused property\n"));
|
||||
(void) printf(gettext(" 14 passthrough-x aclinherit "
|
||||
"support\n"));
|
||||
(void) printf(gettext(" 14 passthrough-x aclinherit\n"));
|
||||
(void) printf(gettext(" 15 user/group space accounting\n"));
|
||||
(void) printf(gettext(" 16 stmf property support\n"));
|
||||
(void) printf(gettext("For more information on a particular "
|
||||
"version, including supported releases, see:\n\n"));
|
||||
(void) printf("http://www.opensolaris.org/os/community/zfs/"
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
|
@ -76,6 +76,7 @@
|
|||
#include <sys/spa.h>
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/txg.h>
|
||||
#include <sys/dbuf.h>
|
||||
#include <sys/zap.h>
|
||||
#include <sys/dmu_objset.h>
|
||||
#include <sys/poll.h>
|
||||
|
@ -92,6 +93,7 @@
|
|||
#include <sys/vdev_file.h>
|
||||
#include <sys/spa_impl.h>
|
||||
#include <sys/dsl_prop.h>
|
||||
#include <sys/dsl_dataset.h>
|
||||
#include <sys/refcount.h>
|
||||
#include <stdio.h>
|
||||
#include <stdio_ext.h>
|
||||
|
@ -162,6 +164,7 @@ typedef void ztest_func_t(ztest_args_t *);
|
|||
* Note: these aren't static because we want dladdr() to work.
|
||||
*/
|
||||
ztest_func_t ztest_dmu_read_write;
|
||||
ztest_func_t ztest_dmu_read_write_zcopy;
|
||||
ztest_func_t ztest_dmu_write_parallel;
|
||||
ztest_func_t ztest_dmu_object_alloc_free;
|
||||
ztest_func_t ztest_zap;
|
||||
|
@ -170,6 +173,7 @@ ztest_func_t ztest_traverse;
|
|||
ztest_func_t ztest_dsl_prop_get_set;
|
||||
ztest_func_t ztest_dmu_objset_create_destroy;
|
||||
ztest_func_t ztest_dmu_snapshot_create_destroy;
|
||||
ztest_func_t ztest_dsl_dataset_promote_busy;
|
||||
ztest_func_t ztest_spa_create_destroy;
|
||||
ztest_func_t ztest_fault_inject;
|
||||
ztest_func_t ztest_spa_rename;
|
||||
|
@ -196,6 +200,7 @@ uint64_t zopt_rarely = 60; /* every 60 seconds */
|
|||
|
||||
ztest_info_t ztest_info[] = {
|
||||
{ ztest_dmu_read_write, 1, &zopt_always },
|
||||
{ ztest_dmu_read_write_zcopy, 1, &zopt_always },
|
||||
{ ztest_dmu_write_parallel, 30, &zopt_always },
|
||||
{ ztest_dmu_object_alloc_free, 1, &zopt_always },
|
||||
{ ztest_zap, 30, &zopt_always },
|
||||
|
@ -208,6 +213,7 @@ ztest_info_t ztest_info[] = {
|
|||
{ ztest_spa_rename, 1, &zopt_rarely },
|
||||
{ ztest_vdev_attach_detach, 1, &zopt_rarely },
|
||||
{ ztest_vdev_LUN_growth, 1, &zopt_rarely },
|
||||
{ ztest_dsl_dataset_promote_busy, 1, &zopt_rarely },
|
||||
{ ztest_vdev_add_remove, 1, &zopt_vdevtime },
|
||||
{ ztest_vdev_aux_add_remove, 1, &zopt_vdevtime },
|
||||
{ ztest_scrub, 1, &zopt_vdevtime },
|
||||
|
@ -242,9 +248,11 @@ static ztest_shared_t *ztest_shared;
|
|||
static int ztest_random_fd;
|
||||
static int ztest_dump_core = 1;
|
||||
|
||||
static uint64_t metaslab_sz;
|
||||
static boolean_t ztest_exiting;
|
||||
|
||||
extern uint64_t metaslab_gang_bang;
|
||||
extern uint64_t metaslab_df_alloc_threshold;
|
||||
|
||||
#define ZTEST_DIROBJ 1
|
||||
#define ZTEST_MICROZAP_OBJ 2
|
||||
|
@ -946,7 +954,7 @@ ztest_vdev_aux_add_remove(ztest_args_t *za)
|
|||
* of devices that have pending state changes.
|
||||
*/
|
||||
if (ztest_random(2) == 0)
|
||||
(void) vdev_online(spa, guid, B_FALSE, NULL);
|
||||
(void) vdev_online(spa, guid, 0, NULL);
|
||||
|
||||
error = spa_vdev_remove(spa, guid, B_FALSE);
|
||||
if (error != 0 && error != EBUSY)
|
||||
|
@ -1024,7 +1032,7 @@ ztest_vdev_attach_detach(ztest_args_t *za)
|
|||
}
|
||||
|
||||
oldguid = oldvd->vdev_guid;
|
||||
oldsize = vdev_get_rsize(oldvd);
|
||||
oldsize = vdev_get_min_asize(oldvd);
|
||||
oldvd_is_log = oldvd->vdev_top->vdev_islog;
|
||||
(void) strcpy(oldpath, oldvd->vdev_path);
|
||||
pvd = oldvd->vdev_parent;
|
||||
|
@ -1060,7 +1068,7 @@ ztest_vdev_attach_detach(ztest_args_t *za)
|
|||
}
|
||||
|
||||
if (newvd) {
|
||||
newsize = vdev_get_rsize(newvd);
|
||||
newsize = vdev_get_min_asize(newvd);
|
||||
} else {
|
||||
/*
|
||||
* Make newsize a little bigger or smaller than oldsize.
|
||||
|
@ -1135,6 +1143,95 @@ ztest_vdev_attach_detach(ztest_args_t *za)
|
|||
(void) mutex_unlock(&ztest_shared->zs_vdev_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Callback function which expands the physical size of the vdev.
|
||||
*/
|
||||
vdev_t *
|
||||
grow_vdev(vdev_t *vd, void *arg)
|
||||
{
|
||||
spa_t *spa = vd->vdev_spa;
|
||||
size_t *newsize = arg;
|
||||
size_t fsize;
|
||||
int fd;
|
||||
|
||||
ASSERT(spa_config_held(spa, SCL_STATE, RW_READER) == SCL_STATE);
|
||||
ASSERT(vd->vdev_ops->vdev_op_leaf);
|
||||
|
||||
if ((fd = open(vd->vdev_path, O_RDWR)) == -1)
|
||||
return (vd);
|
||||
|
||||
fsize = lseek(fd, 0, SEEK_END);
|
||||
(void) ftruncate(fd, *newsize);
|
||||
|
||||
if (zopt_verbose >= 6) {
|
||||
(void) printf("%s grew from %lu to %lu bytes\n",
|
||||
vd->vdev_path, (ulong_t)fsize, (ulong_t)*newsize);
|
||||
}
|
||||
(void) close(fd);
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
* Callback function which expands a given vdev by calling vdev_online().
|
||||
*/
|
||||
/* ARGSUSED */
|
||||
vdev_t *
|
||||
online_vdev(vdev_t *vd, void *arg)
|
||||
{
|
||||
spa_t *spa = vd->vdev_spa;
|
||||
vdev_t *tvd = vd->vdev_top;
|
||||
vdev_t *pvd = vd->vdev_parent;
|
||||
uint64_t guid = vd->vdev_guid;
|
||||
|
||||
ASSERT(spa_config_held(spa, SCL_STATE, RW_READER) == SCL_STATE);
|
||||
ASSERT(vd->vdev_ops->vdev_op_leaf);
|
||||
|
||||
/* Calling vdev_online will initialize the new metaslabs */
|
||||
spa_config_exit(spa, SCL_STATE, spa);
|
||||
(void) vdev_online(spa, guid, ZFS_ONLINE_EXPAND, NULL);
|
||||
spa_config_enter(spa, SCL_STATE, spa, RW_READER);
|
||||
|
||||
/*
|
||||
* Since we dropped the lock we need to ensure that we're
|
||||
* still talking to the original vdev. It's possible this
|
||||
* vdev may have been detached/replaced while we were
|
||||
* trying to online it.
|
||||
*/
|
||||
if (vd != vdev_lookup_by_guid(tvd, guid) || vd->vdev_parent != pvd) {
|
||||
if (zopt_verbose >= 6) {
|
||||
(void) printf("vdev %p has disappeared, was "
|
||||
"guid %llu\n", (void *)vd, (u_longlong_t)guid);
|
||||
}
|
||||
return (vd);
|
||||
}
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
* Traverse the vdev tree calling the supplied function.
|
||||
* We continue to walk the tree until we either have walked all
|
||||
* children or we receive a non-NULL return from the callback.
|
||||
* If a NULL callback is passed, then we just return back the first
|
||||
* leaf vdev we encounter.
|
||||
*/
|
||||
vdev_t *
|
||||
vdev_walk_tree(vdev_t *vd, vdev_t *(*func)(vdev_t *, void *), void *arg)
|
||||
{
|
||||
if (vd->vdev_ops->vdev_op_leaf) {
|
||||
if (func == NULL)
|
||||
return (vd);
|
||||
else
|
||||
return (func(vd, arg));
|
||||
}
|
||||
|
||||
for (uint_t c = 0; c < vd->vdev_children; c++) {
|
||||
vdev_t *cvd = vd->vdev_child[c];
|
||||
if ((cvd = vdev_walk_tree(cvd, func, arg)) != NULL)
|
||||
return (cvd);
|
||||
}
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
* Verify that dynamic LUN growth works as expected.
|
||||
*/
|
||||
|
@ -1142,43 +1239,107 @@ void
|
|||
ztest_vdev_LUN_growth(ztest_args_t *za)
|
||||
{
|
||||
spa_t *spa = za->za_spa;
|
||||
char dev_name[MAXPATHLEN];
|
||||
uint64_t leaves = MAX(zopt_mirrors, 1) * zopt_raidz;
|
||||
uint64_t vdev;
|
||||
size_t fsize;
|
||||
int fd;
|
||||
vdev_t *vd, *tvd = NULL;
|
||||
size_t psize, newsize;
|
||||
uint64_t spa_newsize, spa_cursize, ms_count;
|
||||
|
||||
(void) mutex_lock(&ztest_shared->zs_vdev_lock);
|
||||
mutex_enter(&spa_namespace_lock);
|
||||
spa_config_enter(spa, SCL_STATE, spa, RW_READER);
|
||||
|
||||
/*
|
||||
* Pick a random leaf vdev.
|
||||
*/
|
||||
spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
|
||||
vdev = ztest_random(spa->spa_root_vdev->vdev_children * leaves);
|
||||
spa_config_exit(spa, SCL_VDEV, FTAG);
|
||||
while (tvd == NULL || tvd->vdev_islog) {
|
||||
uint64_t vdev;
|
||||
|
||||
(void) sprintf(dev_name, ztest_dev_template, zopt_dir, zopt_pool, vdev);
|
||||
|
||||
if ((fd = open(dev_name, O_RDWR)) != -1) {
|
||||
/*
|
||||
* Determine the size.
|
||||
*/
|
||||
fsize = lseek(fd, 0, SEEK_END);
|
||||
|
||||
/*
|
||||
* If it's less than 2x the original size, grow by around 3%.
|
||||
*/
|
||||
if (fsize < 2 * zopt_vdev_size) {
|
||||
size_t newsize = fsize + ztest_random(fsize / 32);
|
||||
(void) ftruncate(fd, newsize);
|
||||
if (zopt_verbose >= 6) {
|
||||
(void) printf("%s grew from %lu to %lu bytes\n",
|
||||
dev_name, (ulong_t)fsize, (ulong_t)newsize);
|
||||
}
|
||||
}
|
||||
(void) close(fd);
|
||||
vdev = ztest_random(spa->spa_root_vdev->vdev_children);
|
||||
tvd = spa->spa_root_vdev->vdev_child[vdev];
|
||||
}
|
||||
|
||||
/*
|
||||
* Determine the size of the first leaf vdev associated with
|
||||
* our top-level device.
|
||||
*/
|
||||
vd = vdev_walk_tree(tvd, NULL, NULL);
|
||||
ASSERT3P(vd, !=, NULL);
|
||||
ASSERT(vd->vdev_ops->vdev_op_leaf);
|
||||
|
||||
psize = vd->vdev_psize;
|
||||
|
||||
/*
|
||||
* We only try to expand the vdev if it's less than 4x its
|
||||
* original size and it has a valid psize.
|
||||
*/
|
||||
if (psize == 0 || psize >= 4 * zopt_vdev_size) {
|
||||
spa_config_exit(spa, SCL_STATE, spa);
|
||||
mutex_exit(&spa_namespace_lock);
|
||||
(void) mutex_unlock(&ztest_shared->zs_vdev_lock);
|
||||
return;
|
||||
}
|
||||
ASSERT(psize > 0);
|
||||
newsize = psize + psize / 8;
|
||||
ASSERT3U(newsize, >, psize);
|
||||
|
||||
if (zopt_verbose >= 6) {
|
||||
(void) printf("Expanding vdev %s from %lu to %lu\n",
|
||||
vd->vdev_path, (ulong_t)psize, (ulong_t)newsize);
|
||||
}
|
||||
|
||||
spa_cursize = spa_get_space(spa);
|
||||
ms_count = tvd->vdev_ms_count;
|
||||
|
||||
/*
|
||||
* Growing the vdev is a two step process:
|
||||
* 1). expand the physical size (i.e. relabel)
|
||||
* 2). online the vdev to create the new metaslabs
|
||||
*/
|
||||
if (vdev_walk_tree(tvd, grow_vdev, &newsize) != NULL ||
|
||||
vdev_walk_tree(tvd, online_vdev, NULL) != NULL ||
|
||||
tvd->vdev_state != VDEV_STATE_HEALTHY) {
|
||||
if (zopt_verbose >= 5) {
|
||||
(void) printf("Could not expand LUN because "
|
||||
"some vdevs were not healthy\n");
|
||||
}
|
||||
(void) spa_config_exit(spa, SCL_STATE, spa);
|
||||
mutex_exit(&spa_namespace_lock);
|
||||
(void) mutex_unlock(&ztest_shared->zs_vdev_lock);
|
||||
return;
|
||||
}
|
||||
|
||||
(void) spa_config_exit(spa, SCL_STATE, spa);
|
||||
mutex_exit(&spa_namespace_lock);
|
||||
|
||||
/*
|
||||
* Expanding the LUN will update the config asynchronously,
|
||||
* thus we must wait for the async thread to complete any
|
||||
* pending tasks before proceeding.
|
||||
*/
|
||||
mutex_enter(&spa->spa_async_lock);
|
||||
while (spa->spa_async_thread != NULL || spa->spa_async_tasks)
|
||||
cv_wait(&spa->spa_async_cv, &spa->spa_async_lock);
|
||||
mutex_exit(&spa->spa_async_lock);
|
||||
|
||||
spa_config_enter(spa, SCL_STATE, spa, RW_READER);
|
||||
spa_newsize = spa_get_space(spa);
|
||||
|
||||
/*
|
||||
* Make sure we were able to grow the pool.
|
||||
*/
|
||||
if (ms_count >= tvd->vdev_ms_count ||
|
||||
spa_cursize >= spa_newsize) {
|
||||
(void) printf("Top-level vdev metaslab count: "
|
||||
"before %llu, after %llu\n",
|
||||
(u_longlong_t)ms_count,
|
||||
(u_longlong_t)tvd->vdev_ms_count);
|
||||
fatal(0, "LUN expansion failed: before %llu, "
|
||||
"after %llu\n", spa_cursize, spa_newsize);
|
||||
} else if (zopt_verbose >= 5) {
|
||||
char oldnumbuf[6], newnumbuf[6];
|
||||
|
||||
nicenum(spa_cursize, oldnumbuf);
|
||||
nicenum(spa_newsize, newnumbuf);
|
||||
(void) printf("%s grew from %s to %s\n",
|
||||
spa->spa_name, oldnumbuf, newnumbuf);
|
||||
}
|
||||
spa_config_exit(spa, SCL_STATE, spa);
|
||||
(void) mutex_unlock(&ztest_shared->zs_vdev_lock);
|
||||
}
|
||||
|
||||
|
@ -1425,7 +1586,8 @@ ztest_dmu_snapshot_create_destroy(ztest_args_t *za)
|
|||
error = dmu_objset_destroy(snapname);
|
||||
if (error != 0 && error != ENOENT)
|
||||
fatal(0, "dmu_objset_destroy() = %d", error);
|
||||
error = dmu_objset_snapshot(osname, strchr(snapname, '@')+1, FALSE);
|
||||
error = dmu_objset_snapshot(osname, strchr(snapname, '@')+1,
|
||||
NULL, FALSE);
|
||||
if (error == ENOSPC)
|
||||
ztest_record_enospc("dmu_take_snapshot");
|
||||
else if (error != 0 && error != EEXIST)
|
||||
|
@ -1433,6 +1595,148 @@ ztest_dmu_snapshot_create_destroy(ztest_args_t *za)
|
|||
(void) rw_unlock(&ztest_shared->zs_name_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Cleanup non-standard snapshots and clones.
|
||||
*/
|
||||
void
|
||||
ztest_dsl_dataset_cleanup(char *osname, uint64_t curval)
|
||||
{
|
||||
char snap1name[100];
|
||||
char clone1name[100];
|
||||
char snap2name[100];
|
||||
char clone2name[100];
|
||||
char snap3name[100];
|
||||
int error;
|
||||
|
||||
(void) snprintf(snap1name, 100, "%s@s1_%llu", osname, curval);
|
||||
(void) snprintf(clone1name, 100, "%s/c1_%llu", osname, curval);
|
||||
(void) snprintf(snap2name, 100, "%s@s2_%llu", clone1name, curval);
|
||||
(void) snprintf(clone2name, 100, "%s/c2_%llu", osname, curval);
|
||||
(void) snprintf(snap3name, 100, "%s@s3_%llu", clone1name, curval);
|
||||
|
||||
error = dmu_objset_destroy(clone2name);
|
||||
if (error && error != ENOENT)
|
||||
fatal(0, "dmu_objset_destroy(%s) = %d", clone2name, error);
|
||||
error = dmu_objset_destroy(snap3name);
|
||||
if (error && error != ENOENT)
|
||||
fatal(0, "dmu_objset_destroy(%s) = %d", snap3name, error);
|
||||
error = dmu_objset_destroy(snap2name);
|
||||
if (error && error != ENOENT)
|
||||
fatal(0, "dmu_objset_destroy(%s) = %d", snap2name, error);
|
||||
error = dmu_objset_destroy(clone1name);
|
||||
if (error && error != ENOENT)
|
||||
fatal(0, "dmu_objset_destroy(%s) = %d", clone1name, error);
|
||||
error = dmu_objset_destroy(snap1name);
|
||||
if (error && error != ENOENT)
|
||||
fatal(0, "dmu_objset_destroy(%s) = %d", snap1name, error);
|
||||
}
|
||||
|
||||
/*
|
||||
* Verify dsl_dataset_promote handles EBUSY
|
||||
*/
|
||||
void
|
||||
ztest_dsl_dataset_promote_busy(ztest_args_t *za)
|
||||
{
|
||||
int error;
|
||||
objset_t *os = za->za_os;
|
||||
objset_t *clone;
|
||||
dsl_dataset_t *ds;
|
||||
char snap1name[100];
|
||||
char clone1name[100];
|
||||
char snap2name[100];
|
||||
char clone2name[100];
|
||||
char snap3name[100];
|
||||
char osname[MAXNAMELEN];
|
||||
uint64_t curval = za->za_instance;
|
||||
|
||||
(void) rw_rdlock(&ztest_shared->zs_name_lock);
|
||||
|
||||
dmu_objset_name(os, osname);
|
||||
ztest_dsl_dataset_cleanup(osname, curval);
|
||||
|
||||
(void) snprintf(snap1name, 100, "%s@s1_%llu", osname, curval);
|
||||
(void) snprintf(clone1name, 100, "%s/c1_%llu", osname, curval);
|
||||
(void) snprintf(snap2name, 100, "%s@s2_%llu", clone1name, curval);
|
||||
(void) snprintf(clone2name, 100, "%s/c2_%llu", osname, curval);
|
||||
(void) snprintf(snap3name, 100, "%s@s3_%llu", clone1name, curval);
|
||||
|
||||
error = dmu_objset_snapshot(osname, strchr(snap1name, '@')+1,
|
||||
NULL, FALSE);
|
||||
if (error && error != EEXIST) {
|
||||
if (error == ENOSPC) {
|
||||
ztest_record_enospc("dmu_take_snapshot");
|
||||
goto out;
|
||||
}
|
||||
fatal(0, "dmu_take_snapshot(%s) = %d", snap1name, error);
|
||||
}
|
||||
|
||||
error = dmu_objset_open(snap1name, DMU_OST_OTHER,
|
||||
DS_MODE_USER | DS_MODE_READONLY, &clone);
|
||||
if (error)
|
||||
fatal(0, "dmu_open_snapshot(%s) = %d", snap1name, error);
|
||||
|
||||
error = dmu_objset_create(clone1name, DMU_OST_OTHER, clone, 0,
|
||||
NULL, NULL);
|
||||
dmu_objset_close(clone);
|
||||
if (error) {
|
||||
if (error == ENOSPC) {
|
||||
ztest_record_enospc("dmu_objset_create");
|
||||
goto out;
|
||||
}
|
||||
fatal(0, "dmu_objset_create(%s) = %d", clone1name, error);
|
||||
}
|
||||
|
||||
error = dmu_objset_snapshot(clone1name, strchr(snap2name, '@')+1,
|
||||
NULL, FALSE);
|
||||
if (error && error != EEXIST) {
|
||||
if (error == ENOSPC) {
|
||||
ztest_record_enospc("dmu_take_snapshot");
|
||||
goto out;
|
||||
}
|
||||
fatal(0, "dmu_open_snapshot(%s) = %d", snap2name, error);
|
||||
}
|
||||
|
||||
error = dmu_objset_snapshot(clone1name, strchr(snap3name, '@')+1,
|
||||
NULL, FALSE);
|
||||
if (error && error != EEXIST) {
|
||||
if (error == ENOSPC) {
|
||||
ztest_record_enospc("dmu_take_snapshot");
|
||||
goto out;
|
||||
}
|
||||
fatal(0, "dmu_open_snapshot(%s) = %d", snap3name, error);
|
||||
}
|
||||
|
||||
error = dmu_objset_open(snap3name, DMU_OST_OTHER,
|
||||
DS_MODE_USER | DS_MODE_READONLY, &clone);
|
||||
if (error)
|
||||
fatal(0, "dmu_open_snapshot(%s) = %d", snap3name, error);
|
||||
|
||||
error = dmu_objset_create(clone2name, DMU_OST_OTHER, clone, 0,
|
||||
NULL, NULL);
|
||||
dmu_objset_close(clone);
|
||||
if (error) {
|
||||
if (error == ENOSPC) {
|
||||
ztest_record_enospc("dmu_objset_create");
|
||||
goto out;
|
||||
}
|
||||
fatal(0, "dmu_objset_create(%s) = %d", clone2name, error);
|
||||
}
|
||||
|
||||
error = dsl_dataset_own(snap1name, DS_MODE_READONLY, FTAG, &ds);
|
||||
if (error)
|
||||
fatal(0, "dsl_dataset_own(%s) = %d", snap1name, error);
|
||||
error = dsl_dataset_promote(clone2name);
|
||||
if (error != EBUSY)
|
||||
fatal(0, "dsl_dataset_promote(%s), %d, not EBUSY", clone2name,
|
||||
error);
|
||||
dsl_dataset_disown(ds, FTAG);
|
||||
|
||||
out:
|
||||
ztest_dsl_dataset_cleanup(osname, curval);
|
||||
|
||||
(void) rw_unlock(&ztest_shared->zs_name_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Verify that dmu_object_{alloc,free} work as expected.
|
||||
*/
|
||||
|
@ -1456,7 +1760,7 @@ ztest_dmu_object_alloc_free(ztest_args_t *za)
|
|||
* Create a batch object if necessary, and record it in the directory.
|
||||
*/
|
||||
VERIFY3U(0, ==, dmu_read(os, ZTEST_DIROBJ, za->za_diroff,
|
||||
sizeof (uint64_t), &batchobj));
|
||||
sizeof (uint64_t), &batchobj, DMU_READ_PREFETCH));
|
||||
if (batchobj == 0) {
|
||||
tx = dmu_tx_create(os);
|
||||
dmu_tx_hold_write(tx, ZTEST_DIROBJ, za->za_diroff,
|
||||
|
@ -1481,7 +1785,7 @@ ztest_dmu_object_alloc_free(ztest_args_t *za)
|
|||
*/
|
||||
for (b = 0; b < batchsize; b++) {
|
||||
VERIFY3U(0, ==, dmu_read(os, batchobj, b * sizeof (uint64_t),
|
||||
sizeof (uint64_t), &object));
|
||||
sizeof (uint64_t), &object, DMU_READ_PREFETCH));
|
||||
if (object == 0)
|
||||
continue;
|
||||
/*
|
||||
|
@ -1516,7 +1820,7 @@ ztest_dmu_object_alloc_free(ztest_args_t *za)
|
|||
* We expect the word at endoff to be our object number.
|
||||
*/
|
||||
VERIFY(0 == dmu_read(os, object, endoff,
|
||||
sizeof (uint64_t), &temp));
|
||||
sizeof (uint64_t), &temp, DMU_READ_PREFETCH));
|
||||
|
||||
if (temp != object) {
|
||||
fatal(0, "bad data in %s, got %llu, expected %llu",
|
||||
|
@ -1701,7 +2005,7 @@ ztest_dmu_read_write(ztest_args_t *za)
|
|||
* Read the directory info. If it's the first time, set things up.
|
||||
*/
|
||||
VERIFY(0 == dmu_read(os, ZTEST_DIROBJ, za->za_diroff,
|
||||
sizeof (dd), &dd));
|
||||
sizeof (dd), &dd, DMU_READ_PREFETCH));
|
||||
if (dd.dd_chunk == 0) {
|
||||
ASSERT(dd.dd_packobj == 0);
|
||||
ASSERT(dd.dd_bigobj == 0);
|
||||
|
@ -1763,9 +2067,11 @@ ztest_dmu_read_write(ztest_args_t *za)
|
|||
/*
|
||||
* Read the current contents of our objects.
|
||||
*/
|
||||
error = dmu_read(os, dd.dd_packobj, packoff, packsize, packbuf);
|
||||
error = dmu_read(os, dd.dd_packobj, packoff, packsize, packbuf,
|
||||
DMU_READ_PREFETCH);
|
||||
ASSERT3U(error, ==, 0);
|
||||
error = dmu_read(os, dd.dd_bigobj, bigoff, bigsize, bigbuf);
|
||||
error = dmu_read(os, dd.dd_bigobj, bigoff, bigsize, bigbuf,
|
||||
DMU_READ_PREFETCH);
|
||||
ASSERT3U(error, ==, 0);
|
||||
|
||||
/*
|
||||
|
@ -1871,9 +2177,9 @@ ztest_dmu_read_write(ztest_args_t *za)
|
|||
void *bigcheck = umem_alloc(bigsize, UMEM_NOFAIL);
|
||||
|
||||
VERIFY(0 == dmu_read(os, dd.dd_packobj, packoff,
|
||||
packsize, packcheck));
|
||||
packsize, packcheck, DMU_READ_PREFETCH));
|
||||
VERIFY(0 == dmu_read(os, dd.dd_bigobj, bigoff,
|
||||
bigsize, bigcheck));
|
||||
bigsize, bigcheck, DMU_READ_PREFETCH));
|
||||
|
||||
ASSERT(bcmp(packbuf, packcheck, packsize) == 0);
|
||||
ASSERT(bcmp(bigbuf, bigcheck, bigsize) == 0);
|
||||
|
@ -1886,6 +2192,314 @@ ztest_dmu_read_write(ztest_args_t *za)
|
|||
umem_free(bigbuf, bigsize);
|
||||
}
|
||||
|
||||
void
|
||||
compare_and_update_pbbufs(uint64_t s, bufwad_t *packbuf, bufwad_t *bigbuf,
|
||||
uint64_t bigsize, uint64_t n, dmu_read_write_dir_t dd, uint64_t txg)
|
||||
{
|
||||
uint64_t i;
|
||||
bufwad_t *pack;
|
||||
bufwad_t *bigH;
|
||||
bufwad_t *bigT;
|
||||
|
||||
/*
|
||||
* For each index from n to n + s, verify that the existing bufwad
|
||||
* in packobj matches the bufwads at the head and tail of the
|
||||
* corresponding chunk in bigobj. Then update all three bufwads
|
||||
* with the new values we want to write out.
|
||||
*/
|
||||
for (i = 0; i < s; i++) {
|
||||
/* LINTED */
|
||||
pack = (bufwad_t *)((char *)packbuf + i * sizeof (bufwad_t));
|
||||
/* LINTED */
|
||||
bigH = (bufwad_t *)((char *)bigbuf + i * dd.dd_chunk);
|
||||
/* LINTED */
|
||||
bigT = (bufwad_t *)((char *)bigH + dd.dd_chunk) - 1;
|
||||
|
||||
ASSERT((uintptr_t)bigH - (uintptr_t)bigbuf < bigsize);
|
||||
ASSERT((uintptr_t)bigT - (uintptr_t)bigbuf < bigsize);
|
||||
|
||||
if (pack->bw_txg > txg)
|
||||
fatal(0, "future leak: got %llx, open txg is %llx",
|
||||
pack->bw_txg, txg);
|
||||
|
||||
if (pack->bw_data != 0 && pack->bw_index != n + i)
|
||||
fatal(0, "wrong index: got %llx, wanted %llx+%llx",
|
||||
pack->bw_index, n, i);
|
||||
|
||||
if (bcmp(pack, bigH, sizeof (bufwad_t)) != 0)
|
||||
fatal(0, "pack/bigH mismatch in %p/%p", pack, bigH);
|
||||
|
||||
if (bcmp(pack, bigT, sizeof (bufwad_t)) != 0)
|
||||
fatal(0, "pack/bigT mismatch in %p/%p", pack, bigT);
|
||||
|
||||
pack->bw_index = n + i;
|
||||
pack->bw_txg = txg;
|
||||
pack->bw_data = 1 + ztest_random(-2ULL);
|
||||
|
||||
*bigH = *pack;
|
||||
*bigT = *pack;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
ztest_dmu_read_write_zcopy(ztest_args_t *za)
|
||||
{
|
||||
objset_t *os = za->za_os;
|
||||
dmu_read_write_dir_t dd;
|
||||
dmu_tx_t *tx;
|
||||
uint64_t i;
|
||||
int error;
|
||||
uint64_t n, s, txg;
|
||||
bufwad_t *packbuf, *bigbuf;
|
||||
uint64_t packoff, packsize, bigoff, bigsize;
|
||||
uint64_t regions = 997;
|
||||
uint64_t stride = 123456789ULL;
|
||||
uint64_t width = 9;
|
||||
dmu_buf_t *bonus_db;
|
||||
arc_buf_t **bigbuf_arcbufs;
|
||||
dmu_object_info_t *doi = &za->za_doi;
|
||||
|
||||
/*
|
||||
* This test uses two objects, packobj and bigobj, that are always
|
||||
* updated together (i.e. in the same tx) so that their contents are
|
||||
* in sync and can be compared. Their contents relate to each other
|
||||
* in a simple way: packobj is a dense array of 'bufwad' structures,
|
||||
* while bigobj is a sparse array of the same bufwads. Specifically,
|
||||
* for any index n, there are three bufwads that should be identical:
|
||||
*
|
||||
* packobj, at offset n * sizeof (bufwad_t)
|
||||
* bigobj, at the head of the nth chunk
|
||||
* bigobj, at the tail of the nth chunk
|
||||
*
|
||||
* The chunk size is set equal to bigobj block size so that
|
||||
* dmu_assign_arcbuf() can be tested for object updates.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Read the directory info. If it's the first time, set things up.
|
||||
*/
|
||||
VERIFY(0 == dmu_read(os, ZTEST_DIROBJ, za->za_diroff,
|
||||
sizeof (dd), &dd, DMU_READ_PREFETCH));
|
||||
if (dd.dd_chunk == 0) {
|
||||
ASSERT(dd.dd_packobj == 0);
|
||||
ASSERT(dd.dd_bigobj == 0);
|
||||
tx = dmu_tx_create(os);
|
||||
dmu_tx_hold_write(tx, ZTEST_DIROBJ, za->za_diroff, sizeof (dd));
|
||||
dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
|
||||
error = dmu_tx_assign(tx, TXG_WAIT);
|
||||
if (error) {
|
||||
ztest_record_enospc("create r/w directory");
|
||||
dmu_tx_abort(tx);
|
||||
return;
|
||||
}
|
||||
|
||||
dd.dd_packobj = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0,
|
||||
DMU_OT_NONE, 0, tx);
|
||||
dd.dd_bigobj = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0,
|
||||
DMU_OT_NONE, 0, tx);
|
||||
ztest_set_random_blocksize(os, dd.dd_packobj, tx);
|
||||
ztest_set_random_blocksize(os, dd.dd_bigobj, tx);
|
||||
|
||||
VERIFY(dmu_object_info(os, dd.dd_bigobj, doi) == 0);
|
||||
ASSERT(doi->doi_data_block_size >= 2 * sizeof (bufwad_t));
|
||||
ASSERT(ISP2(doi->doi_data_block_size));
|
||||
dd.dd_chunk = doi->doi_data_block_size;
|
||||
|
||||
dmu_write(os, ZTEST_DIROBJ, za->za_diroff, sizeof (dd), &dd,
|
||||
tx);
|
||||
dmu_tx_commit(tx);
|
||||
} else {
|
||||
VERIFY(dmu_object_info(os, dd.dd_bigobj, doi) == 0);
|
||||
VERIFY(ISP2(doi->doi_data_block_size));
|
||||
VERIFY(dd.dd_chunk == doi->doi_data_block_size);
|
||||
VERIFY(dd.dd_chunk >= 2 * sizeof (bufwad_t));
|
||||
}
|
||||
|
||||
/*
|
||||
* Pick a random index and compute the offsets into packobj and bigobj.
|
||||
*/
|
||||
n = ztest_random(regions) * stride + ztest_random(width);
|
||||
s = 1 + ztest_random(width - 1);
|
||||
|
||||
packoff = n * sizeof (bufwad_t);
|
||||
packsize = s * sizeof (bufwad_t);
|
||||
|
||||
bigoff = n * dd.dd_chunk;
|
||||
bigsize = s * dd.dd_chunk;
|
||||
|
||||
packbuf = umem_zalloc(packsize, UMEM_NOFAIL);
|
||||
bigbuf = umem_zalloc(bigsize, UMEM_NOFAIL);
|
||||
|
||||
VERIFY(dmu_bonus_hold(os, dd.dd_bigobj, FTAG, &bonus_db) == 0);
|
||||
|
||||
bigbuf_arcbufs = umem_zalloc(2 * s * sizeof (arc_buf_t *), UMEM_NOFAIL);
|
||||
|
||||
/*
|
||||
* Iteration 0 test zcopy for DB_UNCACHED dbufs.
|
||||
* Iteration 1 test zcopy to already referenced dbufs.
|
||||
* Iteration 2 test zcopy to dirty dbuf in the same txg.
|
||||
* Iteration 3 test zcopy to dbuf dirty in previous txg.
|
||||
* Iteration 4 test zcopy when dbuf is no longer dirty.
|
||||
* Iteration 5 test zcopy when it can't be done.
|
||||
* Iteration 6 one more zcopy write.
|
||||
*/
|
||||
for (i = 0; i < 7; i++) {
|
||||
uint64_t j;
|
||||
uint64_t off;
|
||||
|
||||
/*
|
||||
* In iteration 5 (i == 5) use arcbufs
|
||||
* that don't match bigobj blksz to test
|
||||
* dmu_assign_arcbuf() when it can't directly
|
||||
* assign an arcbuf to a dbuf.
|
||||
*/
|
||||
for (j = 0; j < s; j++) {
|
||||
if (i != 5) {
|
||||
bigbuf_arcbufs[j] =
|
||||
dmu_request_arcbuf(bonus_db,
|
||||
dd.dd_chunk);
|
||||
} else {
|
||||
bigbuf_arcbufs[2 * j] =
|
||||
dmu_request_arcbuf(bonus_db,
|
||||
dd.dd_chunk / 2);
|
||||
bigbuf_arcbufs[2 * j + 1] =
|
||||
dmu_request_arcbuf(bonus_db,
|
||||
dd.dd_chunk / 2);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Get a tx for the mods to both packobj and bigobj.
|
||||
*/
|
||||
tx = dmu_tx_create(os);
|
||||
|
||||
dmu_tx_hold_write(tx, dd.dd_packobj, packoff, packsize);
|
||||
dmu_tx_hold_write(tx, dd.dd_bigobj, bigoff, bigsize);
|
||||
|
||||
if (ztest_random(100) == 0) {
|
||||
error = -1;
|
||||
} else {
|
||||
error = dmu_tx_assign(tx, TXG_WAIT);
|
||||
}
|
||||
|
||||
if (error) {
|
||||
if (error != -1) {
|
||||
ztest_record_enospc("dmu r/w range");
|
||||
}
|
||||
dmu_tx_abort(tx);
|
||||
umem_free(packbuf, packsize);
|
||||
umem_free(bigbuf, bigsize);
|
||||
for (j = 0; j < s; j++) {
|
||||
if (i != 5) {
|
||||
dmu_return_arcbuf(bigbuf_arcbufs[j]);
|
||||
} else {
|
||||
dmu_return_arcbuf(
|
||||
bigbuf_arcbufs[2 * j]);
|
||||
dmu_return_arcbuf(
|
||||
bigbuf_arcbufs[2 * j + 1]);
|
||||
}
|
||||
}
|
||||
umem_free(bigbuf_arcbufs, 2 * s * sizeof (arc_buf_t *));
|
||||
dmu_buf_rele(bonus_db, FTAG);
|
||||
return;
|
||||
}
|
||||
|
||||
txg = dmu_tx_get_txg(tx);
|
||||
|
||||
/*
|
||||
* 50% of the time don't read objects in the 1st iteration to
|
||||
* test dmu_assign_arcbuf() for the case when there're no
|
||||
* existing dbufs for the specified offsets.
|
||||
*/
|
||||
if (i != 0 || ztest_random(2) != 0) {
|
||||
error = dmu_read(os, dd.dd_packobj, packoff,
|
||||
packsize, packbuf, DMU_READ_PREFETCH);
|
||||
ASSERT3U(error, ==, 0);
|
||||
error = dmu_read(os, dd.dd_bigobj, bigoff, bigsize,
|
||||
bigbuf, DMU_READ_PREFETCH);
|
||||
ASSERT3U(error, ==, 0);
|
||||
}
|
||||
compare_and_update_pbbufs(s, packbuf, bigbuf, bigsize,
|
||||
n, dd, txg);
|
||||
|
||||
/*
|
||||
* We've verified all the old bufwads, and made new ones.
|
||||
* Now write them out.
|
||||
*/
|
||||
dmu_write(os, dd.dd_packobj, packoff, packsize, packbuf, tx);
|
||||
if (zopt_verbose >= 6) {
|
||||
(void) printf("writing offset %llx size %llx"
|
||||
" txg %llx\n",
|
||||
(u_longlong_t)bigoff,
|
||||
(u_longlong_t)bigsize,
|
||||
(u_longlong_t)txg);
|
||||
}
|
||||
for (off = bigoff, j = 0; j < s; j++, off += dd.dd_chunk) {
|
||||
dmu_buf_t *dbt;
|
||||
if (i != 5) {
|
||||
bcopy((caddr_t)bigbuf + (off - bigoff),
|
||||
bigbuf_arcbufs[j]->b_data, dd.dd_chunk);
|
||||
} else {
|
||||
bcopy((caddr_t)bigbuf + (off - bigoff),
|
||||
bigbuf_arcbufs[2 * j]->b_data,
|
||||
dd.dd_chunk / 2);
|
||||
bcopy((caddr_t)bigbuf + (off - bigoff) +
|
||||
dd.dd_chunk / 2,
|
||||
bigbuf_arcbufs[2 * j + 1]->b_data,
|
||||
dd.dd_chunk / 2);
|
||||
}
|
||||
|
||||
if (i == 1) {
|
||||
VERIFY(dmu_buf_hold(os, dd.dd_bigobj, off,
|
||||
FTAG, &dbt) == 0);
|
||||
}
|
||||
if (i != 5) {
|
||||
dmu_assign_arcbuf(bonus_db, off,
|
||||
bigbuf_arcbufs[j], tx);
|
||||
} else {
|
||||
dmu_assign_arcbuf(bonus_db, off,
|
||||
bigbuf_arcbufs[2 * j], tx);
|
||||
dmu_assign_arcbuf(bonus_db,
|
||||
off + dd.dd_chunk / 2,
|
||||
bigbuf_arcbufs[2 * j + 1], tx);
|
||||
}
|
||||
if (i == 1) {
|
||||
dmu_buf_rele(dbt, FTAG);
|
||||
}
|
||||
}
|
||||
dmu_tx_commit(tx);
|
||||
|
||||
/*
|
||||
* Sanity check the stuff we just wrote.
|
||||
*/
|
||||
{
|
||||
void *packcheck = umem_alloc(packsize, UMEM_NOFAIL);
|
||||
void *bigcheck = umem_alloc(bigsize, UMEM_NOFAIL);
|
||||
|
||||
VERIFY(0 == dmu_read(os, dd.dd_packobj, packoff,
|
||||
packsize, packcheck, DMU_READ_PREFETCH));
|
||||
VERIFY(0 == dmu_read(os, dd.dd_bigobj, bigoff,
|
||||
bigsize, bigcheck, DMU_READ_PREFETCH));
|
||||
|
||||
ASSERT(bcmp(packbuf, packcheck, packsize) == 0);
|
||||
ASSERT(bcmp(bigbuf, bigcheck, bigsize) == 0);
|
||||
|
||||
umem_free(packcheck, packsize);
|
||||
umem_free(bigcheck, bigsize);
|
||||
}
|
||||
if (i == 2) {
|
||||
txg_wait_open(dmu_objset_pool(os), 0);
|
||||
} else if (i == 3) {
|
||||
txg_wait_synced(dmu_objset_pool(os), 0);
|
||||
}
|
||||
}
|
||||
|
||||
dmu_buf_rele(bonus_db, FTAG);
|
||||
umem_free(packbuf, packsize);
|
||||
umem_free(bigbuf, bigsize);
|
||||
umem_free(bigbuf_arcbufs, 2 * s * sizeof (arc_buf_t *));
|
||||
}
|
||||
|
||||
void
|
||||
ztest_dmu_check_future_leak(ztest_args_t *za)
|
||||
{
|
||||
|
@ -1935,6 +2549,8 @@ ztest_dmu_write_parallel(ztest_args_t *za)
|
|||
uint64_t blkoff;
|
||||
zbookmark_t zb;
|
||||
dmu_tx_t *tx = dmu_tx_create(os);
|
||||
dmu_buf_t *bonus_db;
|
||||
arc_buf_t *abuf = NULL;
|
||||
|
||||
dmu_objset_name(os, osname);
|
||||
|
||||
|
@ -1963,6 +2579,12 @@ ztest_dmu_write_parallel(ztest_args_t *za)
|
|||
}
|
||||
}
|
||||
|
||||
if (off != -1ULL && P2PHASE(off, bs) == 0 && !do_free &&
|
||||
ztest_random(8) == 0) {
|
||||
VERIFY(dmu_bonus_hold(os, ZTEST_DIROBJ, FTAG, &bonus_db) == 0);
|
||||
abuf = dmu_request_arcbuf(bonus_db, bs);
|
||||
}
|
||||
|
||||
txg_how = ztest_random(2) == 0 ? TXG_WAIT : TXG_NOWAIT;
|
||||
error = dmu_tx_assign(tx, txg_how);
|
||||
if (error) {
|
||||
|
@ -1973,6 +2595,10 @@ ztest_dmu_write_parallel(ztest_args_t *za)
|
|||
ztest_record_enospc("dmu write parallel");
|
||||
}
|
||||
dmu_tx_abort(tx);
|
||||
if (abuf != NULL) {
|
||||
dmu_return_arcbuf(abuf);
|
||||
dmu_buf_rele(bonus_db, FTAG);
|
||||
}
|
||||
return;
|
||||
}
|
||||
txg = dmu_tx_get_txg(tx);
|
||||
|
@ -2027,8 +2653,12 @@ ztest_dmu_write_parallel(ztest_args_t *za)
|
|||
za->za_dbuf = NULL;
|
||||
} else if (do_free) {
|
||||
VERIFY(dmu_free_range(os, ZTEST_DIROBJ, off, bs, tx) == 0);
|
||||
} else {
|
||||
} else if (abuf == NULL) {
|
||||
dmu_write(os, ZTEST_DIROBJ, off, btsize, wbt, tx);
|
||||
} else {
|
||||
bcopy(wbt, abuf->b_data, btsize);
|
||||
dmu_assign_arcbuf(bonus_db, off, abuf, tx);
|
||||
dmu_buf_rele(bonus_db, FTAG);
|
||||
}
|
||||
|
||||
(void) mutex_unlock(lp);
|
||||
|
@ -2064,16 +2694,20 @@ ztest_dmu_write_parallel(ztest_args_t *za)
|
|||
dmu_buf_rele(db, FTAG);
|
||||
za->za_dbuf = NULL;
|
||||
|
||||
(void) mutex_unlock(lp);
|
||||
|
||||
if (error)
|
||||
if (error) {
|
||||
(void) mutex_unlock(lp);
|
||||
return;
|
||||
}
|
||||
|
||||
if (blk.blk_birth == 0) /* concurrent free */
|
||||
if (blk.blk_birth == 0) { /* concurrent free */
|
||||
(void) mutex_unlock(lp);
|
||||
return;
|
||||
}
|
||||
|
||||
txg_suspend(dmu_objset_pool(os));
|
||||
|
||||
(void) mutex_unlock(lp);
|
||||
|
||||
ASSERT(blk.blk_fill == 1);
|
||||
ASSERT3U(BP_GET_TYPE(&blk), ==, DMU_OT_UINT64_OTHER);
|
||||
ASSERT3U(BP_GET_LEVEL(&blk), ==, 0);
|
||||
|
@ -2146,7 +2780,7 @@ ztest_zap(ztest_args_t *za)
|
|||
* Create a new object if necessary, and record it in the directory.
|
||||
*/
|
||||
VERIFY(0 == dmu_read(os, ZTEST_DIROBJ, za->za_diroff,
|
||||
sizeof (uint64_t), &object));
|
||||
sizeof (uint64_t), &object, DMU_READ_PREFETCH));
|
||||
|
||||
if (object == 0) {
|
||||
tx = dmu_tx_create(os);
|
||||
|
@ -2799,7 +3433,7 @@ ztest_verify_blocks(char *pool)
|
|||
isa = strdup(isa);
|
||||
/* LINTED */
|
||||
(void) sprintf(bin,
|
||||
"/usr/sbin%.*s/zdb -bc%s%s -U /tmp/zpool.cache %s",
|
||||
"/usr/sbin%.*s/zdb -bcc%s%s -U /tmp/zpool.cache %s",
|
||||
isalen,
|
||||
isa,
|
||||
zopt_verbose >= 3 ? "s" : "",
|
||||
|
@ -2944,7 +3578,7 @@ ztest_resume(spa_t *spa)
|
|||
spa_vdev_state_enter(spa);
|
||||
vdev_clear(spa, NULL);
|
||||
(void) spa_vdev_state_exit(spa, NULL, 0);
|
||||
zio_resume(spa);
|
||||
(void) zio_resume(spa);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -3216,6 +3850,10 @@ ztest_run(char *pool)
|
|||
(void) snprintf(name, 100, "%s/%s_%d", pool, pool, d);
|
||||
if (zopt_verbose >= 3)
|
||||
(void) printf("Destroying %s to free up space\n", name);
|
||||
|
||||
/* Cleanup any non-standard clones and snapshots */
|
||||
ztest_dsl_dataset_cleanup(name, za[d].za_instance);
|
||||
|
||||
(void) dmu_objset_find(name, ztest_destroy_cb, &za[d],
|
||||
DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN);
|
||||
(void) rw_unlock(&ztest_shared->zs_name_lock);
|
||||
|
@ -3296,6 +3934,8 @@ ztest_init(char *pool)
|
|||
if (error)
|
||||
fatal(0, "spa_open() = %d", error);
|
||||
|
||||
metaslab_sz = 1ULL << spa->spa_root_vdev->vdev_child[0]->vdev_ms_shift;
|
||||
|
||||
if (zopt_verbose >= 3)
|
||||
show_pool_stats(spa);
|
||||
|
||||
|
@ -3387,6 +4027,9 @@ main(int argc, char **argv)
|
|||
zi->zi_call_time = 0;
|
||||
}
|
||||
|
||||
/* Set the allocation switch size */
|
||||
metaslab_df_alloc_threshold = ztest_random(metaslab_sz / 4) + 1;
|
||||
|
||||
pid = fork();
|
||||
|
||||
if (pid == -1)
|
||||
|
|
|
@ -116,6 +116,7 @@ enum {
|
|||
EZFS_VDEVNOTSUP, /* unsupported vdev type */
|
||||
EZFS_NOTSUP, /* ops not supported on this dataset */
|
||||
EZFS_ACTIVE_SPARE, /* pool has active shared spare devices */
|
||||
EZFS_UNPLAYED_LOGS, /* log device has unplayed logs */
|
||||
EZFS_UNKNOWN
|
||||
};
|
||||
|
||||
|
@ -178,6 +179,7 @@ extern const char *libzfs_error_action(libzfs_handle_t *);
|
|||
extern const char *libzfs_error_description(libzfs_handle_t *);
|
||||
extern void libzfs_mnttab_init(libzfs_handle_t *);
|
||||
extern void libzfs_mnttab_fini(libzfs_handle_t *);
|
||||
extern void libzfs_mnttab_cache(libzfs_handle_t *, boolean_t);
|
||||
extern int libzfs_mnttab_find(libzfs_handle_t *, const char *,
|
||||
struct mnttab *);
|
||||
extern void libzfs_mnttab_add(libzfs_handle_t *, const char *,
|
||||
|
@ -229,6 +231,8 @@ extern int zpool_vdev_clear(zpool_handle_t *, uint64_t);
|
|||
|
||||
extern nvlist_t *zpool_find_vdev(zpool_handle_t *, const char *, boolean_t *,
|
||||
boolean_t *, boolean_t *);
|
||||
extern nvlist_t *zpool_find_vdev_by_physpath(zpool_handle_t *, const char *,
|
||||
boolean_t *, boolean_t *, boolean_t *);
|
||||
extern int zpool_label_disk(libzfs_handle_t *, zpool_handle_t *, char *);
|
||||
|
||||
/*
|
||||
|
@ -335,7 +339,8 @@ extern int zpool_stage_history(libzfs_handle_t *, const char *);
|
|||
extern void zpool_obj_to_path(zpool_handle_t *, uint64_t, uint64_t, char *,
|
||||
size_t len);
|
||||
extern int zfs_ioctl(libzfs_handle_t *, int, struct zfs_cmd *);
|
||||
extern int zpool_get_physpath(zpool_handle_t *, char *);
|
||||
extern int zpool_get_physpath(zpool_handle_t *, char *, size_t);
|
||||
|
||||
/*
|
||||
* Basic handle manipulations. These functions do not create or destroy the
|
||||
* underlying datasets, only the references to them.
|
||||
|
@ -368,6 +373,10 @@ extern int zfs_prop_get(zfs_handle_t *, zfs_prop_t, char *, size_t,
|
|||
zprop_source_t *, char *, size_t, boolean_t);
|
||||
extern int zfs_prop_get_numeric(zfs_handle_t *, zfs_prop_t, uint64_t *,
|
||||
zprop_source_t *, char *, size_t);
|
||||
extern int zfs_prop_get_userquota_int(zfs_handle_t *zhp, const char *propname,
|
||||
uint64_t *propvalue);
|
||||
extern int zfs_prop_get_userquota(zfs_handle_t *zhp, const char *propname,
|
||||
char *propbuf, int proplen, boolean_t literal);
|
||||
extern uint64_t zfs_prop_get_int(zfs_handle_t *, zfs_prop_t);
|
||||
extern int zfs_prop_inherit(zfs_handle_t *, const char *);
|
||||
extern const char *zfs_prop_values(zfs_prop_t);
|
||||
|
@ -384,6 +393,7 @@ typedef struct zprop_list {
|
|||
} zprop_list_t;
|
||||
|
||||
extern int zfs_expand_proplist(zfs_handle_t *, zprop_list_t **);
|
||||
extern void zfs_prune_proplist(zfs_handle_t *, uint8_t *);
|
||||
|
||||
#define ZFS_MOUNTPOINT_NONE "none"
|
||||
#define ZFS_MOUNTPOINT_LEGACY "legacy"
|
||||
|
@ -454,6 +464,12 @@ extern int zfs_send(zfs_handle_t *, const char *, const char *,
|
|||
boolean_t, boolean_t, boolean_t, boolean_t, int);
|
||||
extern int zfs_promote(zfs_handle_t *);
|
||||
|
||||
typedef int (*zfs_userspace_cb_t)(void *arg, const char *domain,
|
||||
uid_t rid, uint64_t space);
|
||||
|
||||
extern int zfs_userspace(zfs_handle_t *zhp, zfs_userquota_prop_t type,
|
||||
zfs_userspace_cb_t func, void *arg);
|
||||
|
||||
typedef struct recvflags {
|
||||
/* print informational messages (ie, -v was specified) */
|
||||
int verbose : 1;
|
||||
|
@ -491,17 +507,6 @@ extern boolean_t zfs_dataset_exists(libzfs_handle_t *, const char *,
|
|||
zfs_type_t);
|
||||
extern int zfs_spa_version(zfs_handle_t *, int *);
|
||||
|
||||
/*
|
||||
* dataset permission functions.
|
||||
*/
|
||||
extern int zfs_perm_set(zfs_handle_t *, nvlist_t *);
|
||||
extern int zfs_perm_remove(zfs_handle_t *, nvlist_t *);
|
||||
extern int zfs_build_perms(zfs_handle_t *, char *, char *,
|
||||
zfs_deleg_who_type_t, zfs_deleg_inherit_t, nvlist_t **nvlist_t);
|
||||
extern int zfs_perm_get(zfs_handle_t *, zfs_allow_t **);
|
||||
extern void zfs_free_allows(zfs_allow_t *);
|
||||
extern void zfs_deleg_permissions(void);
|
||||
|
||||
/*
|
||||
* Mount support functions.
|
||||
*/
|
||||
|
@ -536,7 +541,7 @@ extern boolean_t zfs_is_shared_iscsi(zfs_handle_t *);
|
|||
extern int zfs_share_iscsi(zfs_handle_t *);
|
||||
extern int zfs_unshare_iscsi(zfs_handle_t *);
|
||||
extern int zfs_iscsi_perm_check(libzfs_handle_t *, char *, ucred_t *);
|
||||
extern int zfs_deleg_share_nfs(libzfs_handle_t *, char *, char *,
|
||||
extern int zfs_deleg_share_nfs(libzfs_handle_t *, char *, char *, char *,
|
||||
void *, void *, int, zfs_share_op_t);
|
||||
|
||||
/*
|
||||
|
@ -574,6 +579,15 @@ extern int zpool_remove_zvol_links(zpool_handle_t *);
|
|||
/* is this zvol valid for use as a dump device? */
|
||||
extern int zvol_check_dump_config(char *);
|
||||
|
||||
/*
|
||||
* Management interfaces for SMB ACL files
|
||||
*/
|
||||
|
||||
int zfs_smb_acl_add(libzfs_handle_t *, char *, char *, char *);
|
||||
int zfs_smb_acl_remove(libzfs_handle_t *, char *, char *, char *);
|
||||
int zfs_smb_acl_purge(libzfs_handle_t *, char *, char *);
|
||||
int zfs_smb_acl_rename(libzfs_handle_t *, char *, char *, char *, char *);
|
||||
|
||||
/*
|
||||
* Enable and disable datasets within a pool by mounting/unmounting and
|
||||
* sharing/unsharing them.
|
||||
|
|
|
@ -20,7 +20,7 @@
|
|||
*/
|
||||
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
|
@ -63,6 +63,7 @@ struct libzfs_handle {
|
|||
int libzfs_printerr;
|
||||
void *libzfs_sharehdl; /* libshare handle */
|
||||
uint_t libzfs_shareflags;
|
||||
boolean_t libzfs_mnttab_enable;
|
||||
avl_tree_t libzfs_mnttab_cache;
|
||||
};
|
||||
#define ZFSSHARE_MISS 0x01 /* Didn't find entry in cache */
|
||||
|
@ -78,6 +79,7 @@ struct zfs_handle {
|
|||
nvlist_t *zfs_user_props;
|
||||
boolean_t zfs_mntcheck;
|
||||
char *zfs_mntopts;
|
||||
uint8_t *zfs_props_table;
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -185,7 +187,7 @@ extern int zfs_init_libshare(libzfs_handle_t *, int);
|
|||
extern void zfs_uninit_libshare(libzfs_handle_t *);
|
||||
extern int zfs_parse_options(char *, zfs_share_proto_t);
|
||||
|
||||
extern int zfs_unshare_proto(zfs_handle_t *zhp,
|
||||
extern int zfs_unshare_proto(zfs_handle_t *,
|
||||
const char *, zfs_share_proto_t *);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
@ -20,7 +20,7 @@
|
|||
*/
|
||||
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*
|
||||
* Portions Copyright 2007 Ramprakash Jelari
|
||||
|
@ -222,6 +222,7 @@ changelist_postfix(prop_changelist_t *clp)
|
|||
|
||||
boolean_t sharenfs;
|
||||
boolean_t sharesmb;
|
||||
boolean_t mounted;
|
||||
|
||||
/*
|
||||
* If we are in the global zone, but this dataset is exported
|
||||
|
@ -276,20 +277,29 @@ changelist_postfix(prop_changelist_t *clp)
|
|||
shareopts, sizeof (shareopts), NULL, NULL, 0,
|
||||
B_FALSE) == 0) && (strcmp(shareopts, "off") != 0));
|
||||
|
||||
if ((cn->cn_mounted || clp->cl_waslegacy || sharenfs ||
|
||||
sharesmb) && !zfs_is_mounted(cn->cn_handle, NULL) &&
|
||||
zfs_mount(cn->cn_handle, NULL, 0) != 0)
|
||||
errors++;
|
||||
mounted = zfs_is_mounted(cn->cn_handle, NULL);
|
||||
|
||||
if (!mounted && (cn->cn_mounted ||
|
||||
((sharenfs || sharesmb || clp->cl_waslegacy) &&
|
||||
(zfs_prop_get_int(cn->cn_handle,
|
||||
ZFS_PROP_CANMOUNT) == ZFS_CANMOUNT_ON)))) {
|
||||
|
||||
if (zfs_mount(cn->cn_handle, NULL, 0) != 0)
|
||||
errors++;
|
||||
else
|
||||
mounted = TRUE;
|
||||
}
|
||||
|
||||
/*
|
||||
* We always re-share even if the filesystem is currently
|
||||
* shared, so that we can adopt any new options.
|
||||
* If the file system is mounted we always re-share even
|
||||
* if the filesystem is currently shared, so that we can
|
||||
* adopt any new options.
|
||||
*/
|
||||
if (sharenfs)
|
||||
if (sharenfs && mounted)
|
||||
errors += zfs_share_nfs(cn->cn_handle);
|
||||
else if (cn->cn_shared || clp->cl_waslegacy)
|
||||
errors += zfs_unshare_nfs(cn->cn_handle, NULL);
|
||||
if (sharesmb)
|
||||
if (sharesmb && mounted)
|
||||
errors += zfs_share_smb(cn->cn_handle);
|
||||
else if (cn->cn_shared || clp->cl_waslegacy)
|
||||
errors += zfs_unshare_smb(cn->cn_handle, NULL);
|
||||
|
@ -625,8 +635,6 @@ changelist_gather(zfs_handle_t *zhp, zfs_prop_t prop, int gather_flags,
|
|||
clp->cl_prop = ZFS_PROP_MOUNTPOINT;
|
||||
} else if (prop == ZFS_PROP_VOLSIZE) {
|
||||
clp->cl_prop = ZFS_PROP_MOUNTPOINT;
|
||||
} else if (prop == ZFS_PROP_VERSION) {
|
||||
clp->cl_prop = ZFS_PROP_MOUNTPOINT;
|
||||
} else {
|
||||
clp->cl_prop = prop;
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -19,12 +19,10 @@
|
|||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
/*
|
||||
* Iterate over all children of the current object. This includes the normal
|
||||
* dataset hierarchy, but also arbitrary hierarchies due to clones. We want to
|
||||
|
@ -399,13 +397,6 @@ iterate_children(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *dataset)
|
|||
for ((void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
|
||||
ioctl(hdl->libzfs_fd, ZFS_IOC_DATASET_LIST_NEXT, &zc) == 0;
|
||||
(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name))) {
|
||||
|
||||
/*
|
||||
* Ignore private dataset names.
|
||||
*/
|
||||
if (dataset_name_hidden(zc.zc_name))
|
||||
continue;
|
||||
|
||||
/*
|
||||
* Get statistics for this dataset, to determine the type of the
|
||||
* dataset and clone statistics. If this fails, the dataset has
|
||||
|
|
|
@ -42,6 +42,7 @@
|
|||
#include <sys/zfs_ioctl.h>
|
||||
#include <sys/zio.h>
|
||||
#include <strings.h>
|
||||
#include <dlfcn.h>
|
||||
|
||||
#include "zfs_namecheck.h"
|
||||
#include "zfs_prop.h"
|
||||
|
@ -55,6 +56,10 @@ static int read_efi_label(nvlist_t *config, diskaddr_t *sb);
|
|||
#define BOOTCMD "installboot(1M)"
|
||||
#endif
|
||||
|
||||
#define DISK_ROOT "/dev/dsk"
|
||||
#define RDISK_ROOT "/dev/rdsk"
|
||||
#define BACKUP_SLICE "s2"
|
||||
|
||||
/*
|
||||
* ====================================================================
|
||||
* zpool property functions
|
||||
|
@ -631,6 +636,12 @@ zpool_expand_proplist(zpool_handle_t *zhp, zprop_list_t **plp)
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* Don't start the slice at the default block of 34; many storage
|
||||
* devices will use a stripe width of 128k, so start there instead.
|
||||
*/
|
||||
#define NEW_START_BLOCK 256
|
||||
|
||||
/*
|
||||
* Validate the given pool name, optionally putting an extended error message in
|
||||
* 'buf'.
|
||||
|
@ -1376,46 +1387,90 @@ zpool_scrub(zpool_handle_t *zhp, pool_scrub_type_t type)
|
|||
}
|
||||
|
||||
/*
|
||||
* Find a vdev that matches the search criteria specified. We use the
|
||||
* nvpair name to determine how we should look for the device.
|
||||
* 'avail_spare' is set to TRUE if the provided guid refers to an AVAIL
|
||||
* spare; but FALSE if it's an INUSE spare.
|
||||
*/
|
||||
static nvlist_t *
|
||||
vdev_to_nvlist_iter(nvlist_t *nv, const char *search, uint64_t guid,
|
||||
boolean_t *avail_spare, boolean_t *l2cache, boolean_t *log)
|
||||
vdev_to_nvlist_iter(nvlist_t *nv, nvlist_t *search, boolean_t *avail_spare,
|
||||
boolean_t *l2cache, boolean_t *log)
|
||||
{
|
||||
uint_t c, children;
|
||||
nvlist_t **child;
|
||||
uint64_t theguid, present;
|
||||
char *path;
|
||||
uint64_t wholedisk = 0;
|
||||
nvlist_t *ret;
|
||||
uint64_t is_log;
|
||||
char *srchkey;
|
||||
nvpair_t *pair = nvlist_next_nvpair(search, NULL);
|
||||
|
||||
verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &theguid) == 0);
|
||||
/* Nothing to look for */
|
||||
if (search == NULL || pair == NULL)
|
||||
return (NULL);
|
||||
|
||||
if (search == NULL &&
|
||||
nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, &present) == 0) {
|
||||
/*
|
||||
* If the device has never been present since import, the only
|
||||
* reliable way to match the vdev is by GUID.
|
||||
*/
|
||||
if (theguid == guid)
|
||||
return (nv);
|
||||
} else if (search != NULL &&
|
||||
nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) {
|
||||
(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
|
||||
&wholedisk);
|
||||
if (wholedisk) {
|
||||
/*
|
||||
* For whole disks, the internal path has 's0', but the
|
||||
* path passed in by the user doesn't.
|
||||
*/
|
||||
if (strlen(search) == strlen(path) - 2 &&
|
||||
strncmp(search, path, strlen(search)) == 0)
|
||||
return (nv);
|
||||
} else if (strcmp(search, path) == 0) {
|
||||
return (nv);
|
||||
/* Obtain the key we will use to search */
|
||||
srchkey = nvpair_name(pair);
|
||||
|
||||
switch (nvpair_type(pair)) {
|
||||
case DATA_TYPE_UINT64: {
|
||||
uint64_t srchval, theguid, present;
|
||||
|
||||
verify(nvpair_value_uint64(pair, &srchval) == 0);
|
||||
if (strcmp(srchkey, ZPOOL_CONFIG_GUID) == 0) {
|
||||
if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT,
|
||||
&present) == 0) {
|
||||
/*
|
||||
* If the device has never been present since
|
||||
* import, the only reliable way to match the
|
||||
* vdev is by GUID.
|
||||
*/
|
||||
verify(nvlist_lookup_uint64(nv,
|
||||
ZPOOL_CONFIG_GUID, &theguid) == 0);
|
||||
if (theguid == srchval)
|
||||
return (nv);
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case DATA_TYPE_STRING: {
|
||||
char *srchval, *val;
|
||||
|
||||
verify(nvpair_value_string(pair, &srchval) == 0);
|
||||
if (nvlist_lookup_string(nv, srchkey, &val) != 0)
|
||||
break;
|
||||
|
||||
/*
|
||||
* Search for the requested value. We special case the search
|
||||
* for ZPOOL_CONFIG_PATH when it's a wholedisk. Otherwise,
|
||||
* all other searches are simple string compares.
|
||||
*/
|
||||
if (strcmp(srchkey, ZPOOL_CONFIG_PATH) == 0 && val) {
|
||||
uint64_t wholedisk = 0;
|
||||
|
||||
(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
|
||||
&wholedisk);
|
||||
if (wholedisk) {
|
||||
/*
|
||||
* For whole disks, the internal path has 's0',
|
||||
* but the path passed in by the user doesn't.
|
||||
*/
|
||||
if (strlen(srchval) == strlen(val) - 2 &&
|
||||
strncmp(srchval, val, strlen(srchval)) == 0)
|
||||
return (nv);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Common case
|
||||
*/
|
||||
if (strcmp(srchval, val) == 0)
|
||||
return (nv);
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
|
||||
|
@ -1423,7 +1478,7 @@ vdev_to_nvlist_iter(nvlist_t *nv, const char *search, uint64_t guid,
|
|||
return (NULL);
|
||||
|
||||
for (c = 0; c < children; c++) {
|
||||
if ((ret = vdev_to_nvlist_iter(child[c], search, guid,
|
||||
if ((ret = vdev_to_nvlist_iter(child[c], search,
|
||||
avail_spare, l2cache, NULL)) != NULL) {
|
||||
/*
|
||||
* The 'is_log' value is only set for the toplevel
|
||||
|
@ -1444,7 +1499,7 @@ vdev_to_nvlist_iter(nvlist_t *nv, const char *search, uint64_t guid,
|
|||
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
|
||||
&child, &children) == 0) {
|
||||
for (c = 0; c < children; c++) {
|
||||
if ((ret = vdev_to_nvlist_iter(child[c], search, guid,
|
||||
if ((ret = vdev_to_nvlist_iter(child[c], search,
|
||||
avail_spare, l2cache, NULL)) != NULL) {
|
||||
*avail_spare = B_TRUE;
|
||||
return (ret);
|
||||
|
@ -1455,7 +1510,7 @@ vdev_to_nvlist_iter(nvlist_t *nv, const char *search, uint64_t guid,
|
|||
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
|
||||
&child, &children) == 0) {
|
||||
for (c = 0; c < children; c++) {
|
||||
if ((ret = vdev_to_nvlist_iter(child[c], search, guid,
|
||||
if ((ret = vdev_to_nvlist_iter(child[c], search,
|
||||
avail_spare, l2cache, NULL)) != NULL) {
|
||||
*l2cache = B_TRUE;
|
||||
return (ret);
|
||||
|
@ -1466,24 +1521,48 @@ vdev_to_nvlist_iter(nvlist_t *nv, const char *search, uint64_t guid,
|
|||
return (NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
* Given a physical path (minus the "/devices" prefix), find the
|
||||
* associated vdev.
|
||||
*/
|
||||
nvlist_t *
|
||||
zpool_find_vdev_by_physpath(zpool_handle_t *zhp, const char *ppath,
|
||||
boolean_t *avail_spare, boolean_t *l2cache, boolean_t *log)
|
||||
{
|
||||
nvlist_t *search, *nvroot, *ret;
|
||||
|
||||
verify(nvlist_alloc(&search, NV_UNIQUE_NAME, KM_SLEEP) == 0);
|
||||
verify(nvlist_add_string(search, ZPOOL_CONFIG_PHYS_PATH, ppath) == 0);
|
||||
|
||||
verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
|
||||
&nvroot) == 0);
|
||||
|
||||
*avail_spare = B_FALSE;
|
||||
ret = vdev_to_nvlist_iter(nvroot, search, avail_spare, l2cache, log);
|
||||
nvlist_free(search);
|
||||
|
||||
return (ret);
|
||||
}
|
||||
|
||||
nvlist_t *
|
||||
zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare,
|
||||
boolean_t *l2cache, boolean_t *log)
|
||||
{
|
||||
char buf[MAXPATHLEN];
|
||||
const char *search;
|
||||
char *end;
|
||||
nvlist_t *nvroot;
|
||||
nvlist_t *nvroot, *search, *ret;
|
||||
uint64_t guid;
|
||||
|
||||
verify(nvlist_alloc(&search, NV_UNIQUE_NAME, KM_SLEEP) == 0);
|
||||
|
||||
guid = strtoull(path, &end, 10);
|
||||
if (guid != 0 && *end == '\0') {
|
||||
search = NULL;
|
||||
verify(nvlist_add_uint64(search, ZPOOL_CONFIG_GUID, guid) == 0);
|
||||
} else if (path[0] != '/') {
|
||||
(void) snprintf(buf, sizeof (buf), "%s%s", "/dev/dsk/", path);
|
||||
search = buf;
|
||||
verify(nvlist_add_string(search, ZPOOL_CONFIG_PATH, buf) == 0);
|
||||
} else {
|
||||
search = path;
|
||||
verify(nvlist_add_string(search, ZPOOL_CONFIG_PATH, path) == 0);
|
||||
}
|
||||
|
||||
verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
|
||||
|
@ -1493,8 +1572,10 @@ zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare,
|
|||
*l2cache = B_FALSE;
|
||||
if (log != NULL)
|
||||
*log = B_FALSE;
|
||||
return (vdev_to_nvlist_iter(nvroot, search, guid, avail_spare,
|
||||
l2cache, log));
|
||||
ret = vdev_to_nvlist_iter(nvroot, search, avail_spare, l2cache, log);
|
||||
nvlist_free(search);
|
||||
|
||||
return (ret);
|
||||
}
|
||||
|
||||
static int
|
||||
|
@ -1511,80 +1592,141 @@ vdev_online(nvlist_t *nv)
|
|||
}
|
||||
|
||||
/*
|
||||
* Get phys_path for a root pool
|
||||
* Return 0 on success; non-zeron on failure.
|
||||
* Helper function for vdev_get_physpaths().
|
||||
*/
|
||||
int
|
||||
zpool_get_physpath(zpool_handle_t *zhp, char *physpath)
|
||||
static int
|
||||
vdev_get_one_physpath(nvlist_t *config, char *physpath, size_t physpath_size,
|
||||
size_t *bytes_written)
|
||||
{
|
||||
nvlist_t *vdev_root;
|
||||
nvlist_t **child;
|
||||
uint_t count;
|
||||
int i;
|
||||
size_t bytes_left, pos, rsz;
|
||||
char *tmppath;
|
||||
const char *format;
|
||||
|
||||
/*
|
||||
* Make sure this is a root pool, as phys_path doesn't mean
|
||||
* anything to a non-root pool.
|
||||
*/
|
||||
if (!pool_is_bootable(zhp))
|
||||
return (-1);
|
||||
if (nvlist_lookup_string(config, ZPOOL_CONFIG_PHYS_PATH,
|
||||
&tmppath) != 0)
|
||||
return (EZFS_NODEVICE);
|
||||
|
||||
verify(nvlist_lookup_nvlist(zhp->zpool_config,
|
||||
ZPOOL_CONFIG_VDEV_TREE, &vdev_root) == 0);
|
||||
pos = *bytes_written;
|
||||
bytes_left = physpath_size - pos;
|
||||
format = (pos == 0) ? "%s" : " %s";
|
||||
|
||||
if (nvlist_lookup_nvlist_array(vdev_root, ZPOOL_CONFIG_CHILDREN,
|
||||
&child, &count) != 0)
|
||||
return (-2);
|
||||
rsz = snprintf(physpath + pos, bytes_left, format, tmppath);
|
||||
*bytes_written += rsz;
|
||||
|
||||
for (i = 0; i < count; i++) {
|
||||
nvlist_t **child2;
|
||||
uint_t count2;
|
||||
char *type;
|
||||
char *tmppath;
|
||||
int j;
|
||||
if (rsz >= bytes_left) {
|
||||
/* if physpath was not copied properly, clear it */
|
||||
if (bytes_left != 0) {
|
||||
physpath[pos] = 0;
|
||||
}
|
||||
return (EZFS_NOSPC);
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
|
||||
if (nvlist_lookup_string(child[i], ZPOOL_CONFIG_TYPE, &type)
|
||||
!= 0)
|
||||
return (-3);
|
||||
static int
|
||||
vdev_get_physpaths(nvlist_t *nv, char *physpath, size_t phypath_size,
|
||||
size_t *rsz, boolean_t is_spare)
|
||||
{
|
||||
char *type;
|
||||
int ret;
|
||||
|
||||
if (strcmp(type, VDEV_TYPE_DISK) == 0) {
|
||||
if (!vdev_online(child[i]))
|
||||
return (-8);
|
||||
verify(nvlist_lookup_string(child[i],
|
||||
ZPOOL_CONFIG_PHYS_PATH, &tmppath) == 0);
|
||||
(void) strncpy(physpath, tmppath, strlen(tmppath));
|
||||
} else if (strcmp(type, VDEV_TYPE_MIRROR) == 0) {
|
||||
if (nvlist_lookup_nvlist_array(child[i],
|
||||
ZPOOL_CONFIG_CHILDREN, &child2, &count2) != 0)
|
||||
return (-4);
|
||||
if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0)
|
||||
return (EZFS_INVALCONFIG);
|
||||
|
||||
for (j = 0; j < count2; j++) {
|
||||
if (!vdev_online(child2[j]))
|
||||
return (-8);
|
||||
if (nvlist_lookup_string(child2[j],
|
||||
ZPOOL_CONFIG_PHYS_PATH, &tmppath) != 0)
|
||||
return (-5);
|
||||
if (strcmp(type, VDEV_TYPE_DISK) == 0) {
|
||||
/*
|
||||
* An active spare device has ZPOOL_CONFIG_IS_SPARE set.
|
||||
* For a spare vdev, we only want to boot from the active
|
||||
* spare device.
|
||||
*/
|
||||
if (is_spare) {
|
||||
uint64_t spare = 0;
|
||||
(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_SPARE,
|
||||
&spare);
|
||||
if (!spare)
|
||||
return (EZFS_INVALCONFIG);
|
||||
}
|
||||
|
||||
if ((strlen(physpath) + strlen(tmppath)) >
|
||||
MAXNAMELEN)
|
||||
return (-6);
|
||||
if (vdev_online(nv)) {
|
||||
if ((ret = vdev_get_one_physpath(nv, physpath,
|
||||
phypath_size, rsz)) != 0)
|
||||
return (ret);
|
||||
}
|
||||
} else if (strcmp(type, VDEV_TYPE_MIRROR) == 0 ||
|
||||
strcmp(type, VDEV_TYPE_REPLACING) == 0 ||
|
||||
(is_spare = (strcmp(type, VDEV_TYPE_SPARE) == 0))) {
|
||||
nvlist_t **child;
|
||||
uint_t count;
|
||||
int i, ret;
|
||||
|
||||
if (strlen(physpath) == 0) {
|
||||
(void) strncpy(physpath, tmppath,
|
||||
strlen(tmppath));
|
||||
} else {
|
||||
(void) strcat(physpath, " ");
|
||||
(void) strcat(physpath, tmppath);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
return (-7);
|
||||
if (nvlist_lookup_nvlist_array(nv,
|
||||
ZPOOL_CONFIG_CHILDREN, &child, &count) != 0)
|
||||
return (EZFS_INVALCONFIG);
|
||||
|
||||
for (i = 0; i < count; i++) {
|
||||
ret = vdev_get_physpaths(child[i], physpath,
|
||||
phypath_size, rsz, is_spare);
|
||||
if (ret == EZFS_NOSPC)
|
||||
return (ret);
|
||||
}
|
||||
}
|
||||
|
||||
return (EZFS_POOL_INVALARG);
|
||||
}
|
||||
|
||||
/*
|
||||
* Get phys_path for a root pool config.
|
||||
* Return 0 on success; non-zero on failure.
|
||||
*/
|
||||
static int
|
||||
zpool_get_config_physpath(nvlist_t *config, char *physpath, size_t phypath_size)
|
||||
{
|
||||
size_t rsz;
|
||||
nvlist_t *vdev_root;
|
||||
nvlist_t **child;
|
||||
uint_t count;
|
||||
char *type;
|
||||
|
||||
rsz = 0;
|
||||
|
||||
if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
|
||||
&vdev_root) != 0)
|
||||
return (EZFS_INVALCONFIG);
|
||||
|
||||
if (nvlist_lookup_string(vdev_root, ZPOOL_CONFIG_TYPE, &type) != 0 ||
|
||||
nvlist_lookup_nvlist_array(vdev_root, ZPOOL_CONFIG_CHILDREN,
|
||||
&child, &count) != 0)
|
||||
return (EZFS_INVALCONFIG);
|
||||
|
||||
/*
|
||||
* root pool can not have EFI labeled disks and can only have
|
||||
* a single top-level vdev.
|
||||
*/
|
||||
if (strcmp(type, VDEV_TYPE_ROOT) != 0 || count != 1 ||
|
||||
pool_uses_efi(vdev_root))
|
||||
return (EZFS_POOL_INVALARG);
|
||||
|
||||
(void) vdev_get_physpaths(child[0], physpath, phypath_size, &rsz,
|
||||
B_FALSE);
|
||||
|
||||
/* No online devices */
|
||||
if (rsz == 0)
|
||||
return (EZFS_NODEVICE);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Get phys_path for a root pool
|
||||
* Return 0 on success; non-zero on failure.
|
||||
*/
|
||||
int
|
||||
zpool_get_physpath(zpool_handle_t *zhp, char *physpath, size_t phypath_size)
|
||||
{
|
||||
return (zpool_get_config_physpath(zhp->zpool_config, physpath,
|
||||
phypath_size));
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns TRUE if the given guid corresponds to the given type.
|
||||
* This is used to check for hot spares (INUSE or not), and level 2 cache
|
||||
|
@ -1613,6 +1755,45 @@ is_guid_type(zpool_handle_t *zhp, uint64_t guid, const char *type)
|
|||
return (B_FALSE);
|
||||
}
|
||||
|
||||
/*
|
||||
* If the device has being dynamically expanded then we need to relabel
|
||||
* the disk to use the new unallocated space.
|
||||
*/
|
||||
static int
|
||||
zpool_relabel_disk(libzfs_handle_t *hdl, const char *name)
|
||||
{
|
||||
char path[MAXPATHLEN];
|
||||
char errbuf[1024];
|
||||
int fd, error;
|
||||
int (*_efi_use_whole_disk)(int);
|
||||
|
||||
if ((_efi_use_whole_disk = (int (*)(int))dlsym(RTLD_DEFAULT,
|
||||
"efi_use_whole_disk")) == NULL)
|
||||
return (-1);
|
||||
|
||||
(void) snprintf(path, sizeof (path), "%s/%s", RDISK_ROOT, name);
|
||||
|
||||
if ((fd = open(path, O_RDWR | O_NDELAY)) < 0) {
|
||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
|
||||
"relabel '%s': unable to open device"), name);
|
||||
return (zfs_error(hdl, EZFS_OPENFAILED, errbuf));
|
||||
}
|
||||
|
||||
/*
|
||||
* It's possible that we might encounter an error if the device
|
||||
* does not have any unallocated space left. If so, we simply
|
||||
* ignore that error and continue on.
|
||||
*/
|
||||
error = _efi_use_whole_disk(fd);
|
||||
(void) close(fd);
|
||||
if (error && error != VT_ENOSPC) {
|
||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
|
||||
"relabel '%s': unable to read disk capacity"), name);
|
||||
return (zfs_error(hdl, EZFS_NOCAP, errbuf));
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Bring the specified vdev online. The 'flags' parameter is a set of the
|
||||
* ZFS_ONLINE_* flags.
|
||||
|
@ -1624,15 +1805,20 @@ zpool_vdev_online(zpool_handle_t *zhp, const char *path, int flags,
|
|||
zfs_cmd_t zc = { 0 };
|
||||
char msg[1024];
|
||||
nvlist_t *tgt;
|
||||
boolean_t avail_spare, l2cache;
|
||||
boolean_t avail_spare, l2cache, islog;
|
||||
libzfs_handle_t *hdl = zhp->zpool_hdl;
|
||||
|
||||
(void) snprintf(msg, sizeof (msg),
|
||||
dgettext(TEXT_DOMAIN, "cannot online %s"), path);
|
||||
if (flags & ZFS_ONLINE_EXPAND) {
|
||||
(void) snprintf(msg, sizeof (msg),
|
||||
dgettext(TEXT_DOMAIN, "cannot expand %s"), path);
|
||||
} else {
|
||||
(void) snprintf(msg, sizeof (msg),
|
||||
dgettext(TEXT_DOMAIN, "cannot online %s"), path);
|
||||
}
|
||||
|
||||
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
|
||||
if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
|
||||
NULL)) == NULL)
|
||||
&islog)) == NULL)
|
||||
return (zfs_error(hdl, EZFS_NODEVICE, msg));
|
||||
|
||||
verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
|
||||
|
@ -1641,6 +1827,31 @@ zpool_vdev_online(zpool_handle_t *zhp, const char *path, int flags,
|
|||
is_guid_type(zhp, zc.zc_guid, ZPOOL_CONFIG_SPARES) == B_TRUE)
|
||||
return (zfs_error(hdl, EZFS_ISSPARE, msg));
|
||||
|
||||
if (flags & ZFS_ONLINE_EXPAND ||
|
||||
zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOEXPAND, NULL)) {
|
||||
char *pathname = NULL;
|
||||
uint64_t wholedisk = 0;
|
||||
|
||||
(void) nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_WHOLE_DISK,
|
||||
&wholedisk);
|
||||
verify(nvlist_lookup_string(tgt, ZPOOL_CONFIG_PATH,
|
||||
&pathname) == 0);
|
||||
|
||||
/*
|
||||
* XXX - L2ARC 1.0 devices can't support expansion.
|
||||
*/
|
||||
if (l2cache) {
|
||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
|
||||
"cannot expand cache devices"));
|
||||
return (zfs_error(hdl, EZFS_VDEVNOTSUP, msg));
|
||||
}
|
||||
|
||||
if (wholedisk) {
|
||||
pathname += strlen(DISK_ROOT) + 1;
|
||||
(void) zpool_relabel_disk(zhp->zpool_hdl, pathname);
|
||||
}
|
||||
}
|
||||
|
||||
zc.zc_cookie = VDEV_STATE_ONLINE;
|
||||
zc.zc_obj = flags;
|
||||
|
||||
|
@ -1691,6 +1902,12 @@ zpool_vdev_offline(zpool_handle_t *zhp, const char *path, boolean_t istmp)
|
|||
*/
|
||||
return (zfs_error(hdl, EZFS_NOREPLICAS, msg));
|
||||
|
||||
case EEXIST:
|
||||
/*
|
||||
* The log device has unplayed logs
|
||||
*/
|
||||
return (zfs_error(hdl, EZFS_UNPLAYED_LOGS, msg));
|
||||
|
||||
default:
|
||||
return (zpool_standard_error(hdl, errno, msg));
|
||||
}
|
||||
|
@ -1895,6 +2112,14 @@ zpool_vdev_attach(zpool_handle_t *zhp,
|
|||
(void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Please "
|
||||
"be sure to invoke %s to make '%s' bootable.\n"),
|
||||
BOOTCMD, new_disk);
|
||||
|
||||
/*
|
||||
* XXX need a better way to prevent user from
|
||||
* booting up a half-baked vdev.
|
||||
*/
|
||||
(void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Make "
|
||||
"sure to wait until resilver is done "
|
||||
"before rebooting.\n"));
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
|
@ -2810,14 +3035,6 @@ zpool_obj_to_path(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj,
|
|||
free(mntpnt);
|
||||
}
|
||||
|
||||
#define RDISK_ROOT "/dev/rdsk"
|
||||
#define BACKUP_SLICE "s2"
|
||||
/*
|
||||
* Don't start the slice at the default block of 34; many storage
|
||||
* devices will use a stripe width of 128k, so start there instead.
|
||||
*/
|
||||
#define NEW_START_BLOCK 256
|
||||
|
||||
/*
|
||||
* Read the EFI label from the config, if a label does not exist then
|
||||
* pass back the error to the caller. If the caller has passed a non-NULL
|
||||
|
|
|
@ -237,6 +237,8 @@ send_iterate_prop(zfs_handle_t *zhp, nvlist_t *nv)
|
|||
zfs_prop_t prop = zfs_name_to_prop(propname);
|
||||
nvlist_t *propnv;
|
||||
|
||||
assert(zfs_prop_user(propname) || prop != ZPROP_INVAL);
|
||||
|
||||
if (!zfs_prop_user(propname) && zfs_prop_readonly(prop))
|
||||
continue;
|
||||
|
||||
|
@ -594,12 +596,18 @@ dump_filesystem(zfs_handle_t *zhp, void *arg)
|
|||
zhp->zfs_name, sdd->fromsnap);
|
||||
sdd->err = B_TRUE;
|
||||
} else if (!sdd->seento) {
|
||||
(void) fprintf(stderr,
|
||||
"WARNING: could not send %s@%s:\n"
|
||||
"incremental source (%s@%s) "
|
||||
"is not earlier than it\n",
|
||||
zhp->zfs_name, sdd->tosnap,
|
||||
zhp->zfs_name, sdd->fromsnap);
|
||||
if (sdd->fromsnap) {
|
||||
(void) fprintf(stderr,
|
||||
"WARNING: could not send %s@%s:\n"
|
||||
"incremental source (%s@%s) "
|
||||
"is not earlier than it\n",
|
||||
zhp->zfs_name, sdd->tosnap,
|
||||
zhp->zfs_name, sdd->fromsnap);
|
||||
} else {
|
||||
(void) fprintf(stderr, "WARNING: "
|
||||
"could not send %s@%s: does not exist\n",
|
||||
zhp->zfs_name, sdd->tosnap);
|
||||
}
|
||||
sdd->err = B_TRUE;
|
||||
}
|
||||
} else {
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
|
@ -210,6 +210,9 @@ libzfs_error_description(libzfs_handle_t *hdl)
|
|||
case EZFS_ACTIVE_SPARE:
|
||||
return (dgettext(TEXT_DOMAIN, "pool has active shared spare "
|
||||
"device"));
|
||||
case EZFS_UNPLAYED_LOGS:
|
||||
return (dgettext(TEXT_DOMAIN, "log device has unplayed intent "
|
||||
"logs"));
|
||||
case EZFS_UNKNOWN:
|
||||
return (dgettext(TEXT_DOMAIN, "unknown error"));
|
||||
default:
|
||||
|
@ -364,6 +367,11 @@ zfs_standard_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...)
|
|||
case ENOTSUP:
|
||||
zfs_verror(hdl, EZFS_BADVERSION, fmt, ap);
|
||||
break;
|
||||
case EAGAIN:
|
||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
|
||||
"pool I/O is currently suspended"));
|
||||
zfs_verror(hdl, EZFS_POOLUNAVAIL, fmt, ap);
|
||||
break;
|
||||
default:
|
||||
zfs_error_aux(hdl, strerror(errno));
|
||||
zfs_verror(hdl, EZFS_UNKNOWN, fmt, ap);
|
||||
|
@ -437,6 +445,11 @@ zpool_standard_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...)
|
|||
case EDQUOT:
|
||||
zfs_verror(hdl, EZFS_NOSPC, fmt, ap);
|
||||
return (-1);
|
||||
case EAGAIN:
|
||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
|
||||
"pool I/O is currently suspended"));
|
||||
zfs_verror(hdl, EZFS_POOLUNAVAIL, fmt, ap);
|
||||
break;
|
||||
|
||||
default:
|
||||
zfs_error_aux(hdl, strerror(error));
|
||||
|
@ -575,6 +588,7 @@ libzfs_init(void)
|
|||
|
||||
zfs_prop_init();
|
||||
zpool_prop_init();
|
||||
libzfs_mnttab_init(hdl);
|
||||
|
||||
return (hdl);
|
||||
}
|
||||
|
@ -592,6 +606,7 @@ libzfs_fini(libzfs_handle_t *hdl)
|
|||
(void) free(hdl->libzfs_log_str);
|
||||
zpool_free_handles(hdl);
|
||||
namespace_clear(hdl);
|
||||
libzfs_mnttab_fini(hdl);
|
||||
free(hdl);
|
||||
}
|
||||
|
||||
|
@ -1209,7 +1224,7 @@ addlist(libzfs_handle_t *hdl, char *propname, zprop_list_t **listp,
|
|||
* dataset property,
|
||||
*/
|
||||
if (prop == ZPROP_INVAL && (type == ZFS_TYPE_POOL ||
|
||||
!zfs_prop_user(propname))) {
|
||||
(!zfs_prop_user(propname) && !zfs_prop_userquota(propname)))) {
|
||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
|
||||
"invalid property '%s'"), propname);
|
||||
return (zfs_error(hdl, EZFS_BADPROP,
|
||||
|
|
|
@ -59,6 +59,7 @@ extern "C" {
|
|||
#include <atomic.h>
|
||||
#include <dirent.h>
|
||||
#include <time.h>
|
||||
#include <libsysevent.h>
|
||||
#include <sys/note.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/cred.h>
|
||||
|
@ -73,6 +74,7 @@ extern "C" {
|
|||
#include <sys/kstat.h>
|
||||
#include <sys/u8_textprep.h>
|
||||
#include <sys/sysevent/eventdefs.h>
|
||||
#include <sys/sysevent/dev.h>
|
||||
|
||||
/*
|
||||
* Debugging
|
||||
|
@ -316,6 +318,7 @@ typedef void (task_func_t)(void *);
|
|||
#define TASKQ_PREPOPULATE 0x0001
|
||||
#define TASKQ_CPR_SAFE 0x0002 /* Use CPR safe protocol */
|
||||
#define TASKQ_DYNAMIC 0x0004 /* Use dynamic thread scheduling */
|
||||
#define TASKQ_THREADS_CPU_PCT 0x0008 /* Use dynamic thread scheduling */
|
||||
|
||||
#define TQ_SLEEP KM_SLEEP /* Can block for memory */
|
||||
#define TQ_NOSLEEP KM_NOSLEEP /* cannot block for memory; may fail */
|
||||
|
@ -540,6 +543,10 @@ typedef struct ksiddomain {
|
|||
ksiddomain_t *ksid_lookupdomain(const char *);
|
||||
void ksiddomain_rele(ksiddomain_t *);
|
||||
|
||||
#define DDI_SLEEP KM_SLEEP
|
||||
#define ddi_log_sysevent(_a, _b, _c, _d, _e, _f, _g) \
|
||||
sysevent_post_event(_c, _d, _b, "libzpool", _e, _f)
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
|
@ -174,6 +174,19 @@ taskq_create(const char *name, int nthreads, pri_t pri,
|
|||
taskq_t *tq = kmem_zalloc(sizeof (taskq_t), KM_SLEEP);
|
||||
int t;
|
||||
|
||||
if (flags & TASKQ_THREADS_CPU_PCT) {
|
||||
int pct;
|
||||
ASSERT3S(nthreads, >=, 0);
|
||||
ASSERT3S(nthreads, <=, 100);
|
||||
pct = MIN(nthreads, 100);
|
||||
pct = MAX(pct, 0);
|
||||
|
||||
nthreads = (sysconf(_SC_NPROCESSORS_ONLN) * pct) / 100;
|
||||
nthreads = MAX(nthreads, 1); /* need at least 1 thread */
|
||||
} else {
|
||||
ASSERT3S(nthreads, >=, 1);
|
||||
}
|
||||
|
||||
rw_init(&tq->tq_threadlock, NULL, RW_DEFAULT, NULL);
|
||||
mutex_init(&tq->tq_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
cv_init(&tq->tq_dispatch_cv, NULL, CV_DEFAULT, NULL);
|
||||
|
|
|
@ -19,15 +19,13 @@
|
|||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_FM_FS_ZFS_H
|
||||
#define _SYS_FM_FS_ZFS_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
@ -57,6 +55,7 @@ extern "C" {
|
|||
#define FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE "vdev_type"
|
||||
#define FM_EREPORT_PAYLOAD_ZFS_VDEV_PATH "vdev_path"
|
||||
#define FM_EREPORT_PAYLOAD_ZFS_VDEV_DEVID "vdev_devid"
|
||||
#define FM_EREPORT_PAYLOAD_ZFS_VDEV_FRU "vdev_fru"
|
||||
#define FM_EREPORT_PAYLOAD_ZFS_PARENT_GUID "parent_guid"
|
||||
#define FM_EREPORT_PAYLOAD_ZFS_PARENT_TYPE "parent_type"
|
||||
#define FM_EREPORT_PAYLOAD_ZFS_PARENT_PATH "parent_path"
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
|
@ -48,6 +48,10 @@ typedef enum {
|
|||
#define ZFS_TYPE_DATASET \
|
||||
(ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME | ZFS_TYPE_SNAPSHOT)
|
||||
|
||||
#define ZAP_MAXNAMELEN 256
|
||||
#define ZAP_MAXVALUELEN (1024 * 8)
|
||||
#define ZAP_OLDMAXVALUELEN 1024
|
||||
|
||||
/*
|
||||
* Dataset properties are identified by these constants and must be added to
|
||||
* the end of this list to ensure that external consumers are not affected
|
||||
|
@ -105,9 +109,21 @@ typedef enum {
|
|||
ZFS_PROP_USEDDS,
|
||||
ZFS_PROP_USEDCHILD,
|
||||
ZFS_PROP_USEDREFRESERV,
|
||||
ZFS_PROP_USERACCOUNTING, /* not exposed to the user */
|
||||
ZFS_PROP_STMF_SHAREINFO, /* not exposed to the user */
|
||||
ZFS_NUM_PROPS
|
||||
} zfs_prop_t;
|
||||
|
||||
typedef enum {
|
||||
ZFS_PROP_USERUSED,
|
||||
ZFS_PROP_USERQUOTA,
|
||||
ZFS_PROP_GROUPUSED,
|
||||
ZFS_PROP_GROUPQUOTA,
|
||||
ZFS_NUM_USERQUOTA_PROPS
|
||||
} zfs_userquota_prop_t;
|
||||
|
||||
extern const char *zfs_userquota_prop_prefixes[ZFS_NUM_USERQUOTA_PROPS];
|
||||
|
||||
/*
|
||||
* Pool properties are identified by these constants and must be added to the
|
||||
* end of this list to ensure that external consumers are not affected
|
||||
|
@ -130,6 +146,7 @@ typedef enum {
|
|||
ZPOOL_PROP_CACHEFILE,
|
||||
ZPOOL_PROP_FAILUREMODE,
|
||||
ZPOOL_PROP_LISTSNAPS,
|
||||
ZPOOL_PROP_AUTOEXPAND,
|
||||
ZPOOL_NUM_PROPS
|
||||
} zpool_prop_t;
|
||||
|
||||
|
@ -169,6 +186,7 @@ boolean_t zfs_prop_setonce(zfs_prop_t);
|
|||
const char *zfs_prop_to_name(zfs_prop_t);
|
||||
zfs_prop_t zfs_name_to_prop(const char *);
|
||||
boolean_t zfs_prop_user(const char *);
|
||||
boolean_t zfs_prop_userquota(const char *name);
|
||||
int zfs_prop_index_to_string(zfs_prop_t, uint64_t, const char **);
|
||||
int zfs_prop_string_to_index(zfs_prop_t, const char *, uint64_t *);
|
||||
boolean_t zfs_prop_valid_for_type(int, zfs_type_t);
|
||||
|
@ -213,6 +231,9 @@ typedef enum {
|
|||
#define ZFS_DELEG_PERM_GID "gid"
|
||||
#define ZFS_DELEG_PERM_GROUPS "groups"
|
||||
|
||||
#define ZFS_SMB_ACL_SRC "src"
|
||||
#define ZFS_SMB_ACL_TARGET "target"
|
||||
|
||||
typedef enum {
|
||||
ZFS_CANMOUNT_OFF = 0,
|
||||
ZFS_CANMOUNT_ON = 1,
|
||||
|
@ -226,6 +247,13 @@ typedef enum zfs_share_op {
|
|||
ZFS_UNSHARE_SMB = 3
|
||||
} zfs_share_op_t;
|
||||
|
||||
typedef enum zfs_smb_acl_op {
|
||||
ZFS_SMB_ACL_ADD,
|
||||
ZFS_SMB_ACL_REMOVE,
|
||||
ZFS_SMB_ACL_RENAME,
|
||||
ZFS_SMB_ACL_PURGE
|
||||
} zfs_smb_acl_op_t;
|
||||
|
||||
typedef enum zfs_cache_type {
|
||||
ZFS_CACHE_NONE = 0,
|
||||
ZFS_CACHE_METADATA = 1,
|
||||
|
@ -250,13 +278,16 @@ typedef enum zfs_cache_type {
|
|||
#define SPA_VERSION_12 12ULL
|
||||
#define SPA_VERSION_13 13ULL
|
||||
#define SPA_VERSION_14 14ULL
|
||||
#define SPA_VERSION_15 15ULL
|
||||
#define SPA_VERSION_16 16ULL
|
||||
/*
|
||||
* When bumping up SPA_VERSION, make sure GRUB ZFS understands the on-disk
|
||||
* format change. Go to usr/src/grub/grub-0.95/stage2/{zfs-include/, fsys_zfs*},
|
||||
* and do the appropriate changes.
|
||||
* format change. Go to usr/src/grub/grub-0.97/stage2/{zfs-include/, fsys_zfs*},
|
||||
* and do the appropriate changes. Also bump the version number in
|
||||
* usr/src/grub/capability.
|
||||
*/
|
||||
#define SPA_VERSION SPA_VERSION_14
|
||||
#define SPA_VERSION_STRING "14"
|
||||
#define SPA_VERSION SPA_VERSION_16
|
||||
#define SPA_VERSION_STRING "16"
|
||||
|
||||
/*
|
||||
* Symbolic names for the changes that caused a SPA_VERSION switch.
|
||||
|
@ -292,6 +323,8 @@ typedef enum zfs_cache_type {
|
|||
#define SPA_VERSION_SNAP_PROPS SPA_VERSION_12
|
||||
#define SPA_VERSION_USED_BREAKDOWN SPA_VERSION_13
|
||||
#define SPA_VERSION_PASSTHROUGH_X SPA_VERSION_14
|
||||
#define SPA_VERSION_USERSPACE SPA_VERSION_15
|
||||
#define SPA_VERSION_STMF_PROP SPA_VERSION_16
|
||||
|
||||
/*
|
||||
* ZPL version - rev'd whenever an incompatible on-disk format change
|
||||
|
@ -299,19 +332,21 @@ typedef enum zfs_cache_type {
|
|||
* also update the version_table[] and help message in zfs_prop.c.
|
||||
*
|
||||
* When changing, be sure to teach GRUB how to read the new format!
|
||||
* See usr/src/grub/grub-0.95/stage2/{zfs-include/,fsys_zfs*}
|
||||
* See usr/src/grub/grub-0.97/stage2/{zfs-include/,fsys_zfs*}
|
||||
*/
|
||||
#define ZPL_VERSION_1 1ULL
|
||||
#define ZPL_VERSION_2 2ULL
|
||||
#define ZPL_VERSION_3 3ULL
|
||||
#define ZPL_VERSION ZPL_VERSION_3
|
||||
#define ZPL_VERSION_STRING "3"
|
||||
#define ZPL_VERSION_4 4ULL
|
||||
#define ZPL_VERSION ZPL_VERSION_4
|
||||
#define ZPL_VERSION_STRING "4"
|
||||
|
||||
#define ZPL_VERSION_INITIAL ZPL_VERSION_1
|
||||
#define ZPL_VERSION_DIRENT_TYPE ZPL_VERSION_2
|
||||
#define ZPL_VERSION_FUID ZPL_VERSION_3
|
||||
#define ZPL_VERSION_NORMALIZATION ZPL_VERSION_3
|
||||
#define ZPL_VERSION_SYSATTR ZPL_VERSION_3
|
||||
#define ZPL_VERSION_USERSPACE ZPL_VERSION_4
|
||||
|
||||
/*
|
||||
* The following are configuration names used in the nvlist describing a pool's
|
||||
|
@ -361,6 +396,7 @@ typedef enum zfs_cache_type {
|
|||
#define ZPOOL_CONFIG_FAULTED "faulted"
|
||||
#define ZPOOL_CONFIG_DEGRADED "degraded"
|
||||
#define ZPOOL_CONFIG_REMOVED "removed"
|
||||
#define ZPOOL_CONFIG_FRU "fru"
|
||||
|
||||
#define VDEV_TYPE_ROOT "root"
|
||||
#define VDEV_TYPE_MIRROR "mirror"
|
||||
|
@ -503,7 +539,7 @@ typedef struct vdev_stat {
|
|||
/*
|
||||
* And here are the things we need with /dev, etc. in front of them.
|
||||
*/
|
||||
#define ZVOL_PSEUDO_DEV "/devices/pseudo/zvol@0:"
|
||||
#define ZVOL_PSEUDO_DEV "/devices/pseudo/zfs@0:"
|
||||
#define ZVOL_FULL_DEV_DIR "/dev/" ZVOL_DEV_DIR "/"
|
||||
|
||||
#define ZVOL_PROP_NAME "name"
|
||||
|
@ -531,6 +567,7 @@ typedef enum zfs_ioc {
|
|||
ZFS_IOC_VDEV_ATTACH,
|
||||
ZFS_IOC_VDEV_DETACH,
|
||||
ZFS_IOC_VDEV_SETPATH,
|
||||
ZFS_IOC_VDEV_SETFRU,
|
||||
ZFS_IOC_OBJSET_STATS,
|
||||
ZFS_IOC_OBJSET_ZPLPROPS,
|
||||
ZFS_IOC_DATASET_LIST_NEXT,
|
||||
|
@ -560,7 +597,11 @@ typedef enum zfs_ioc {
|
|||
ZFS_IOC_GET_FSACL,
|
||||
ZFS_IOC_ISCSI_PERM_CHECK,
|
||||
ZFS_IOC_SHARE,
|
||||
ZFS_IOC_INHERIT_PROP
|
||||
ZFS_IOC_INHERIT_PROP,
|
||||
ZFS_IOC_SMB_ACL,
|
||||
ZFS_IOC_USERSPACE_ONE,
|
||||
ZFS_IOC_USERSPACE_MANY,
|
||||
ZFS_IOC_USERSPACE_UPGRADE
|
||||
} zfs_ioc_t;
|
||||
|
||||
/*
|
||||
|
@ -602,6 +643,7 @@ typedef enum {
|
|||
#define ZFS_ONLINE_CHECKREMOVE 0x1
|
||||
#define ZFS_ONLINE_UNSPARE 0x2
|
||||
#define ZFS_ONLINE_FORCEFAULT 0x4
|
||||
#define ZFS_ONLINE_EXPAND 0x8
|
||||
#define ZFS_OFFLINE_TEMPORARY 0x1
|
||||
|
||||
/*
|
||||
|
|
|
@ -19,15 +19,13 @@
|
|||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _ZFS_DELEG_H
|
||||
#define _ZFS_DELEG_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/fs/zfs.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
@ -59,6 +57,10 @@ typedef enum {
|
|||
ZFS_DELEG_NOTE_USERPROP,
|
||||
ZFS_DELEG_NOTE_MOUNT,
|
||||
ZFS_DELEG_NOTE_SHARE,
|
||||
ZFS_DELEG_NOTE_USERQUOTA,
|
||||
ZFS_DELEG_NOTE_GROUPQUOTA,
|
||||
ZFS_DELEG_NOTE_USERUSED,
|
||||
ZFS_DELEG_NOTE_GROUPUSED,
|
||||
ZFS_DELEG_NOTE_NONE
|
||||
} zfs_deleg_note_t;
|
||||
|
||||
|
|
|
@ -19,15 +19,13 @@
|
|||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _ZFS_NAMECHECK_H
|
||||
#define _ZFS_NAMECHECK_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
@ -50,7 +48,6 @@ typedef enum {
|
|||
int pool_namecheck(const char *, namecheck_err_t *, char *);
|
||||
int dataset_namecheck(const char *, namecheck_err_t *, char *);
|
||||
int mountpoint_namecheck(const char *, namecheck_err_t *);
|
||||
int dataset_name_hidden(const char *);
|
||||
int snapshot_namecheck(const char *, namecheck_err_t *, char *);
|
||||
int permset_namecheck(const char *, namecheck_err_t *, char *);
|
||||
|
||||
|
|
|
@ -19,13 +19,10 @@
|
|||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#if defined(_KERNEL)
|
||||
#include <sys/systm.h>
|
||||
#include <sys/sunddi.h>
|
||||
|
@ -66,6 +63,10 @@ zfs_deleg_perm_tab_t zfs_deleg_perm_tab[] = {
|
|||
{ZFS_DELEG_PERM_SHARE, ZFS_DELEG_NOTE_SHARE },
|
||||
{ZFS_DELEG_PERM_SEND, ZFS_DELEG_NOTE_NONE },
|
||||
{ZFS_DELEG_PERM_USERPROP, ZFS_DELEG_NOTE_USERPROP },
|
||||
{ZFS_DELEG_PERM_USERQUOTA, ZFS_DELEG_NOTE_USERQUOTA },
|
||||
{ZFS_DELEG_PERM_GROUPQUOTA, ZFS_DELEG_NOTE_GROUPQUOTA },
|
||||
{ZFS_DELEG_PERM_USERUSED, ZFS_DELEG_NOTE_USERUSED },
|
||||
{ZFS_DELEG_PERM_GROUPUSED, ZFS_DELEG_NOTE_GROUPUSED },
|
||||
{NULL, ZFS_DELEG_NOTE_NONE }
|
||||
};
|
||||
|
||||
|
|
|
@ -19,12 +19,10 @@
|
|||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
/*
|
||||
* Common name validation routines for ZFS. These routines are shared by the
|
||||
* userland code as well as the ioctl() layer to ensure that we don't
|
||||
|
@ -345,19 +343,3 @@ pool_namecheck(const char *pool, namecheck_err_t *why, char *what)
|
|||
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if the dataset name is private for internal usage.
|
||||
* '$' is reserved for internal dataset names. e.g. "$MOS"
|
||||
*
|
||||
* Return 1 if the given name is used internally.
|
||||
* Return 0 if it is not.
|
||||
*/
|
||||
int
|
||||
dataset_name_hidden(const char *name)
|
||||
{
|
||||
if (strchr(name, '$') != NULL)
|
||||
return (1);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
|
@ -43,6 +43,14 @@
|
|||
|
||||
static zprop_desc_t zfs_prop_table[ZFS_NUM_PROPS];
|
||||
|
||||
/* Note this is indexed by zfs_userquota_prop_t, keep the order the same */
|
||||
const char *zfs_userquota_prop_prefixes[] = {
|
||||
"userused@",
|
||||
"userquota@",
|
||||
"groupused@",
|
||||
"groupquota@"
|
||||
};
|
||||
|
||||
zprop_desc_t *
|
||||
zfs_prop_get_table(void)
|
||||
{
|
||||
|
@ -133,6 +141,7 @@ zfs_prop_init(void)
|
|||
{ "1", 1 },
|
||||
{ "2", 2 },
|
||||
{ "3", 3 },
|
||||
{ "4", 4 },
|
||||
{ "current", ZPL_VERSION },
|
||||
{ NULL }
|
||||
};
|
||||
|
@ -218,7 +227,7 @@ zfs_prop_init(void)
|
|||
/* default index properties */
|
||||
register_index(ZFS_PROP_VERSION, "version", 0, PROP_DEFAULT,
|
||||
ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT,
|
||||
"1 | 2 | 3 | current", "VERSION", version_table);
|
||||
"1 | 2 | 3 | 4 | current", "VERSION", version_table);
|
||||
register_index(ZFS_PROP_CANMOUNT, "canmount", ZFS_CANMOUNT_ON,
|
||||
PROP_DEFAULT, ZFS_TYPE_FILESYSTEM, "on | off | noauto",
|
||||
"CANMOUNT", canmount_table);
|
||||
|
@ -305,8 +314,13 @@ zfs_prop_init(void)
|
|||
PROP_READONLY, ZFS_TYPE_DATASET, "NAME");
|
||||
register_hidden(ZFS_PROP_ISCSIOPTIONS, "iscsioptions", PROP_TYPE_STRING,
|
||||
PROP_INHERIT, ZFS_TYPE_VOLUME, "ISCSIOPTIONS");
|
||||
register_hidden(ZFS_PROP_STMF_SHAREINFO, "stmf_sbd_lu",
|
||||
PROP_TYPE_STRING, PROP_INHERIT, ZFS_TYPE_VOLUME,
|
||||
"STMF_SBD_LU");
|
||||
register_hidden(ZFS_PROP_GUID, "guid", PROP_TYPE_NUMBER, PROP_READONLY,
|
||||
ZFS_TYPE_DATASET, "GUID");
|
||||
register_hidden(ZFS_PROP_USERACCOUNTING, "useraccounting",
|
||||
PROP_TYPE_NUMBER, PROP_READONLY, ZFS_TYPE_DATASET, NULL);
|
||||
|
||||
/* oddball properties */
|
||||
register_impl(ZFS_PROP_CREATION, "creation", PROP_TYPE_NUMBER, 0, NULL,
|
||||
|
@ -330,7 +344,6 @@ zfs_name_to_prop(const char *propname)
|
|||
return (zprop_name_to_prop(propname, ZFS_TYPE_DATASET));
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* For user property names, we allow all lowercase alphanumeric characters, plus
|
||||
* a few useful punctuation characters.
|
||||
|
@ -367,6 +380,26 @@ zfs_prop_user(const char *name)
|
|||
return (B_TRUE);
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns true if this is a valid userspace-type property (one with a '@').
|
||||
* Note that after the @, any character is valid (eg, another @, for SID
|
||||
* user@domain).
|
||||
*/
|
||||
boolean_t
|
||||
zfs_prop_userquota(const char *name)
|
||||
{
|
||||
zfs_userquota_prop_t prop;
|
||||
|
||||
for (prop = 0; prop < ZFS_NUM_USERQUOTA_PROPS; prop++) {
|
||||
if (strncmp(name, zfs_userquota_prop_prefixes[prop],
|
||||
strlen(zfs_userquota_prop_prefixes[prop])) == 0) {
|
||||
return (B_TRUE);
|
||||
}
|
||||
}
|
||||
|
||||
return (B_FALSE);
|
||||
}
|
||||
|
||||
/*
|
||||
* Tables of index types, plus functions to convert between the user view
|
||||
* (strings) and internal representation (uint64_t).
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
|
@ -96,6 +96,8 @@ zpool_prop_init(void)
|
|||
ZFS_TYPE_POOL, "on | off", "REPLACE", boolean_table);
|
||||
register_index(ZPOOL_PROP_LISTSNAPS, "listsnapshots", 0, PROP_DEFAULT,
|
||||
ZFS_TYPE_POOL, "on | off", "LISTSNAPS", boolean_table);
|
||||
register_index(ZPOOL_PROP_AUTOEXPAND, "autoexpand", 0, PROP_DEFAULT,
|
||||
ZFS_TYPE_POOL, "on | off", "EXPAND", boolean_table);
|
||||
|
||||
/* default index properties */
|
||||
register_index(ZPOOL_PROP_FAILUREMODE, "failmode",
|
||||
|
|
|
@ -19,12 +19,10 @@
|
|||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
/*
|
||||
* Common routines used by zfs and zpool property management.
|
||||
*/
|
||||
|
@ -205,9 +203,6 @@ propname_match(const char *p, size_t len, zprop_desc_t *prop_entry)
|
|||
#ifndef _KERNEL
|
||||
const char *colname = prop_entry->pd_colname;
|
||||
int c;
|
||||
|
||||
if (colname == NULL)
|
||||
return (B_FALSE);
|
||||
#endif
|
||||
|
||||
if (len == strlen(propname) &&
|
||||
|
@ -215,7 +210,7 @@ propname_match(const char *p, size_t len, zprop_desc_t *prop_entry)
|
|||
return (B_TRUE);
|
||||
|
||||
#ifndef _KERNEL
|
||||
if (len != strlen(colname))
|
||||
if (colname == NULL || len != strlen(colname))
|
||||
return (B_FALSE);
|
||||
|
||||
for (c = 0; c < len; c++)
|
||||
|
|
|
@ -124,6 +124,7 @@
|
|||
#include <sys/arc.h>
|
||||
#include <sys/refcount.h>
|
||||
#include <sys/vdev.h>
|
||||
#include <sys/vdev_impl.h>
|
||||
#ifdef _KERNEL
|
||||
#include <sys/vmsystm.h>
|
||||
#include <vm/anon.h>
|
||||
|
@ -397,6 +398,7 @@ static arc_state_t *arc_l2c_only;
|
|||
|
||||
static int arc_no_grow; /* Don't try to grow cache size */
|
||||
static uint64_t arc_tempreserve;
|
||||
static uint64_t arc_loaned_bytes;
|
||||
static uint64_t arc_meta_used;
|
||||
static uint64_t arc_meta_limit;
|
||||
static uint64_t arc_meta_max = 0;
|
||||
|
@ -610,7 +612,7 @@ typedef struct l2arc_write_callback {
|
|||
struct l2arc_buf_hdr {
|
||||
/* protected by arc_buf_hdr mutex */
|
||||
l2arc_dev_t *b_dev; /* L2ARC device */
|
||||
daddr_t b_daddr; /* disk address, offset byte */
|
||||
uint64_t b_daddr; /* disk address, offset byte */
|
||||
};
|
||||
|
||||
typedef struct l2arc_data_free {
|
||||
|
@ -1207,6 +1209,41 @@ arc_buf_alloc(spa_t *spa, int size, void *tag, arc_buf_contents_t type)
|
|||
return (buf);
|
||||
}
|
||||
|
||||
static char *arc_onloan_tag = "onloan";
|
||||
|
||||
/*
|
||||
* Loan out an anonymous arc buffer. Loaned buffers are not counted as in
|
||||
* flight data by arc_tempreserve_space() until they are "returned". Loaned
|
||||
* buffers must be returned to the arc before they can be used by the DMU or
|
||||
* freed.
|
||||
*/
|
||||
arc_buf_t *
|
||||
arc_loan_buf(spa_t *spa, int size)
|
||||
{
|
||||
arc_buf_t *buf;
|
||||
|
||||
buf = arc_buf_alloc(spa, size, arc_onloan_tag, ARC_BUFC_DATA);
|
||||
|
||||
atomic_add_64(&arc_loaned_bytes, size);
|
||||
return (buf);
|
||||
}
|
||||
|
||||
/*
|
||||
* Return a loaned arc buffer to the arc.
|
||||
*/
|
||||
void
|
||||
arc_return_buf(arc_buf_t *buf, void *tag)
|
||||
{
|
||||
arc_buf_hdr_t *hdr = buf->b_hdr;
|
||||
|
||||
ASSERT(hdr->b_state == arc_anon);
|
||||
ASSERT(buf->b_data != NULL);
|
||||
VERIFY(refcount_remove(&hdr->b_refcnt, arc_onloan_tag) == 0);
|
||||
VERIFY(refcount_add(&hdr->b_refcnt, tag) == 1);
|
||||
|
||||
atomic_add_64(&arc_loaned_bytes, -hdr->b_size);
|
||||
}
|
||||
|
||||
static arc_buf_t *
|
||||
arc_buf_clone(arc_buf_t *from)
|
||||
{
|
||||
|
@ -2508,7 +2545,6 @@ arc_read(zio_t *pio, spa_t *spa, blkptr_t *bp, arc_buf_t *pbuf,
|
|||
uint32_t *arc_flags, const zbookmark_t *zb)
|
||||
{
|
||||
int err;
|
||||
arc_buf_hdr_t *hdr = pbuf->b_hdr;
|
||||
|
||||
ASSERT(!refcount_is_zero(&pbuf->b_hdr->b_refcnt));
|
||||
ASSERT3U((char *)bp - (char *)pbuf->b_data, <, pbuf->b_hdr->b_size);
|
||||
|
@ -2516,9 +2552,8 @@ arc_read(zio_t *pio, spa_t *spa, blkptr_t *bp, arc_buf_t *pbuf,
|
|||
|
||||
err = arc_read_nolock(pio, spa, bp, done, private, priority,
|
||||
zio_flags, arc_flags, zb);
|
||||
|
||||
ASSERT3P(hdr, ==, pbuf->b_hdr);
|
||||
rw_exit(&pbuf->b_lock);
|
||||
|
||||
return (err);
|
||||
}
|
||||
|
||||
|
@ -2608,7 +2643,7 @@ top:
|
|||
uint64_t size = BP_GET_LSIZE(bp);
|
||||
arc_callback_t *acb;
|
||||
vdev_t *vd = NULL;
|
||||
daddr_t addr;
|
||||
uint64_t addr;
|
||||
boolean_t devw = B_FALSE;
|
||||
|
||||
if (hdr == NULL) {
|
||||
|
@ -2927,6 +2962,7 @@ arc_release(arc_buf_t *buf, void *tag)
|
|||
kmutex_t *hash_lock;
|
||||
l2arc_buf_hdr_t *l2hdr;
|
||||
uint64_t buf_size;
|
||||
boolean_t released = B_FALSE;
|
||||
|
||||
rw_enter(&buf->b_lock, RW_WRITER);
|
||||
hdr = buf->b_hdr;
|
||||
|
@ -2942,12 +2978,12 @@ arc_release(arc_buf_t *buf, void *tag)
|
|||
ASSERT(buf->b_efunc == NULL);
|
||||
arc_buf_thaw(buf);
|
||||
rw_exit(&buf->b_lock);
|
||||
return;
|
||||
released = B_TRUE;
|
||||
} else {
|
||||
hash_lock = HDR_LOCK(hdr);
|
||||
mutex_enter(hash_lock);
|
||||
}
|
||||
|
||||
hash_lock = HDR_LOCK(hdr);
|
||||
mutex_enter(hash_lock);
|
||||
|
||||
l2hdr = hdr->b_l2hdr;
|
||||
if (l2hdr) {
|
||||
mutex_enter(&l2arc_buflist_mtx);
|
||||
|
@ -2955,6 +2991,9 @@ arc_release(arc_buf_t *buf, void *tag)
|
|||
buf_size = hdr->b_size;
|
||||
}
|
||||
|
||||
if (released)
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* Do we have more than one buf?
|
||||
*/
|
||||
|
@ -3022,6 +3061,7 @@ arc_release(arc_buf_t *buf, void *tag)
|
|||
buf->b_efunc = NULL;
|
||||
buf->b_private = NULL;
|
||||
|
||||
out:
|
||||
if (l2hdr) {
|
||||
list_remove(l2hdr->b_dev->l2ad_buflist, hdr);
|
||||
kmem_free(l2hdr, sizeof (l2arc_buf_hdr_t));
|
||||
|
@ -3315,10 +3355,9 @@ arc_free(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
|
|||
}
|
||||
|
||||
static int
|
||||
arc_memory_throttle(uint64_t reserve, uint64_t txg)
|
||||
arc_memory_throttle(uint64_t reserve, uint64_t inflight_data, uint64_t txg)
|
||||
{
|
||||
#ifdef _KERNEL
|
||||
uint64_t inflight_data = arc_anon->arcs_size;
|
||||
uint64_t available_memory = ptob(freemem);
|
||||
static uint64_t page_load = 0;
|
||||
static uint64_t last_txg = 0;
|
||||
|
@ -3380,6 +3419,7 @@ int
|
|||
arc_tempreserve_space(uint64_t reserve, uint64_t txg)
|
||||
{
|
||||
int error;
|
||||
uint64_t anon_size;
|
||||
|
||||
#ifdef ZFS_DEBUG
|
||||
/*
|
||||
|
@ -3395,12 +3435,19 @@ arc_tempreserve_space(uint64_t reserve, uint64_t txg)
|
|||
if (reserve > arc_c)
|
||||
return (ENOMEM);
|
||||
|
||||
/*
|
||||
* Don't count loaned bufs as in flight dirty data to prevent long
|
||||
* network delays from blocking transactions that are ready to be
|
||||
* assigned to a txg.
|
||||
*/
|
||||
anon_size = MAX((int64_t)(arc_anon->arcs_size - arc_loaned_bytes), 0);
|
||||
|
||||
/*
|
||||
* Writes will, almost always, require additional memory allocations
|
||||
* in order to compress/encrypt/etc the data. We therefor need to
|
||||
* make sure that there is sufficient available memory for this.
|
||||
*/
|
||||
if (error = arc_memory_throttle(reserve, txg))
|
||||
if (error = arc_memory_throttle(reserve, anon_size, txg))
|
||||
return (error);
|
||||
|
||||
/*
|
||||
|
@ -3410,8 +3457,9 @@ arc_tempreserve_space(uint64_t reserve, uint64_t txg)
|
|||
* Note: if two requests come in concurrently, we might let them
|
||||
* both succeed, when one of them should fail. Not a huge deal.
|
||||
*/
|
||||
if (reserve + arc_tempreserve + arc_anon->arcs_size > arc_c / 2 &&
|
||||
arc_anon->arcs_size > arc_c / 4) {
|
||||
|
||||
if (reserve + arc_tempreserve + anon_size > arc_c / 2 &&
|
||||
anon_size > arc_c / 4) {
|
||||
dprintf("failing, arc_tempreserve=%lluK anon_meta=%lluK "
|
||||
"anon_data=%lluK tempreserve=%lluK arc_c=%lluK\n",
|
||||
arc_tempreserve>>10,
|
||||
|
@ -3596,6 +3644,8 @@ arc_fini(void)
|
|||
mutex_destroy(&zfs_write_limit_lock);
|
||||
|
||||
buf_fini();
|
||||
|
||||
ASSERT(arc_loaned_bytes == 0);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -4490,7 +4540,7 @@ l2arc_vdev_present(vdev_t *vd)
|
|||
* validated the vdev and opened it.
|
||||
*/
|
||||
void
|
||||
l2arc_add_vdev(spa_t *spa, vdev_t *vd, uint64_t start, uint64_t end)
|
||||
l2arc_add_vdev(spa_t *spa, vdev_t *vd)
|
||||
{
|
||||
l2arc_dev_t *adddev;
|
||||
|
||||
|
@ -4504,8 +4554,8 @@ l2arc_add_vdev(spa_t *spa, vdev_t *vd, uint64_t start, uint64_t end)
|
|||
adddev->l2ad_vdev = vd;
|
||||
adddev->l2ad_write = l2arc_write_max;
|
||||
adddev->l2ad_boost = l2arc_write_boost;
|
||||
adddev->l2ad_start = start;
|
||||
adddev->l2ad_end = end;
|
||||
adddev->l2ad_start = VDEV_LABEL_START_SIZE;
|
||||
adddev->l2ad_end = VDEV_LABEL_START_SIZE + vdev_get_min_asize(vd);
|
||||
adddev->l2ad_hand = adddev->l2ad_start;
|
||||
adddev->l2ad_evict = adddev->l2ad_start;
|
||||
adddev->l2ad_first = B_TRUE;
|
||||
|
|
|
@ -329,7 +329,7 @@ dbuf_verify(dmu_buf_impl_t *db)
|
|||
if (db->db_parent == dn->dn_dbuf) {
|
||||
/* db is pointed to by the dnode */
|
||||
/* ASSERT3U(db->db_blkid, <, dn->dn_nblkptr); */
|
||||
if (db->db.db_object == DMU_META_DNODE_OBJECT)
|
||||
if (DMU_OBJECT_IS_SPECIAL(db->db.db_object))
|
||||
ASSERT(db->db_parent == NULL);
|
||||
else
|
||||
ASSERT(db->db_parent != NULL);
|
||||
|
@ -465,15 +465,15 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t *flags)
|
|||
ASSERT(db->db_buf == NULL);
|
||||
|
||||
if (db->db_blkid == DB_BONUS_BLKID) {
|
||||
int bonuslen = dn->dn_bonuslen;
|
||||
int bonuslen = MIN(dn->dn_bonuslen, dn->dn_phys->dn_bonuslen);
|
||||
|
||||
ASSERT3U(bonuslen, <=, db->db.db_size);
|
||||
db->db.db_data = zio_buf_alloc(DN_MAX_BONUSLEN);
|
||||
arc_space_consume(DN_MAX_BONUSLEN, ARC_SPACE_OTHER);
|
||||
if (bonuslen < DN_MAX_BONUSLEN)
|
||||
bzero(db->db.db_data, DN_MAX_BONUSLEN);
|
||||
bcopy(DN_BONUS(dn->dn_phys), db->db.db_data,
|
||||
bonuslen);
|
||||
if (bonuslen)
|
||||
bcopy(DN_BONUS(dn->dn_phys), db->db.db_data, bonuslen);
|
||||
dbuf_update_data(db);
|
||||
db->db_state = DB_CACHED;
|
||||
mutex_exit(&db->db_mtx);
|
||||
|
@ -908,15 +908,11 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
|
|||
* Shouldn't dirty a regular buffer in syncing context. Private
|
||||
* objects may be dirtied in syncing context, but only if they
|
||||
* were already pre-dirtied in open context.
|
||||
* XXX We may want to prohibit dirtying in syncing context even
|
||||
* if they did pre-dirty.
|
||||
*/
|
||||
ASSERT(!dmu_tx_is_syncing(tx) ||
|
||||
BP_IS_HOLE(dn->dn_objset->os_rootbp) ||
|
||||
dn->dn_object == DMU_META_DNODE_OBJECT ||
|
||||
dn->dn_objset->os_dsl_dataset == NULL ||
|
||||
dsl_dir_is_private(dn->dn_objset->os_dsl_dataset->ds_dir));
|
||||
|
||||
DMU_OBJECT_IS_SPECIAL(dn->dn_object) ||
|
||||
dn->dn_objset->os_dsl_dataset == NULL);
|
||||
/*
|
||||
* We make this assert for private objects as well, but after we
|
||||
* check if we're already dirty. They are allowed to re-dirty
|
||||
|
@ -975,7 +971,8 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
|
|||
/*
|
||||
* Only valid if not already dirty.
|
||||
*/
|
||||
ASSERT(dn->dn_dirtyctx == DN_UNDIRTIED || dn->dn_dirtyctx ==
|
||||
ASSERT(dn->dn_object == 0 ||
|
||||
dn->dn_dirtyctx == DN_UNDIRTIED || dn->dn_dirtyctx ==
|
||||
(dmu_tx_is_syncing(tx) ? DN_DIRTY_SYNC : DN_DIRTY_OPEN));
|
||||
|
||||
ASSERT3U(dn->dn_nlevels, >, db->db_level);
|
||||
|
@ -987,15 +984,13 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
|
|||
|
||||
/*
|
||||
* We should only be dirtying in syncing context if it's the
|
||||
* mos, a spa os, or we're initializing the os. However, we are
|
||||
* allowed to dirty in syncing context provided we already
|
||||
* dirtied it in open context. Hence we must make this
|
||||
* assertion only if we're not already dirty.
|
||||
* mos or we're initializing the os or it's a special object.
|
||||
* However, we are allowed to dirty in syncing context provided
|
||||
* we already dirtied it in open context. Hence we must make
|
||||
* this assertion only if we're not already dirty.
|
||||
*/
|
||||
ASSERT(!dmu_tx_is_syncing(tx) ||
|
||||
os->os_dsl_dataset == NULL ||
|
||||
!dsl_dir_is_private(os->os_dsl_dataset->ds_dir) ||
|
||||
!BP_IS_HOLE(os->os_rootbp));
|
||||
ASSERT(!dmu_tx_is_syncing(tx) || DMU_OBJECT_IS_SPECIAL(dn->dn_object) ||
|
||||
os->os_dsl_dataset == NULL || BP_IS_HOLE(os->os_rootbp));
|
||||
ASSERT(db->db.db_size != 0);
|
||||
|
||||
dprintf_dbuf(db, "size=%llx\n", (u_longlong_t)db->db.db_size);
|
||||
|
@ -1311,6 +1306,68 @@ dbuf_fill_done(dmu_buf_impl_t *db, dmu_tx_t *tx)
|
|||
mutex_exit(&db->db_mtx);
|
||||
}
|
||||
|
||||
/*
|
||||
* Directly assign a provided arc buf to a given dbuf if it's not referenced
|
||||
* by anybody except our caller. Otherwise copy arcbuf's contents to dbuf.
|
||||
*/
|
||||
void
|
||||
dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx)
|
||||
{
|
||||
ASSERT(!refcount_is_zero(&db->db_holds));
|
||||
ASSERT(db->db_dnode->dn_object != DMU_META_DNODE_OBJECT);
|
||||
ASSERT(db->db_blkid != DB_BONUS_BLKID);
|
||||
ASSERT(db->db_level == 0);
|
||||
ASSERT(DBUF_GET_BUFC_TYPE(db) == ARC_BUFC_DATA);
|
||||
ASSERT(buf != NULL);
|
||||
ASSERT(arc_buf_size(buf) == db->db.db_size);
|
||||
ASSERT(tx->tx_txg != 0);
|
||||
|
||||
arc_return_buf(buf, db);
|
||||
ASSERT(arc_released(buf));
|
||||
|
||||
mutex_enter(&db->db_mtx);
|
||||
|
||||
while (db->db_state == DB_READ || db->db_state == DB_FILL)
|
||||
cv_wait(&db->db_changed, &db->db_mtx);
|
||||
|
||||
ASSERT(db->db_state == DB_CACHED || db->db_state == DB_UNCACHED);
|
||||
|
||||
if (db->db_state == DB_CACHED &&
|
||||
refcount_count(&db->db_holds) - 1 > db->db_dirtycnt) {
|
||||
mutex_exit(&db->db_mtx);
|
||||
(void) dbuf_dirty(db, tx);
|
||||
bcopy(buf->b_data, db->db.db_data, db->db.db_size);
|
||||
VERIFY(arc_buf_remove_ref(buf, db) == 1);
|
||||
return;
|
||||
}
|
||||
|
||||
if (db->db_state == DB_CACHED) {
|
||||
dbuf_dirty_record_t *dr = db->db_last_dirty;
|
||||
|
||||
ASSERT(db->db_buf != NULL);
|
||||
if (dr != NULL && dr->dr_txg == tx->tx_txg) {
|
||||
ASSERT(dr->dt.dl.dr_data == db->db_buf);
|
||||
if (!arc_released(db->db_buf)) {
|
||||
ASSERT(dr->dt.dl.dr_override_state ==
|
||||
DR_OVERRIDDEN);
|
||||
arc_release(db->db_buf, db);
|
||||
}
|
||||
dr->dt.dl.dr_data = buf;
|
||||
VERIFY(arc_buf_remove_ref(db->db_buf, db) == 1);
|
||||
} else if (dr == NULL || dr->dt.dl.dr_data != db->db_buf) {
|
||||
arc_release(db->db_buf, db);
|
||||
VERIFY(arc_buf_remove_ref(db->db_buf, db) == 1);
|
||||
}
|
||||
db->db_buf = NULL;
|
||||
}
|
||||
ASSERT(db->db_buf == NULL);
|
||||
dbuf_set_data(db, buf);
|
||||
db->db_state = DB_FILL;
|
||||
mutex_exit(&db->db_mtx);
|
||||
(void) dbuf_dirty(db, tx);
|
||||
dbuf_fill_done(db, tx);
|
||||
}
|
||||
|
||||
/*
|
||||
* "Clear" the contents of this dbuf. This will mark the dbuf
|
||||
* EVICTING and clear *most* of its references. Unfortunately,
|
||||
|
@ -1855,6 +1912,19 @@ dmu_buf_get_user(dmu_buf_t *db_fake)
|
|||
return (db->db_user_ptr);
|
||||
}
|
||||
|
||||
boolean_t
|
||||
dmu_buf_freeable(dmu_buf_t *dbuf)
|
||||
{
|
||||
boolean_t res = B_FALSE;
|
||||
dmu_buf_impl_t *db = (dmu_buf_impl_t *)dbuf;
|
||||
|
||||
if (db->db_blkptr)
|
||||
res = dsl_dataset_block_freeable(db->db_objset->os_dsl_dataset,
|
||||
db->db_blkptr->blk_birth);
|
||||
|
||||
return (res);
|
||||
}
|
||||
|
||||
static void
|
||||
dbuf_check_blkptr(dnode_t *dn, dmu_buf_impl_t *db)
|
||||
{
|
||||
|
@ -1943,7 +2013,6 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
|
|||
dnode_t *dn = db->db_dnode;
|
||||
objset_impl_t *os = dn->dn_objset;
|
||||
uint64_t txg = tx->tx_txg;
|
||||
int blksz;
|
||||
|
||||
ASSERT(dmu_tx_is_syncing(tx));
|
||||
|
||||
|
@ -2049,32 +2118,25 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
|
|||
return;
|
||||
}
|
||||
|
||||
if (db->db_state != DB_NOFILL) {
|
||||
blksz = arc_buf_size(*datap);
|
||||
|
||||
if (dn->dn_object != DMU_META_DNODE_OBJECT) {
|
||||
/*
|
||||
* If this buffer is currently "in use" (i.e., there
|
||||
* are active holds and db_data still references it),
|
||||
* then make a copy before we start the write so that
|
||||
* any modifications from the open txg will not leak
|
||||
* into this write.
|
||||
*
|
||||
* NOTE: this copy does not need to be made for
|
||||
* objects only modified in the syncing context (e.g.
|
||||
* DNONE_DNODE blocks).
|
||||
*/
|
||||
if (refcount_count(&db->db_holds) > 1 &&
|
||||
*datap == db->db_buf) {
|
||||
arc_buf_contents_t type =
|
||||
DBUF_GET_BUFC_TYPE(db);
|
||||
*datap =
|
||||
arc_buf_alloc(os->os_spa, blksz, db, type);
|
||||
bcopy(db->db.db_data, (*datap)->b_data, blksz);
|
||||
}
|
||||
}
|
||||
|
||||
ASSERT(*datap != NULL);
|
||||
if (db->db_state != DB_NOFILL &&
|
||||
dn->dn_object != DMU_META_DNODE_OBJECT &&
|
||||
refcount_count(&db->db_holds) > 1 &&
|
||||
*datap == db->db_buf) {
|
||||
/*
|
||||
* If this buffer is currently "in use" (i.e., there
|
||||
* are active holds and db_data still references it),
|
||||
* then make a copy before we start the write so that
|
||||
* any modifications from the open txg will not leak
|
||||
* into this write.
|
||||
*
|
||||
* NOTE: this copy does not need to be made for
|
||||
* objects only modified in the syncing context (e.g.
|
||||
* DNONE_DNODE blocks).
|
||||
*/
|
||||
int blksz = arc_buf_size(*datap);
|
||||
arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db);
|
||||
*datap = arc_buf_alloc(os->os_spa, blksz, db, type);
|
||||
bcopy(db->db.db_data, (*datap)->b_data, blksz);
|
||||
}
|
||||
db->db_data_pending = dr;
|
||||
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
|
@ -85,6 +85,8 @@ const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES] = {
|
|||
{ byteswap_uint64_array, TRUE, "FUID table size" },
|
||||
{ zap_byteswap, TRUE, "DSL dataset next clones"},
|
||||
{ zap_byteswap, TRUE, "scrub work queue" },
|
||||
{ zap_byteswap, TRUE, "ZFS user/group used" },
|
||||
{ zap_byteswap, TRUE, "ZFS user/group quota" },
|
||||
};
|
||||
|
||||
int
|
||||
|
@ -180,22 +182,22 @@ dmu_bonus_hold(objset_t *os, uint64_t object, void *tag, dmu_buf_t **dbp)
|
|||
* whose dnodes are in the same block.
|
||||
*/
|
||||
static int
|
||||
dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset,
|
||||
uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp)
|
||||
dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
|
||||
int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp, uint32_t flags)
|
||||
{
|
||||
dsl_pool_t *dp = NULL;
|
||||
dmu_buf_t **dbp;
|
||||
uint64_t blkid, nblks, i;
|
||||
uint32_t flags;
|
||||
uint32_t dbuf_flags;
|
||||
int err;
|
||||
zio_t *zio;
|
||||
hrtime_t start;
|
||||
|
||||
ASSERT(length <= DMU_MAX_ACCESS);
|
||||
|
||||
flags = DB_RF_CANFAIL | DB_RF_NEVERWAIT;
|
||||
if (length > zfetch_array_rd_sz)
|
||||
flags |= DB_RF_NOPREFETCH;
|
||||
dbuf_flags = DB_RF_CANFAIL | DB_RF_NEVERWAIT;
|
||||
if (flags & DMU_READ_NO_PREFETCH || length > zfetch_array_rd_sz)
|
||||
dbuf_flags |= DB_RF_NOPREFETCH;
|
||||
|
||||
rw_enter(&dn->dn_struct_rwlock, RW_READER);
|
||||
if (dn->dn_datablkshift) {
|
||||
|
@ -233,7 +235,7 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset,
|
|||
/* initiate async i/o */
|
||||
if (read) {
|
||||
rw_exit(&dn->dn_struct_rwlock);
|
||||
(void) dbuf_read(db, zio, flags);
|
||||
(void) dbuf_read(db, zio, dbuf_flags);
|
||||
rw_enter(&dn->dn_struct_rwlock, RW_READER);
|
||||
}
|
||||
dbp[i] = &db->db;
|
||||
|
@ -285,7 +287,7 @@ dmu_buf_hold_array(objset_t *os, uint64_t object, uint64_t offset,
|
|||
return (err);
|
||||
|
||||
err = dmu_buf_hold_array_by_dnode(dn, offset, length, read, tag,
|
||||
numbufsp, dbpp);
|
||||
numbufsp, dbpp, DMU_READ_PREFETCH);
|
||||
|
||||
dnode_rele(dn, FTAG);
|
||||
|
||||
|
@ -300,7 +302,7 @@ dmu_buf_hold_array_by_bonus(dmu_buf_t *db, uint64_t offset,
|
|||
int err;
|
||||
|
||||
err = dmu_buf_hold_array_by_dnode(dn, offset, length, read, tag,
|
||||
numbufsp, dbpp);
|
||||
numbufsp, dbpp, DMU_READ_PREFETCH);
|
||||
|
||||
return (err);
|
||||
}
|
||||
|
@ -442,7 +444,8 @@ dmu_free_long_range_impl(objset_t *os, dnode_t *dn, uint64_t offset,
|
|||
object_size = align == 1 ? dn->dn_datablksz :
|
||||
(dn->dn_maxblkid + 1) << dn->dn_datablkshift;
|
||||
|
||||
if (trunc || (end = offset + length) > object_size)
|
||||
end = offset + length;
|
||||
if (trunc || end > object_size)
|
||||
end = object_size;
|
||||
if (end <= offset)
|
||||
return (0);
|
||||
|
@ -450,6 +453,7 @@ dmu_free_long_range_impl(objset_t *os, dnode_t *dn, uint64_t offset,
|
|||
|
||||
while (length) {
|
||||
start = end;
|
||||
/* assert(offset <= start) */
|
||||
err = get_next_chunk(dn, &start, offset);
|
||||
if (err)
|
||||
return (err);
|
||||
|
@ -540,7 +544,7 @@ dmu_free_range(objset_t *os, uint64_t object, uint64_t offset,
|
|||
|
||||
int
|
||||
dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
|
||||
void *buf)
|
||||
void *buf, uint32_t flags)
|
||||
{
|
||||
dnode_t *dn;
|
||||
dmu_buf_t **dbp;
|
||||
|
@ -570,7 +574,7 @@ dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
|
|||
* to be reading in parallel.
|
||||
*/
|
||||
err = dmu_buf_hold_array_by_dnode(dn, offset, mylen,
|
||||
TRUE, FTAG, &numbufs, &dbp);
|
||||
TRUE, FTAG, &numbufs, &dbp, flags);
|
||||
if (err)
|
||||
break;
|
||||
|
||||
|
@ -810,6 +814,58 @@ dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
|
|||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Allocate a loaned anonymous arc buffer.
|
||||
*/
|
||||
arc_buf_t *
|
||||
dmu_request_arcbuf(dmu_buf_t *handle, int size)
|
||||
{
|
||||
dnode_t *dn = ((dmu_buf_impl_t *)handle)->db_dnode;
|
||||
|
||||
return (arc_loan_buf(dn->dn_objset->os_spa, size));
|
||||
}
|
||||
|
||||
/*
|
||||
* Free a loaned arc buffer.
|
||||
*/
|
||||
void
|
||||
dmu_return_arcbuf(arc_buf_t *buf)
|
||||
{
|
||||
arc_return_buf(buf, FTAG);
|
||||
VERIFY(arc_buf_remove_ref(buf, FTAG) == 1);
|
||||
}
|
||||
|
||||
/*
|
||||
* When possible directly assign passed loaned arc buffer to a dbuf.
|
||||
* If this is not possible copy the contents of passed arc buf via
|
||||
* dmu_write().
|
||||
*/
|
||||
void
|
||||
dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, arc_buf_t *buf,
|
||||
dmu_tx_t *tx)
|
||||
{
|
||||
dnode_t *dn = ((dmu_buf_impl_t *)handle)->db_dnode;
|
||||
dmu_buf_impl_t *db;
|
||||
uint32_t blksz = (uint32_t)arc_buf_size(buf);
|
||||
uint64_t blkid;
|
||||
|
||||
rw_enter(&dn->dn_struct_rwlock, RW_READER);
|
||||
blkid = dbuf_whichblock(dn, offset);
|
||||
VERIFY((db = dbuf_hold(dn, blkid, FTAG)) != NULL);
|
||||
rw_exit(&dn->dn_struct_rwlock);
|
||||
|
||||
if (offset == db->db.db_offset && blksz == db->db.db_size) {
|
||||
dbuf_assign_arcbuf(db, buf, tx);
|
||||
dbuf_rele(db, FTAG);
|
||||
} else {
|
||||
dbuf_rele(db, FTAG);
|
||||
ASSERT(dn->dn_objset->os.os == dn->dn_objset);
|
||||
dmu_write(&dn->dn_objset->os, dn->dn_object, offset, blksz,
|
||||
buf->b_data, tx);
|
||||
dmu_return_arcbuf(buf);
|
||||
}
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
dbuf_dirty_record_t *dr;
|
||||
dmu_sync_cb_t *done;
|
||||
|
|
|
@ -19,12 +19,10 @@
|
|||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/dmu_objset.h>
|
||||
#include <sys/dmu_tx.h>
|
||||
|
@ -108,22 +106,56 @@ dmu_object_claim(objset_t *os, uint64_t object, dmu_object_type_t ot,
|
|||
|
||||
int
|
||||
dmu_object_reclaim(objset_t *os, uint64_t object, dmu_object_type_t ot,
|
||||
int blocksize, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
|
||||
int blocksize, dmu_object_type_t bonustype, int bonuslen)
|
||||
{
|
||||
dnode_t *dn;
|
||||
dmu_tx_t *tx;
|
||||
int nblkptr;
|
||||
int err;
|
||||
|
||||
if (object == DMU_META_DNODE_OBJECT && !dmu_tx_private_ok(tx))
|
||||
if (object == DMU_META_DNODE_OBJECT)
|
||||
return (EBADF);
|
||||
|
||||
err = dnode_hold_impl(os->os, object, DNODE_MUST_BE_ALLOCATED,
|
||||
FTAG, &dn);
|
||||
if (err)
|
||||
return (err);
|
||||
|
||||
if (dn->dn_type == ot && dn->dn_datablksz == blocksize &&
|
||||
dn->dn_bonustype == bonustype && dn->dn_bonuslen == bonuslen) {
|
||||
/* nothing is changing, this is a noop */
|
||||
dnode_rele(dn, FTAG);
|
||||
return (0);
|
||||
}
|
||||
|
||||
nblkptr = 1 + ((DN_MAX_BONUSLEN - bonuslen) >> SPA_BLKPTRSHIFT);
|
||||
|
||||
/*
|
||||
* If we are losing blkptrs or changing the block size this must
|
||||
* be a new file instance. We must clear out the previous file
|
||||
* contents before we can change this type of metadata in the dnode.
|
||||
*/
|
||||
if (dn->dn_nblkptr > nblkptr || dn->dn_datablksz != blocksize) {
|
||||
err = dmu_free_long_range(os, object, 0, DMU_OBJECT_END);
|
||||
if (err)
|
||||
goto out;
|
||||
}
|
||||
|
||||
tx = dmu_tx_create(os);
|
||||
dmu_tx_hold_bonus(tx, object);
|
||||
err = dmu_tx_assign(tx, TXG_WAIT);
|
||||
if (err) {
|
||||
dmu_tx_abort(tx);
|
||||
goto out;
|
||||
}
|
||||
|
||||
dnode_reallocate(dn, ot, blocksize, bonustype, bonuslen, tx);
|
||||
|
||||
dmu_tx_commit(tx);
|
||||
out:
|
||||
dnode_rele(dn, FTAG);
|
||||
|
||||
return (0);
|
||||
return (err);
|
||||
}
|
||||
|
||||
int
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
|
@ -164,10 +164,15 @@ dmu_objset_byteswap(void *buf, size_t size)
|
|||
{
|
||||
objset_phys_t *osp = buf;
|
||||
|
||||
ASSERT(size == sizeof (objset_phys_t));
|
||||
ASSERT(size == OBJSET_OLD_PHYS_SIZE || size == sizeof (objset_phys_t));
|
||||
dnode_byteswap(&osp->os_meta_dnode);
|
||||
byteswap_uint64_array(&osp->os_zil_header, sizeof (zil_header_t));
|
||||
osp->os_type = BSWAP_64(osp->os_type);
|
||||
osp->os_flags = BSWAP_64(osp->os_flags);
|
||||
if (size == sizeof (objset_phys_t)) {
|
||||
dnode_byteswap(&osp->os_userused_dnode);
|
||||
dnode_byteswap(&osp->os_groupused_dnode);
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
|
@ -210,12 +215,30 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
|
|||
err = EIO;
|
||||
return (err);
|
||||
}
|
||||
|
||||
/* Increase the blocksize if we are permitted. */
|
||||
if (spa_version(spa) >= SPA_VERSION_USERSPACE &&
|
||||
arc_buf_size(osi->os_phys_buf) < sizeof (objset_phys_t)) {
|
||||
arc_buf_t *buf = arc_buf_alloc(spa,
|
||||
sizeof (objset_phys_t), &osi->os_phys_buf,
|
||||
ARC_BUFC_METADATA);
|
||||
bzero(buf->b_data, sizeof (objset_phys_t));
|
||||
bcopy(osi->os_phys_buf->b_data, buf->b_data,
|
||||
arc_buf_size(osi->os_phys_buf));
|
||||
(void) arc_buf_remove_ref(osi->os_phys_buf,
|
||||
&osi->os_phys_buf);
|
||||
osi->os_phys_buf = buf;
|
||||
}
|
||||
|
||||
osi->os_phys = osi->os_phys_buf->b_data;
|
||||
osi->os_flags = osi->os_phys->os_flags;
|
||||
} else {
|
||||
osi->os_phys_buf = arc_buf_alloc(spa, sizeof (objset_phys_t),
|
||||
int size = spa_version(spa) >= SPA_VERSION_USERSPACE ?
|
||||
sizeof (objset_phys_t) : OBJSET_OLD_PHYS_SIZE;
|
||||
osi->os_phys_buf = arc_buf_alloc(spa, size,
|
||||
&osi->os_phys_buf, ARC_BUFC_METADATA);
|
||||
osi->os_phys = osi->os_phys_buf->b_data;
|
||||
bzero(osi->os_phys, sizeof (objset_phys_t));
|
||||
bzero(osi->os_phys, size);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -276,6 +299,12 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
|
|||
|
||||
osi->os_meta_dnode = dnode_special_open(osi,
|
||||
&osi->os_phys->os_meta_dnode, DMU_META_DNODE_OBJECT);
|
||||
if (arc_buf_size(osi->os_phys_buf) >= sizeof (objset_phys_t)) {
|
||||
osi->os_userused_dnode = dnode_special_open(osi,
|
||||
&osi->os_phys->os_userused_dnode, DMU_USERUSED_OBJECT);
|
||||
osi->os_groupused_dnode = dnode_special_open(osi,
|
||||
&osi->os_phys->os_groupused_dnode, DMU_GROUPUSED_OBJECT);
|
||||
}
|
||||
|
||||
/*
|
||||
* We should be the only thread trying to do this because we
|
||||
|
@ -456,13 +485,15 @@ dmu_objset_evict(dsl_dataset_t *ds, void *arg)
|
|||
os.os = osi;
|
||||
(void) dmu_objset_evict_dbufs(&os);
|
||||
|
||||
ASSERT3P(list_head(&osi->os_dnodes), ==, osi->os_meta_dnode);
|
||||
ASSERT3P(list_tail(&osi->os_dnodes), ==, osi->os_meta_dnode);
|
||||
ASSERT3P(list_head(&osi->os_meta_dnode->dn_dbufs), ==, NULL);
|
||||
|
||||
dnode_special_close(osi->os_meta_dnode);
|
||||
if (osi->os_userused_dnode) {
|
||||
dnode_special_close(osi->os_userused_dnode);
|
||||
dnode_special_close(osi->os_groupused_dnode);
|
||||
}
|
||||
zil_free(osi->os_zil);
|
||||
|
||||
ASSERT3P(list_head(&osi->os_dnodes), ==, NULL);
|
||||
|
||||
VERIFY(arc_buf_remove_ref(osi->os_phys_buf, &osi->os_phys_buf) == 1);
|
||||
mutex_destroy(&osi->os_lock);
|
||||
mutex_destroy(&osi->os_obj_lock);
|
||||
|
@ -520,6 +551,10 @@ dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
|
|||
ASSERT(type != DMU_OST_ANY);
|
||||
ASSERT(type < DMU_OST_NUMTYPES);
|
||||
osi->os_phys->os_type = type;
|
||||
if (dmu_objset_userused_enabled(osi)) {
|
||||
osi->os_phys->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE;
|
||||
osi->os_flags = osi->os_phys->os_flags;
|
||||
}
|
||||
|
||||
dsl_dataset_dirty(ds, tx);
|
||||
|
||||
|
@ -704,13 +739,33 @@ struct snaparg {
|
|||
char *snapname;
|
||||
char failed[MAXPATHLEN];
|
||||
boolean_t checkperms;
|
||||
list_t objsets;
|
||||
nvlist_t *props;
|
||||
};
|
||||
|
||||
struct osnode {
|
||||
list_node_t node;
|
||||
objset_t *os;
|
||||
};
|
||||
static int
|
||||
snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx)
|
||||
{
|
||||
objset_t *os = arg1;
|
||||
struct snaparg *sn = arg2;
|
||||
|
||||
/* The props have already been checked by zfs_check_userprops(). */
|
||||
|
||||
return (dsl_dataset_snapshot_check(os->os->os_dsl_dataset,
|
||||
sn->snapname, tx));
|
||||
}
|
||||
|
||||
static void
|
||||
snapshot_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
|
||||
{
|
||||
objset_t *os = arg1;
|
||||
dsl_dataset_t *ds = os->os->os_dsl_dataset;
|
||||
struct snaparg *sn = arg2;
|
||||
|
||||
dsl_dataset_snapshot_sync(ds, sn->snapname, cr, tx);
|
||||
|
||||
if (sn->props)
|
||||
dsl_props_set_sync(ds->ds_prev, sn->props, cr, tx);
|
||||
}
|
||||
|
||||
static int
|
||||
dmu_objset_snapshot_one(char *name, void *arg)
|
||||
|
@ -747,13 +802,8 @@ dmu_objset_snapshot_one(char *name, void *arg)
|
|||
*/
|
||||
err = zil_suspend(dmu_objset_zil(os));
|
||||
if (err == 0) {
|
||||
struct osnode *osn;
|
||||
dsl_sync_task_create(sn->dstg, dsl_dataset_snapshot_check,
|
||||
dsl_dataset_snapshot_sync, os->os->os_dsl_dataset,
|
||||
sn->snapname, 3);
|
||||
osn = kmem_alloc(sizeof (struct osnode), KM_SLEEP);
|
||||
osn->os = os;
|
||||
list_insert_tail(&sn->objsets, osn);
|
||||
dsl_sync_task_create(sn->dstg, snapshot_check,
|
||||
snapshot_sync, os, sn, 3);
|
||||
} else {
|
||||
dmu_objset_close(os);
|
||||
}
|
||||
|
@ -762,11 +812,11 @@ dmu_objset_snapshot_one(char *name, void *arg)
|
|||
}
|
||||
|
||||
int
|
||||
dmu_objset_snapshot(char *fsname, char *snapname, boolean_t recursive)
|
||||
dmu_objset_snapshot(char *fsname, char *snapname,
|
||||
nvlist_t *props, boolean_t recursive)
|
||||
{
|
||||
dsl_sync_task_t *dst;
|
||||
struct osnode *osn;
|
||||
struct snaparg sn = { 0 };
|
||||
struct snaparg sn;
|
||||
spa_t *spa;
|
||||
int err;
|
||||
|
||||
|
@ -778,8 +828,7 @@ dmu_objset_snapshot(char *fsname, char *snapname, boolean_t recursive)
|
|||
|
||||
sn.dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
|
||||
sn.snapname = snapname;
|
||||
list_create(&sn.objsets, sizeof (struct osnode),
|
||||
offsetof(struct osnode, node));
|
||||
sn.props = props;
|
||||
|
||||
if (recursive) {
|
||||
sn.checkperms = B_TRUE;
|
||||
|
@ -790,27 +839,19 @@ dmu_objset_snapshot(char *fsname, char *snapname, boolean_t recursive)
|
|||
err = dmu_objset_snapshot_one(fsname, &sn);
|
||||
}
|
||||
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
err = dsl_sync_task_group_wait(sn.dstg);
|
||||
if (err == 0)
|
||||
err = dsl_sync_task_group_wait(sn.dstg);
|
||||
|
||||
for (dst = list_head(&sn.dstg->dstg_tasks); dst;
|
||||
dst = list_next(&sn.dstg->dstg_tasks, dst)) {
|
||||
dsl_dataset_t *ds = dst->dst_arg1;
|
||||
objset_t *os = dst->dst_arg1;
|
||||
dsl_dataset_t *ds = os->os->os_dsl_dataset;
|
||||
if (dst->dst_err)
|
||||
dsl_dataset_name(ds, sn.failed);
|
||||
zil_resume(dmu_objset_zil(os));
|
||||
dmu_objset_close(os);
|
||||
}
|
||||
|
||||
out:
|
||||
while (osn = list_head(&sn.objsets)) {
|
||||
list_remove(&sn.objsets, osn);
|
||||
zil_resume(dmu_objset_zil(osn->os));
|
||||
dmu_objset_close(osn->os);
|
||||
kmem_free(osn, sizeof (struct osnode));
|
||||
}
|
||||
list_destroy(&sn.objsets);
|
||||
|
||||
if (err)
|
||||
(void) strcpy(fsname, sn.failed);
|
||||
dsl_sync_task_group_destroy(sn.dstg);
|
||||
|
@ -819,7 +860,7 @@ out:
|
|||
}
|
||||
|
||||
static void
|
||||
dmu_objset_sync_dnodes(list_t *list, dmu_tx_t *tx)
|
||||
dmu_objset_sync_dnodes(list_t *list, list_t *newlist, dmu_tx_t *tx)
|
||||
{
|
||||
dnode_t *dn;
|
||||
|
||||
|
@ -827,14 +868,20 @@ dmu_objset_sync_dnodes(list_t *list, dmu_tx_t *tx)
|
|||
ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT);
|
||||
ASSERT(dn->dn_dbuf->db_data_pending);
|
||||
/*
|
||||
* Initialize dn_zio outside dnode_sync()
|
||||
* to accomodate meta-dnode
|
||||
* Initialize dn_zio outside dnode_sync() because the
|
||||
* meta-dnode needs to set it outside dnode_sync().
|
||||
*/
|
||||
dn->dn_zio = dn->dn_dbuf->db_data_pending->dr_zio;
|
||||
ASSERT(dn->dn_zio);
|
||||
|
||||
ASSERT3U(dn->dn_nlevels, <=, DN_MAX_LEVELS);
|
||||
list_remove(list, dn);
|
||||
|
||||
if (newlist) {
|
||||
(void) dnode_add_ref(dn, newlist);
|
||||
list_insert_tail(newlist, dn);
|
||||
}
|
||||
|
||||
dnode_sync(dn, tx);
|
||||
}
|
||||
}
|
||||
|
@ -853,9 +900,12 @@ ready(zio_t *zio, arc_buf_t *abuf, void *arg)
|
|||
ASSERT(BP_GET_LEVEL(bp) == 0);
|
||||
|
||||
/*
|
||||
* Update rootbp fill count.
|
||||
* Update rootbp fill count: it should be the number of objects
|
||||
* allocated in the object set (not counting the "special"
|
||||
* objects that are stored in the objset_phys_t -- the meta
|
||||
* dnode and user/group accounting objects).
|
||||
*/
|
||||
bp->blk_fill = 1; /* count the meta-dnode */
|
||||
bp->blk_fill = 0;
|
||||
for (int i = 0; i < dnp->dn_nblkptr; i++)
|
||||
bp->blk_fill += dnp->dn_blkptr[i].blk_fill;
|
||||
|
||||
|
@ -878,6 +928,7 @@ dmu_objset_sync(objset_impl_t *os, zio_t *pio, dmu_tx_t *tx)
|
|||
writeprops_t wp = { 0 };
|
||||
zio_t *zio;
|
||||
list_t *list;
|
||||
list_t *newlist = NULL;
|
||||
dbuf_dirty_record_t *dr;
|
||||
|
||||
dprintf_ds(os->os_dsl_dataset, "txg=%llu\n", tx->tx_txg);
|
||||
|
@ -915,20 +966,41 @@ dmu_objset_sync(objset_impl_t *os, zio_t *pio, dmu_tx_t *tx)
|
|||
}
|
||||
|
||||
arc_release(os->os_phys_buf, &os->os_phys_buf);
|
||||
|
||||
zio = arc_write(pio, os->os_spa, &wp, DMU_OS_IS_L2CACHEABLE(os),
|
||||
tx->tx_txg, os->os_rootbp, os->os_phys_buf, ready, NULL, os,
|
||||
ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb);
|
||||
|
||||
/*
|
||||
* Sync meta-dnode - the parent IO for the sync is the root block
|
||||
* Sync special dnodes - the parent IO for the sync is the root block
|
||||
*/
|
||||
os->os_meta_dnode->dn_zio = zio;
|
||||
dnode_sync(os->os_meta_dnode, tx);
|
||||
|
||||
os->os_phys->os_flags = os->os_flags;
|
||||
|
||||
if (os->os_userused_dnode &&
|
||||
os->os_userused_dnode->dn_type != DMU_OT_NONE) {
|
||||
os->os_userused_dnode->dn_zio = zio;
|
||||
dnode_sync(os->os_userused_dnode, tx);
|
||||
os->os_groupused_dnode->dn_zio = zio;
|
||||
dnode_sync(os->os_groupused_dnode, tx);
|
||||
}
|
||||
|
||||
txgoff = tx->tx_txg & TXG_MASK;
|
||||
|
||||
dmu_objset_sync_dnodes(&os->os_free_dnodes[txgoff], tx);
|
||||
dmu_objset_sync_dnodes(&os->os_dirty_dnodes[txgoff], tx);
|
||||
if (dmu_objset_userused_enabled(os)) {
|
||||
newlist = &os->os_synced_dnodes;
|
||||
/*
|
||||
* We must create the list here because it uses the
|
||||
* dn_dirty_link[] of this txg.
|
||||
*/
|
||||
list_create(newlist, sizeof (dnode_t),
|
||||
offsetof(dnode_t, dn_dirty_link[txgoff]));
|
||||
}
|
||||
|
||||
dmu_objset_sync_dnodes(&os->os_free_dnodes[txgoff], newlist, tx);
|
||||
dmu_objset_sync_dnodes(&os->os_dirty_dnodes[txgoff], newlist, tx);
|
||||
|
||||
list = &os->os_meta_dnode->dn_dirty_records[txgoff];
|
||||
while (dr = list_head(list)) {
|
||||
|
@ -945,6 +1017,145 @@ dmu_objset_sync(objset_impl_t *os, zio_t *pio, dmu_tx_t *tx)
|
|||
zio_nowait(zio);
|
||||
}
|
||||
|
||||
static objset_used_cb_t *used_cbs[DMU_OST_NUMTYPES];
|
||||
|
||||
void
|
||||
dmu_objset_register_type(dmu_objset_type_t ost, objset_used_cb_t *cb)
|
||||
{
|
||||
used_cbs[ost] = cb;
|
||||
}
|
||||
|
||||
boolean_t
|
||||
dmu_objset_userused_enabled(objset_impl_t *os)
|
||||
{
|
||||
return (spa_version(os->os_spa) >= SPA_VERSION_USERSPACE &&
|
||||
used_cbs[os->os_phys->os_type] &&
|
||||
os->os_userused_dnode);
|
||||
}
|
||||
|
||||
void
|
||||
dmu_objset_do_userquota_callbacks(objset_impl_t *os, dmu_tx_t *tx)
|
||||
{
|
||||
dnode_t *dn;
|
||||
list_t *list = &os->os_synced_dnodes;
|
||||
static const char zerobuf[DN_MAX_BONUSLEN] = {0};
|
||||
|
||||
ASSERT(list_head(list) == NULL || dmu_objset_userused_enabled(os));
|
||||
|
||||
while (dn = list_head(list)) {
|
||||
dmu_object_type_t bonustype;
|
||||
|
||||
ASSERT(!DMU_OBJECT_IS_SPECIAL(dn->dn_object));
|
||||
ASSERT(dn->dn_oldphys);
|
||||
ASSERT(dn->dn_phys->dn_type == DMU_OT_NONE ||
|
||||
dn->dn_phys->dn_flags &
|
||||
DNODE_FLAG_USERUSED_ACCOUNTED);
|
||||
|
||||
/* Allocate the user/groupused objects if necessary. */
|
||||
if (os->os_userused_dnode->dn_type == DMU_OT_NONE) {
|
||||
VERIFY(0 == zap_create_claim(&os->os,
|
||||
DMU_USERUSED_OBJECT,
|
||||
DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx));
|
||||
VERIFY(0 == zap_create_claim(&os->os,
|
||||
DMU_GROUPUSED_OBJECT,
|
||||
DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx));
|
||||
}
|
||||
|
||||
/*
|
||||
* If the object was not previously
|
||||
* accounted, pretend that it was free.
|
||||
*/
|
||||
if (!(dn->dn_oldphys->dn_flags &
|
||||
DNODE_FLAG_USERUSED_ACCOUNTED)) {
|
||||
bzero(dn->dn_oldphys, sizeof (dnode_phys_t));
|
||||
}
|
||||
|
||||
/*
|
||||
* If the object was freed, use the previous bonustype.
|
||||
*/
|
||||
bonustype = dn->dn_phys->dn_bonustype ?
|
||||
dn->dn_phys->dn_bonustype : dn->dn_oldphys->dn_bonustype;
|
||||
ASSERT(dn->dn_phys->dn_type != 0 ||
|
||||
(bcmp(DN_BONUS(dn->dn_phys), zerobuf,
|
||||
DN_MAX_BONUSLEN) == 0 &&
|
||||
DN_USED_BYTES(dn->dn_phys) == 0));
|
||||
ASSERT(dn->dn_oldphys->dn_type != 0 ||
|
||||
(bcmp(DN_BONUS(dn->dn_oldphys), zerobuf,
|
||||
DN_MAX_BONUSLEN) == 0 &&
|
||||
DN_USED_BYTES(dn->dn_oldphys) == 0));
|
||||
used_cbs[os->os_phys->os_type](&os->os, bonustype,
|
||||
DN_BONUS(dn->dn_oldphys), DN_BONUS(dn->dn_phys),
|
||||
DN_USED_BYTES(dn->dn_oldphys),
|
||||
DN_USED_BYTES(dn->dn_phys), tx);
|
||||
|
||||
/*
|
||||
* The mutex is needed here for interlock with dnode_allocate.
|
||||
*/
|
||||
mutex_enter(&dn->dn_mtx);
|
||||
zio_buf_free(dn->dn_oldphys, sizeof (dnode_phys_t));
|
||||
dn->dn_oldphys = NULL;
|
||||
mutex_exit(&dn->dn_mtx);
|
||||
|
||||
list_remove(list, dn);
|
||||
dnode_rele(dn, list);
|
||||
}
|
||||
}
|
||||
|
||||
boolean_t
|
||||
dmu_objset_userspace_present(objset_t *os)
|
||||
{
|
||||
return (os->os->os_phys->os_flags &
|
||||
OBJSET_FLAG_USERACCOUNTING_COMPLETE);
|
||||
}
|
||||
|
||||
int
|
||||
dmu_objset_userspace_upgrade(objset_t *os)
|
||||
{
|
||||
uint64_t obj;
|
||||
int err = 0;
|
||||
|
||||
if (dmu_objset_userspace_present(os))
|
||||
return (0);
|
||||
if (!dmu_objset_userused_enabled(os->os))
|
||||
return (ENOTSUP);
|
||||
if (dmu_objset_is_snapshot(os))
|
||||
return (EINVAL);
|
||||
|
||||
/*
|
||||
* We simply need to mark every object dirty, so that it will be
|
||||
* synced out and now accounted. If this is called
|
||||
* concurrently, or if we already did some work before crashing,
|
||||
* that's fine, since we track each object's accounted state
|
||||
* independently.
|
||||
*/
|
||||
|
||||
for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE, 0)) {
|
||||
dmu_tx_t *tx = dmu_tx_create(os);
|
||||
dmu_buf_t *db;
|
||||
int objerr;
|
||||
|
||||
if (issig(JUSTLOOKING) && issig(FORREAL))
|
||||
return (EINTR);
|
||||
|
||||
objerr = dmu_bonus_hold(os, obj, FTAG, &db);
|
||||
if (objerr)
|
||||
continue;
|
||||
dmu_tx_hold_bonus(tx, obj);
|
||||
objerr = dmu_tx_assign(tx, TXG_WAIT);
|
||||
if (objerr) {
|
||||
dmu_tx_abort(tx);
|
||||
continue;
|
||||
}
|
||||
dmu_buf_will_dirty(db, tx);
|
||||
dmu_buf_rele(db, FTAG);
|
||||
dmu_tx_commit(tx);
|
||||
}
|
||||
|
||||
os->os->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE;
|
||||
txg_wait_synced(dmu_objset_pool(os), 0);
|
||||
return (0);
|
||||
}
|
||||
|
||||
void
|
||||
dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp,
|
||||
uint64_t *usedobjsp, uint64_t *availobjsp)
|
||||
|
@ -978,6 +1189,8 @@ dmu_objset_stats(objset_t *os, nvlist_t *nv)
|
|||
|
||||
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_TYPE,
|
||||
os->os->os_phys->os_type);
|
||||
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USERACCOUNTING,
|
||||
dmu_objset_userspace_present(os));
|
||||
}
|
||||
|
||||
int
|
||||
|
|
|
@ -161,7 +161,9 @@ backup_cb(spa_t *spa, blkptr_t *bp, const zbookmark_t *zb,
|
|||
if (issig(JUSTLOOKING) && issig(FORREAL))
|
||||
return (EINTR);
|
||||
|
||||
if (bp == NULL && zb->zb_object == 0) {
|
||||
if (zb->zb_object != 0 && DMU_OBJECT_IS_SPECIAL(zb->zb_object)) {
|
||||
return (0);
|
||||
} else if (bp == NULL && zb->zb_object == 0) {
|
||||
uint64_t span = BP_SPAN(dnp, zb->zb_level);
|
||||
uint64_t dnobj = (zb->zb_blkid * span) >> DNODE_SHIFT;
|
||||
err = dump_freeobjects(ba, dnobj, span >> DNODE_SHIFT);
|
||||
|
@ -775,11 +777,6 @@ restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro)
|
|||
dmu_tx_t *tx;
|
||||
void *data = NULL;
|
||||
|
||||
err = dmu_object_info(os, drro->drr_object, NULL);
|
||||
|
||||
if (err != 0 && err != ENOENT)
|
||||
return (EINVAL);
|
||||
|
||||
if (drro->drr_type == DMU_OT_NONE ||
|
||||
drro->drr_type >= DMU_OT_NUMTYPES ||
|
||||
drro->drr_bonustype >= DMU_OT_NUMTYPES ||
|
||||
|
@ -792,18 +789,21 @@ restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro)
|
|||
return (EINVAL);
|
||||
}
|
||||
|
||||
err = dmu_object_info(os, drro->drr_object, NULL);
|
||||
|
||||
if (err != 0 && err != ENOENT)
|
||||
return (EINVAL);
|
||||
|
||||
if (drro->drr_bonuslen) {
|
||||
data = restore_read(ra, P2ROUNDUP(drro->drr_bonuslen, 8));
|
||||
if (ra->err)
|
||||
return (ra->err);
|
||||
}
|
||||
|
||||
tx = dmu_tx_create(os);
|
||||
|
||||
if (err == ENOENT) {
|
||||
/* currently free, want to be allocated */
|
||||
tx = dmu_tx_create(os);
|
||||
dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
|
||||
dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 1);
|
||||
err = dmu_tx_assign(tx, TXG_WAIT);
|
||||
if (err) {
|
||||
dmu_tx_abort(tx);
|
||||
|
@ -812,28 +812,22 @@ restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro)
|
|||
err = dmu_object_claim(os, drro->drr_object,
|
||||
drro->drr_type, drro->drr_blksz,
|
||||
drro->drr_bonustype, drro->drr_bonuslen, tx);
|
||||
dmu_tx_commit(tx);
|
||||
} else {
|
||||
/* currently allocated, want to be allocated */
|
||||
dmu_tx_hold_bonus(tx, drro->drr_object);
|
||||
/*
|
||||
* We may change blocksize and delete old content,
|
||||
* so need to hold_write and hold_free.
|
||||
*/
|
||||
dmu_tx_hold_write(tx, drro->drr_object, 0, 1);
|
||||
dmu_tx_hold_free(tx, drro->drr_object, 0, DMU_OBJECT_END);
|
||||
err = dmu_tx_assign(tx, TXG_WAIT);
|
||||
if (err) {
|
||||
dmu_tx_abort(tx);
|
||||
return (err);
|
||||
}
|
||||
|
||||
err = dmu_object_reclaim(os, drro->drr_object,
|
||||
drro->drr_type, drro->drr_blksz,
|
||||
drro->drr_bonustype, drro->drr_bonuslen, tx);
|
||||
drro->drr_bonustype, drro->drr_bonuslen);
|
||||
}
|
||||
if (err) {
|
||||
dmu_tx_commit(tx);
|
||||
if (err)
|
||||
return (EINVAL);
|
||||
|
||||
tx = dmu_tx_create(os);
|
||||
dmu_tx_hold_bonus(tx, drro->drr_object);
|
||||
err = dmu_tx_assign(tx, TXG_WAIT);
|
||||
if (err) {
|
||||
dmu_tx_abort(tx);
|
||||
return (err);
|
||||
}
|
||||
|
||||
dmu_object_set_checksum(os, drro->drr_object, drro->drr_checksum, tx);
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
|
@ -64,6 +64,9 @@ struct traverse_data {
|
|||
void *td_arg;
|
||||
};
|
||||
|
||||
static int traverse_dnode(struct traverse_data *td, const dnode_phys_t *dnp,
|
||||
arc_buf_t *buf, uint64_t objset, uint64_t object);
|
||||
|
||||
/* ARGSUSED */
|
||||
static void
|
||||
traverse_zil_block(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t claim_txg)
|
||||
|
@ -189,7 +192,7 @@ traverse_visitbp(struct traverse_data *td, const dnode_phys_t *dnp,
|
|||
}
|
||||
} else if (BP_GET_TYPE(bp) == DMU_OT_DNODE) {
|
||||
uint32_t flags = ARC_WAIT;
|
||||
int i, j;
|
||||
int i;
|
||||
int epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT;
|
||||
|
||||
err = arc_read(NULL, td->td_spa, bp, pbuf,
|
||||
|
@ -201,20 +204,15 @@ traverse_visitbp(struct traverse_data *td, const dnode_phys_t *dnp,
|
|||
/* recursively visitbp() blocks below this */
|
||||
dnp = buf->b_data;
|
||||
for (i = 0; i < epb && err == 0; i++, dnp++) {
|
||||
for (j = 0; j < dnp->dn_nblkptr; j++) {
|
||||
SET_BOOKMARK(&czb, zb->zb_objset,
|
||||
zb->zb_blkid * epb + i,
|
||||
dnp->dn_nlevels - 1, j);
|
||||
err = traverse_visitbp(td, dnp, buf,
|
||||
(blkptr_t *)&dnp->dn_blkptr[j], &czb);
|
||||
if (err)
|
||||
break;
|
||||
}
|
||||
err = traverse_dnode(td, dnp, buf, zb->zb_objset,
|
||||
zb->zb_blkid * epb + i);
|
||||
if (err)
|
||||
break;
|
||||
}
|
||||
} else if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) {
|
||||
uint32_t flags = ARC_WAIT;
|
||||
objset_phys_t *osp;
|
||||
int j;
|
||||
dnode_phys_t *dnp;
|
||||
|
||||
err = arc_read_nolock(NULL, td->td_spa, bp,
|
||||
arc_getbuf_func, &buf,
|
||||
|
@ -223,20 +221,19 @@ traverse_visitbp(struct traverse_data *td, const dnode_phys_t *dnp,
|
|||
return (err);
|
||||
|
||||
osp = buf->b_data;
|
||||
/*
|
||||
* traverse_zil is just here for zdb's leak checking.
|
||||
* For other consumers, there will be no ZIL blocks.
|
||||
*/
|
||||
traverse_zil(td, &osp->os_zil_header);
|
||||
|
||||
for (j = 0; j < osp->os_meta_dnode.dn_nblkptr; j++) {
|
||||
SET_BOOKMARK(&czb, zb->zb_objset, 0,
|
||||
osp->os_meta_dnode.dn_nlevels - 1, j);
|
||||
err = traverse_visitbp(td, &osp->os_meta_dnode, buf,
|
||||
(blkptr_t *)&osp->os_meta_dnode.dn_blkptr[j],
|
||||
&czb);
|
||||
if (err)
|
||||
break;
|
||||
dnp = &osp->os_meta_dnode;
|
||||
err = traverse_dnode(td, dnp, buf, zb->zb_objset, 0);
|
||||
if (err == 0 && arc_buf_size(buf) >= sizeof (objset_phys_t)) {
|
||||
dnp = &osp->os_userused_dnode;
|
||||
err = traverse_dnode(td, dnp, buf, zb->zb_objset,
|
||||
DMU_USERUSED_OBJECT);
|
||||
}
|
||||
if (err == 0 && arc_buf_size(buf) >= sizeof (objset_phys_t)) {
|
||||
dnp = &osp->os_groupused_dnode;
|
||||
err = traverse_dnode(td, dnp, buf, zb->zb_objset,
|
||||
DMU_GROUPUSED_OBJECT);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -249,6 +246,23 @@ traverse_visitbp(struct traverse_data *td, const dnode_phys_t *dnp,
|
|||
return (err);
|
||||
}
|
||||
|
||||
static int
|
||||
traverse_dnode(struct traverse_data *td, const dnode_phys_t *dnp,
|
||||
arc_buf_t *buf, uint64_t objset, uint64_t object)
|
||||
{
|
||||
int j, err = 0;
|
||||
zbookmark_t czb;
|
||||
|
||||
for (j = 0; j < dnp->dn_nblkptr; j++) {
|
||||
SET_BOOKMARK(&czb, objset, object, dnp->dn_nlevels - 1, j);
|
||||
err = traverse_visitbp(td, dnp, buf,
|
||||
(blkptr_t *)&dnp->dn_blkptr[j], &czb);
|
||||
if (err)
|
||||
break;
|
||||
}
|
||||
return (err);
|
||||
}
|
||||
|
||||
/* ARGSUSED */
|
||||
static int
|
||||
traverse_prefetcher(spa_t *spa, blkptr_t *bp, const zbookmark_t *zb,
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
|
@ -160,6 +160,41 @@ dmu_tx_check_ioerr(zio_t *zio, dnode_t *dn, int level, uint64_t blkid)
|
|||
return (err);
|
||||
}
|
||||
|
||||
static void
|
||||
dmu_tx_count_indirects(dmu_tx_hold_t *txh, dmu_buf_impl_t *db,
|
||||
boolean_t freeable, dmu_buf_impl_t **history)
|
||||
{
|
||||
int i = db->db_level + 1;
|
||||
dnode_t *dn = db->db_dnode;
|
||||
|
||||
if (i >= dn->dn_nlevels)
|
||||
return;
|
||||
|
||||
db = db->db_parent;
|
||||
if (db == NULL) {
|
||||
uint64_t lvls = dn->dn_nlevels - i;
|
||||
|
||||
txh->txh_space_towrite += lvls << dn->dn_indblkshift;
|
||||
return;
|
||||
}
|
||||
|
||||
if (db != history[i]) {
|
||||
dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset;
|
||||
uint64_t space = 1ULL << dn->dn_indblkshift;
|
||||
|
||||
freeable = (db->db_blkptr && (freeable ||
|
||||
dsl_dataset_block_freeable(ds, db->db_blkptr->blk_birth)));
|
||||
if (freeable)
|
||||
txh->txh_space_tooverwrite += space;
|
||||
else
|
||||
txh->txh_space_towrite += space;
|
||||
if (db->db_blkptr)
|
||||
txh->txh_space_tounref += space;
|
||||
history[i] = db;
|
||||
dmu_tx_count_indirects(txh, db, freeable, history);
|
||||
}
|
||||
}
|
||||
|
||||
/* ARGSUSED */
|
||||
static void
|
||||
dmu_tx_count_write(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
|
||||
|
@ -177,18 +212,24 @@ dmu_tx_count_write(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
|
|||
min_ibs = DN_MIN_INDBLKSHIFT;
|
||||
max_ibs = DN_MAX_INDBLKSHIFT;
|
||||
|
||||
/*
|
||||
* For i/o error checking, read the first and last level-0
|
||||
* blocks (if they are not aligned), and all the level-1 blocks.
|
||||
*/
|
||||
|
||||
if (dn) {
|
||||
dmu_buf_impl_t *last[DN_MAX_LEVELS];
|
||||
int nlvls = dn->dn_nlevels;
|
||||
int delta;
|
||||
|
||||
/*
|
||||
* For i/o error checking, read the first and last level-0
|
||||
* blocks (if they are not aligned), and all the level-1 blocks.
|
||||
*/
|
||||
if (dn->dn_maxblkid == 0) {
|
||||
if ((off > 0 || len < dn->dn_datablksz) &&
|
||||
off < dn->dn_datablksz) {
|
||||
delta = dn->dn_datablksz;
|
||||
start = (off < dn->dn_datablksz) ? 0 : 1;
|
||||
end = (off+len <= dn->dn_datablksz) ? 0 : 1;
|
||||
if (start == 0 && (off > 0 || len < dn->dn_datablksz)) {
|
||||
err = dmu_tx_check_ioerr(NULL, dn, 0, 0);
|
||||
if (err)
|
||||
goto out;
|
||||
delta -= off;
|
||||
}
|
||||
} else {
|
||||
zio_t *zio = zio_root(dn->dn_objset->os_spa,
|
||||
|
@ -213,10 +254,9 @@ dmu_tx_count_write(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
|
|||
}
|
||||
|
||||
/* level-1 blocks */
|
||||
if (dn->dn_nlevels > 1) {
|
||||
start >>= dn->dn_indblkshift - SPA_BLKPTRSHIFT;
|
||||
end >>= dn->dn_indblkshift - SPA_BLKPTRSHIFT;
|
||||
for (i = start+1; i < end; i++) {
|
||||
if (nlvls > 1) {
|
||||
int shft = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
|
||||
for (i = (start>>shft)+1; i < end>>shft; i++) {
|
||||
err = dmu_tx_check_ioerr(zio, dn, 1, i);
|
||||
if (err)
|
||||
goto out;
|
||||
|
@ -226,20 +266,70 @@ dmu_tx_count_write(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
|
|||
err = zio_wait(zio);
|
||||
if (err)
|
||||
goto out;
|
||||
delta = P2NPHASE(off, dn->dn_datablksz);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* If there's more than one block, the blocksize can't change,
|
||||
* so we can make a more precise estimate. Alternatively,
|
||||
* if the dnode's ibs is larger than max_ibs, always use that.
|
||||
* This ensures that if we reduce DN_MAX_INDBLKSHIFT,
|
||||
* the code will still work correctly on existing pools.
|
||||
*/
|
||||
if (dn && (dn->dn_maxblkid != 0 || dn->dn_indblkshift > max_ibs)) {
|
||||
min_ibs = max_ibs = dn->dn_indblkshift;
|
||||
if (dn->dn_datablkshift != 0)
|
||||
if (dn->dn_maxblkid > 0) {
|
||||
/*
|
||||
* The blocksize can't change,
|
||||
* so we can make a more precise estimate.
|
||||
*/
|
||||
ASSERT(dn->dn_datablkshift != 0);
|
||||
min_bs = max_bs = dn->dn_datablkshift;
|
||||
min_ibs = max_ibs = dn->dn_indblkshift;
|
||||
} else if (dn->dn_indblkshift > max_ibs) {
|
||||
/*
|
||||
* This ensures that if we reduce DN_MAX_INDBLKSHIFT,
|
||||
* the code will still work correctly on older pools.
|
||||
*/
|
||||
min_ibs = max_ibs = dn->dn_indblkshift;
|
||||
}
|
||||
|
||||
/*
|
||||
* If this write is not off the end of the file
|
||||
* we need to account for overwrites/unref.
|
||||
*/
|
||||
if (start <= dn->dn_maxblkid)
|
||||
bzero(last, sizeof (dmu_buf_impl_t *) * DN_MAX_LEVELS);
|
||||
while (start <= dn->dn_maxblkid) {
|
||||
spa_t *spa = txh->txh_tx->tx_pool->dp_spa;
|
||||
dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset;
|
||||
dmu_buf_impl_t *db;
|
||||
|
||||
rw_enter(&dn->dn_struct_rwlock, RW_READER);
|
||||
db = dbuf_hold_level(dn, 0, start, FTAG);
|
||||
rw_exit(&dn->dn_struct_rwlock);
|
||||
if (db->db_blkptr && dsl_dataset_block_freeable(ds,
|
||||
db->db_blkptr->blk_birth)) {
|
||||
dprintf_bp(db->db_blkptr, "can free old%s", "");
|
||||
txh->txh_space_tooverwrite += dn->dn_datablksz;
|
||||
txh->txh_space_tounref += dn->dn_datablksz;
|
||||
dmu_tx_count_indirects(txh, db, TRUE, last);
|
||||
} else {
|
||||
txh->txh_space_towrite += dn->dn_datablksz;
|
||||
if (db->db_blkptr)
|
||||
txh->txh_space_tounref +=
|
||||
bp_get_dasize(spa, db->db_blkptr);
|
||||
dmu_tx_count_indirects(txh, db, FALSE, last);
|
||||
}
|
||||
dbuf_rele(db, FTAG);
|
||||
if (++start > end) {
|
||||
/*
|
||||
* Account for new indirects appearing
|
||||
* before this IO gets assigned into a txg.
|
||||
*/
|
||||
bits = 64 - min_bs;
|
||||
epbs = min_ibs - SPA_BLKPTRSHIFT;
|
||||
for (bits -= epbs * (nlvls - 1);
|
||||
bits >= 0; bits -= epbs)
|
||||
txh->txh_fudge += 1ULL << max_ibs;
|
||||
goto out;
|
||||
}
|
||||
off += delta;
|
||||
if (len >= delta)
|
||||
len -= delta;
|
||||
delta = dn->dn_datablksz;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -262,20 +352,22 @@ dmu_tx_count_write(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
|
|||
for (bits = 64 - min_bs; bits >= 0; bits -= epbs) {
|
||||
start >>= epbs;
|
||||
end >>= epbs;
|
||||
/*
|
||||
* If we increase the number of levels of indirection,
|
||||
* we'll need new blkid=0 indirect blocks. If start == 0,
|
||||
* we're already accounting for that block; and if end == 0,
|
||||
* we can't increase the number of levels beyond that.
|
||||
*/
|
||||
if (start != 0 && end != 0)
|
||||
txh->txh_space_towrite += 1ULL << max_ibs;
|
||||
ASSERT3U(end, >=, start);
|
||||
txh->txh_space_towrite += (end - start + 1) << max_ibs;
|
||||
if (start != 0) {
|
||||
/*
|
||||
* We also need a new blkid=0 indirect block
|
||||
* to reference any existing file data.
|
||||
*/
|
||||
txh->txh_space_towrite += 1ULL << max_ibs;
|
||||
}
|
||||
}
|
||||
|
||||
ASSERT(txh->txh_space_towrite < 2 * DMU_MAX_ACCESS);
|
||||
|
||||
out:
|
||||
if (txh->txh_space_towrite + txh->txh_space_tooverwrite >
|
||||
2 * DMU_MAX_ACCESS)
|
||||
err = EFBIG;
|
||||
|
||||
if (err)
|
||||
txh->txh_tx->tx_err = err;
|
||||
}
|
||||
|
@ -292,6 +384,7 @@ dmu_tx_count_dnode(dmu_tx_hold_t *txh)
|
|||
dsl_dataset_block_freeable(dn->dn_objset->os_dsl_dataset,
|
||||
dn->dn_dbuf->db_blkptr->blk_birth)) {
|
||||
txh->txh_space_tooverwrite += space;
|
||||
txh->txh_space_tounref += space;
|
||||
} else {
|
||||
txh->txh_space_towrite += space;
|
||||
if (dn && dn->dn_dbuf->db_blkptr)
|
||||
|
@ -535,7 +628,7 @@ dmu_tx_hold_free(dmu_tx_t *tx, uint64_t object, uint64_t off, uint64_t len)
|
|||
}
|
||||
|
||||
void
|
||||
dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, char *name)
|
||||
dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, const char *name)
|
||||
{
|
||||
dmu_tx_hold_t *txh;
|
||||
dnode_t *dn;
|
||||
|
@ -584,9 +677,9 @@ dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, char *name)
|
|||
txh->txh_space_tooverwrite += SPA_MAXBLOCKSIZE;
|
||||
} else {
|
||||
txh->txh_space_towrite += SPA_MAXBLOCKSIZE;
|
||||
txh->txh_space_tounref +=
|
||||
BP_GET_ASIZE(dn->dn_phys->dn_blkptr);
|
||||
}
|
||||
if (dn->dn_phys->dn_blkptr[0].blk_birth)
|
||||
txh->txh_space_tounref += SPA_MAXBLOCKSIZE;
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -603,12 +696,9 @@ dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, char *name)
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* 3 blocks overwritten: target leaf, ptrtbl block, header block
|
||||
* 3 new blocks written if adding: new split leaf, 2 grown ptrtbl blocks
|
||||
*/
|
||||
dmu_tx_count_write(txh, dn->dn_maxblkid * dn->dn_datablksz,
|
||||
(3 + (add ? 3 : 0)) << dn->dn_datablkshift);
|
||||
err = zap_count_write(&dn->dn_objset->os, dn->dn_object, name, add,
|
||||
&txh->txh_space_towrite, &txh->txh_space_tooverwrite,
|
||||
txh->txh_dnode->dn_datablkshift);
|
||||
|
||||
/*
|
||||
* If the modified blocks are scattered to the four winds,
|
||||
|
@ -616,7 +706,10 @@ dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, char *name)
|
|||
*/
|
||||
epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
|
||||
for (nblocks = dn->dn_maxblkid >> epbs; nblocks != 0; nblocks >>= epbs)
|
||||
txh->txh_space_towrite += 3 << dn->dn_indblkshift;
|
||||
if (dn->dn_objset->os_dsl_dataset->ds_phys->ds_prev_snap_obj)
|
||||
txh->txh_space_towrite += 3 << dn->dn_indblkshift;
|
||||
else
|
||||
txh->txh_space_tooverwrite += 3 << dn->dn_indblkshift;
|
||||
}
|
||||
|
||||
void
|
||||
|
|
|
@ -156,7 +156,7 @@ dnode_verify(dnode_t *dn)
|
|||
}
|
||||
if (dn->dn_phys->dn_type != DMU_OT_NONE)
|
||||
ASSERT3U(dn->dn_phys->dn_nlevels, <=, dn->dn_nlevels);
|
||||
ASSERT(dn->dn_object == DMU_META_DNODE_OBJECT || dn->dn_dbuf != NULL);
|
||||
ASSERT(DMU_OBJECT_IS_SPECIAL(dn->dn_object) || dn->dn_dbuf != NULL);
|
||||
if (dn->dn_dbuf != NULL) {
|
||||
ASSERT3P(dn->dn_phys, ==,
|
||||
(dnode_phys_t *)dn->dn_dbuf->db.db_data +
|
||||
|
@ -321,6 +321,7 @@ dnode_destroy(dnode_t *dn)
|
|||
}
|
||||
ASSERT(NULL == list_head(&dn->dn_dbufs));
|
||||
#endif
|
||||
ASSERT(dn->dn_oldphys == NULL);
|
||||
|
||||
mutex_enter(&os->os_lock);
|
||||
list_remove(&os->os_dnodes, dn);
|
||||
|
@ -417,8 +418,7 @@ void
|
|||
dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize,
|
||||
dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
|
||||
{
|
||||
int i, nblkptr;
|
||||
dmu_buf_impl_t *db = NULL;
|
||||
int nblkptr;
|
||||
|
||||
ASSERT3U(blocksize, >=, SPA_MINBLOCKSIZE);
|
||||
ASSERT3U(blocksize, <=, SPA_MAXBLOCKSIZE);
|
||||
|
@ -430,42 +430,25 @@ dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize,
|
|||
ASSERT3U(bonustype, <, DMU_OT_NUMTYPES);
|
||||
ASSERT3U(bonuslen, <=, DN_MAX_BONUSLEN);
|
||||
|
||||
for (i = 0; i < TXG_SIZE; i++)
|
||||
ASSERT(!list_link_active(&dn->dn_dirty_link[i]));
|
||||
|
||||
/* clean up any unreferenced dbufs */
|
||||
dnode_evict_dbufs(dn);
|
||||
ASSERT3P(list_head(&dn->dn_dbufs), ==, NULL);
|
||||
|
||||
/*
|
||||
* XXX I should really have a generation number to tell if we
|
||||
* need to do this...
|
||||
*/
|
||||
if (blocksize != dn->dn_datablksz ||
|
||||
dn->dn_bonustype != bonustype || dn->dn_bonuslen != bonuslen) {
|
||||
/* free all old data */
|
||||
dnode_free_range(dn, 0, -1ULL, tx);
|
||||
}
|
||||
|
||||
nblkptr = 1 + ((DN_MAX_BONUSLEN - bonuslen) >> SPA_BLKPTRSHIFT);
|
||||
|
||||
/* change blocksize */
|
||||
rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
|
||||
if (blocksize != dn->dn_datablksz &&
|
||||
(!BP_IS_HOLE(&dn->dn_phys->dn_blkptr[0]) ||
|
||||
list_head(&dn->dn_dbufs) != NULL)) {
|
||||
db = dbuf_hold(dn, 0, FTAG);
|
||||
dbuf_new_size(db, blocksize, tx);
|
||||
}
|
||||
dnode_setdblksz(dn, blocksize);
|
||||
dnode_setdirty(dn, tx);
|
||||
dn->dn_next_bonuslen[tx->tx_txg&TXG_MASK] = bonuslen;
|
||||
dn->dn_next_blksz[tx->tx_txg&TXG_MASK] = blocksize;
|
||||
if (dn->dn_datablksz != blocksize) {
|
||||
/* change blocksize */
|
||||
ASSERT(dn->dn_maxblkid == 0 &&
|
||||
(BP_IS_HOLE(&dn->dn_phys->dn_blkptr[0]) ||
|
||||
dnode_block_freed(dn, 0)));
|
||||
dnode_setdblksz(dn, blocksize);
|
||||
dn->dn_next_blksz[tx->tx_txg&TXG_MASK] = blocksize;
|
||||
}
|
||||
if (dn->dn_bonuslen != bonuslen)
|
||||
dn->dn_next_bonuslen[tx->tx_txg&TXG_MASK] = bonuslen;
|
||||
nblkptr = 1 + ((DN_MAX_BONUSLEN - bonuslen) >> SPA_BLKPTRSHIFT);
|
||||
if (dn->dn_nblkptr != nblkptr)
|
||||
dn->dn_next_nblkptr[tx->tx_txg&TXG_MASK] = nblkptr;
|
||||
rw_exit(&dn->dn_struct_rwlock);
|
||||
if (db)
|
||||
dbuf_rele(db, FTAG);
|
||||
|
||||
/* change type */
|
||||
dn->dn_type = ot;
|
||||
|
@ -569,6 +552,22 @@ dnode_hold_impl(objset_impl_t *os, uint64_t object, int flag,
|
|||
*/
|
||||
ASSERT(spa_config_held(os->os_spa, SCL_ALL, RW_WRITER) == 0);
|
||||
|
||||
if (object == DMU_USERUSED_OBJECT || object == DMU_GROUPUSED_OBJECT) {
|
||||
dn = (object == DMU_USERUSED_OBJECT) ?
|
||||
os->os_userused_dnode : os->os_groupused_dnode;
|
||||
if (dn == NULL)
|
||||
return (ENOENT);
|
||||
type = dn->dn_type;
|
||||
if ((flag & DNODE_MUST_BE_ALLOCATED) && type == DMU_OT_NONE)
|
||||
return (ENOENT);
|
||||
if ((flag & DNODE_MUST_BE_FREE) && type != DMU_OT_NONE)
|
||||
return (EEXIST);
|
||||
DNODE_VERIFY(dn);
|
||||
(void) refcount_add(&dn->dn_holds, tag);
|
||||
*dnp = dn;
|
||||
return (0);
|
||||
}
|
||||
|
||||
if (object == 0 || object >= DN_MAX_OBJECT)
|
||||
return (EINVAL);
|
||||
|
||||
|
@ -627,7 +626,8 @@ dnode_hold_impl(objset_impl_t *os, uint64_t object, int flag,
|
|||
type = dn->dn_type;
|
||||
if (dn->dn_free_txg ||
|
||||
((flag & DNODE_MUST_BE_ALLOCATED) && type == DMU_OT_NONE) ||
|
||||
((flag & DNODE_MUST_BE_FREE) && type != DMU_OT_NONE)) {
|
||||
((flag & DNODE_MUST_BE_FREE) &&
|
||||
(type != DMU_OT_NONE || dn->dn_oldphys))) {
|
||||
mutex_exit(&dn->dn_mtx);
|
||||
dbuf_rele(db, FTAG);
|
||||
return (type == DMU_OT_NONE ? ENOENT : EEXIST);
|
||||
|
@ -692,8 +692,10 @@ dnode_setdirty(dnode_t *dn, dmu_tx_t *tx)
|
|||
objset_impl_t *os = dn->dn_objset;
|
||||
uint64_t txg = tx->tx_txg;
|
||||
|
||||
if (dn->dn_object == DMU_META_DNODE_OBJECT)
|
||||
if (DMU_OBJECT_IS_SPECIAL(dn->dn_object)) {
|
||||
dsl_dataset_dirty(os->os_dsl_dataset, tx);
|
||||
return;
|
||||
}
|
||||
|
||||
DNODE_VERIFY(dn);
|
||||
|
||||
|
@ -1189,11 +1191,6 @@ dnode_block_freed(dnode_t *dn, uint64_t blkid)
|
|||
if (dn->dn_free_txg)
|
||||
return (TRUE);
|
||||
|
||||
/*
|
||||
* If dn_datablkshift is not set, then there's only a single
|
||||
* block, in which case there will never be a free range so it
|
||||
* won't matter.
|
||||
*/
|
||||
range_tofind.fr_blkid = blkid;
|
||||
mutex_enter(&dn->dn_mtx);
|
||||
for (i = 0; i < TXG_SIZE; i++) {
|
||||
|
@ -1278,7 +1275,7 @@ dnode_next_offset_level(dnode_t *dn, int flags, uint64_t *offset,
|
|||
dprintf("probing object %llu offset %llx level %d of %u\n",
|
||||
dn->dn_object, *offset, lvl, dn->dn_phys->dn_nlevels);
|
||||
|
||||
hole = flags & DNODE_FIND_HOLE;
|
||||
hole = ((flags & DNODE_FIND_HOLE) != 0);
|
||||
inc = (flags & DNODE_FIND_BACKWARDS) ? -1 : 1;
|
||||
ASSERT(txg == 0 || !hole);
|
||||
|
||||
|
@ -1325,16 +1322,7 @@ dnode_next_offset_level(dnode_t *dn, int flags, uint64_t *offset,
|
|||
|
||||
for (i = (*offset >> span) & (blkfill - 1);
|
||||
i >= 0 && i < blkfill; i += inc) {
|
||||
boolean_t newcontents = B_TRUE;
|
||||
if (txg) {
|
||||
int j;
|
||||
newcontents = B_FALSE;
|
||||
for (j = 0; j < dnp[i].dn_nblkptr; j++) {
|
||||
if (dnp[i].dn_blkptr[j].blk_birth > txg)
|
||||
newcontents = B_TRUE;
|
||||
}
|
||||
}
|
||||
if (!dnp[i].dn_type == hole && newcontents)
|
||||
if ((dnp[i].dn_type == DMU_OT_NONE) == hole)
|
||||
break;
|
||||
*offset += (1ULL << span) * inc;
|
||||
}
|
||||
|
|
|
@ -504,9 +504,6 @@ dnode_sync_free(dnode_t *dn, dmu_tx_t *tx)
|
|||
|
||||
/*
|
||||
* Write out the dnode's dirty buffers.
|
||||
*
|
||||
* NOTE: The dnode is kept in memory by being dirty. Once the
|
||||
* dirty bit is cleared, it may be evicted. Beware of this!
|
||||
*/
|
||||
void
|
||||
dnode_sync(dnode_t *dn, dmu_tx_t *tx)
|
||||
|
@ -515,20 +512,33 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx)
|
|||
dnode_phys_t *dnp = dn->dn_phys;
|
||||
int txgoff = tx->tx_txg & TXG_MASK;
|
||||
list_t *list = &dn->dn_dirty_records[txgoff];
|
||||
static const dnode_phys_t zerodn = { 0 };
|
||||
|
||||
ASSERT(dmu_tx_is_syncing(tx));
|
||||
ASSERT(dnp->dn_type != DMU_OT_NONE || dn->dn_allocated_txg);
|
||||
ASSERT(dnp->dn_type != DMU_OT_NONE ||
|
||||
bcmp(dnp, &zerodn, DNODE_SIZE) == 0);
|
||||
DNODE_VERIFY(dn);
|
||||
|
||||
ASSERT(dn->dn_dbuf == NULL || arc_released(dn->dn_dbuf->db_buf));
|
||||
|
||||
if (dmu_objset_userused_enabled(dn->dn_objset) &&
|
||||
!DMU_OBJECT_IS_SPECIAL(dn->dn_object)) {
|
||||
ASSERT(dn->dn_oldphys == NULL);
|
||||
dn->dn_oldphys = zio_buf_alloc(sizeof (dnode_phys_t));
|
||||
*dn->dn_oldphys = *dn->dn_phys; /* struct assignment */
|
||||
dn->dn_phys->dn_flags |= DNODE_FLAG_USERUSED_ACCOUNTED;
|
||||
} else {
|
||||
/* Once we account for it, we should always account for it. */
|
||||
ASSERT(!(dn->dn_phys->dn_flags &
|
||||
DNODE_FLAG_USERUSED_ACCOUNTED));
|
||||
}
|
||||
|
||||
mutex_enter(&dn->dn_mtx);
|
||||
if (dn->dn_allocated_txg == tx->tx_txg) {
|
||||
/* The dnode is newly allocated or reallocated */
|
||||
if (dnp->dn_type == DMU_OT_NONE) {
|
||||
/* this is a first alloc, not a realloc */
|
||||
/* XXX shouldn't the phys already be zeroed? */
|
||||
bzero(dnp, DNODE_CORE_SIZE);
|
||||
dnp->dn_nlevels = 1;
|
||||
dnp->dn_nblkptr = dn->dn_nblkptr;
|
||||
}
|
||||
|
@ -626,7 +636,7 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx)
|
|||
|
||||
dbuf_sync_list(list, tx);
|
||||
|
||||
if (dn->dn_object != DMU_META_DNODE_OBJECT) {
|
||||
if (!DMU_OBJECT_IS_SPECIAL(dn->dn_object)) {
|
||||
ASSERT3P(list_head(list), ==, NULL);
|
||||
dnode_rele(dn, (void *)(uintptr_t)tx->tx_txg);
|
||||
}
|
||||
|
|
|
@ -229,7 +229,7 @@ dsl_dataset_prev_snap_txg(dsl_dataset_t *ds)
|
|||
return (MAX(ds->ds_phys->ds_prev_snap_txg, trysnap));
|
||||
}
|
||||
|
||||
int
|
||||
boolean_t
|
||||
dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth)
|
||||
{
|
||||
return (blk_birth > dsl_dataset_prev_snap_txg(ds));
|
||||
|
@ -548,6 +548,7 @@ dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj, int flags, void *owner,
|
|||
return (err);
|
||||
if (!dsl_dataset_tryown(*dsp, DS_MODE_IS_INCONSISTENT(flags), owner)) {
|
||||
dsl_dataset_rele(*dsp, owner);
|
||||
*dsp = NULL;
|
||||
return (EBUSY);
|
||||
}
|
||||
return (0);
|
||||
|
@ -974,6 +975,27 @@ dsl_dataset_destroy(dsl_dataset_t *ds, void *tag)
|
|||
(void) dmu_free_object(os, obj);
|
||||
}
|
||||
|
||||
/*
|
||||
* We need to sync out all in-flight IO before we try to evict
|
||||
* (the dataset evict func is trying to clear the cached entries
|
||||
* for this dataset in the ARC).
|
||||
*/
|
||||
txg_wait_synced(dd->dd_pool, 0);
|
||||
|
||||
/*
|
||||
* If we managed to free all the objects in open
|
||||
* context, the user space accounting should be zero.
|
||||
*/
|
||||
if (ds->ds_phys->ds_bp.blk_fill == 0 &&
|
||||
dmu_objset_userused_enabled(os->os)) {
|
||||
uint64_t count;
|
||||
|
||||
ASSERT(zap_count(os, DMU_USERUSED_OBJECT, &count) != 0 ||
|
||||
count == 0);
|
||||
ASSERT(zap_count(os, DMU_GROUPUSED_OBJECT, &count) != 0 ||
|
||||
count == 0);
|
||||
}
|
||||
|
||||
dmu_objset_close(os);
|
||||
if (err != ESRCH)
|
||||
goto out;
|
||||
|
@ -1058,7 +1080,6 @@ dsl_dataset_get_user_ptr(dsl_dataset_t *ds)
|
|||
return (ds->ds_user_ptr);
|
||||
}
|
||||
|
||||
|
||||
blkptr_t *
|
||||
dsl_dataset_get_blkptr(dsl_dataset_t *ds)
|
||||
{
|
||||
|
@ -1164,8 +1185,18 @@ kill_blkptr(spa_t *spa, blkptr_t *bp, const zbookmark_t *zb,
|
|||
if (bp == NULL)
|
||||
return (0);
|
||||
|
||||
ASSERT3U(bp->blk_birth, >, ka->ds->ds_phys->ds_prev_snap_txg);
|
||||
(void) dsl_dataset_block_kill(ka->ds, bp, ka->zio, ka->tx);
|
||||
if ((zb->zb_level == -1ULL && zb->zb_blkid != 0) ||
|
||||
(zb->zb_object != 0 && dnp == NULL)) {
|
||||
/*
|
||||
* It's a block in the intent log. It has no
|
||||
* accounting, so just free it.
|
||||
*/
|
||||
VERIFY3U(0, ==, dsl_free(ka->zio, ka->tx->tx_pool,
|
||||
ka->tx->tx_txg, bp, NULL, NULL, ARC_NOWAIT));
|
||||
} else {
|
||||
ASSERT3U(bp->blk_birth, >, ka->ds->ds_phys->ds_prev_snap_txg);
|
||||
(void) dsl_dataset_block_kill(ka->ds, bp, ka->zio, ka->tx);
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
@ -1209,13 +1240,7 @@ dsl_dataset_rollback_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
|
|||
|
||||
dmu_buf_will_dirty(ds->ds_dbuf, tx);
|
||||
|
||||
/*
|
||||
* Before the roll back destroy the zil.
|
||||
*/
|
||||
if (ds->ds_user_ptr != NULL) {
|
||||
zil_rollback_destroy(
|
||||
((objset_impl_t *)ds->ds_user_ptr)->os_zil, tx);
|
||||
|
||||
/*
|
||||
* We need to make sure that the objset_impl_t is reopened after
|
||||
* we do the rollback, otherwise it will have the wrong
|
||||
|
@ -1248,7 +1273,10 @@ dsl_dataset_rollback_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
|
|||
ds->ds_phys->ds_deadlist_obj));
|
||||
|
||||
{
|
||||
/* Free blkptrs that we gave birth to */
|
||||
/*
|
||||
* Free blkptrs that we gave birth to - this covers
|
||||
* claimed but not played log blocks too.
|
||||
*/
|
||||
zio_t *zio;
|
||||
struct killarg ka;
|
||||
|
||||
|
@ -1262,8 +1290,7 @@ dsl_dataset_rollback_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
|
|||
(void) zio_wait(zio);
|
||||
}
|
||||
|
||||
ASSERT(!(ds->ds_phys->ds_flags & DS_FLAG_UNIQUE_ACCURATE) ||
|
||||
ds->ds_phys->ds_unique_bytes == 0);
|
||||
ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) || ds->ds_phys->ds_unique_bytes == 0);
|
||||
|
||||
if (ds->ds_prev && ds->ds_prev != ds->ds_dir->dd_pool->dp_origin_snap) {
|
||||
/* Change our contents to that of the prev snapshot */
|
||||
|
@ -1481,7 +1508,7 @@ dsl_dataset_destroy_sync(void *arg1, void *tag, cred_t *cr, dmu_tx_t *tx)
|
|||
dmu_buf_will_dirty(ds_prev->ds_dbuf, tx);
|
||||
if (after_branch_point &&
|
||||
ds_prev->ds_phys->ds_next_clones_obj != 0) {
|
||||
VERIFY(0 == zap_remove_int(mos,
|
||||
VERIFY3U(0, ==, zap_remove_int(mos,
|
||||
ds_prev->ds_phys->ds_next_clones_obj, obj, tx));
|
||||
if (ds->ds_phys->ds_next_snap_obj != 0) {
|
||||
VERIFY(0 == zap_add_int(mos,
|
||||
|
@ -1654,7 +1681,7 @@ dsl_dataset_destroy_sync(void *arg1, void *tag, cred_t *cr, dmu_tx_t *tx)
|
|||
err = traverse_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
|
||||
TRAVERSE_POST, kill_blkptr, &ka);
|
||||
ASSERT3U(err, ==, 0);
|
||||
ASSERT(spa_version(dp->dp_spa) < SPA_VERSION_UNIQUE_ACCURATE ||
|
||||
ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) ||
|
||||
ds->ds_phys->ds_unique_bytes == 0);
|
||||
}
|
||||
|
||||
|
@ -2583,7 +2610,7 @@ snaplist_destroy(list_t *l, boolean_t own)
|
|||
{
|
||||
struct promotenode *snap;
|
||||
|
||||
if (!list_link_active(&l->list_head))
|
||||
if (!l || !list_link_active(&l->list_head))
|
||||
return;
|
||||
|
||||
while ((snap = list_tail(l)) != NULL) {
|
||||
|
|
|
@ -227,24 +227,11 @@ dsl_dir_namelen(dsl_dir_t *dd)
|
|||
return (result);
|
||||
}
|
||||
|
||||
int
|
||||
dsl_dir_is_private(dsl_dir_t *dd)
|
||||
{
|
||||
int rv = FALSE;
|
||||
|
||||
if (dd->dd_parent && dsl_dir_is_private(dd->dd_parent))
|
||||
rv = TRUE;
|
||||
if (dataset_name_hidden(dd->dd_myname))
|
||||
rv = TRUE;
|
||||
return (rv);
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
getcomponent(const char *path, char *component, const char **nextp)
|
||||
{
|
||||
char *p;
|
||||
if (path == NULL)
|
||||
if ((path == NULL) || (path[0] == '\0'))
|
||||
return (ENOENT);
|
||||
/* This would be a good place to reserve some namespace... */
|
||||
p = strpbrk(path, "/@");
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
|
@ -90,6 +90,9 @@ dsl_pool_open_impl(spa_t *spa, uint64_t txg)
|
|||
mutex_init(&dp->dp_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
mutex_init(&dp->dp_scrub_cancel_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
|
||||
dp->dp_vnrele_taskq = taskq_create("zfs_vn_rele_taskq", 1, minclsyspri,
|
||||
1, 4, 0);
|
||||
|
||||
return (dp);
|
||||
}
|
||||
|
||||
|
@ -129,14 +132,15 @@ dsl_pool_open(spa_t *spa, uint64_t txg, dsl_pool_t **dpp)
|
|||
goto out;
|
||||
err = dsl_dataset_hold_obj(dp, dd->dd_phys->dd_head_dataset_obj,
|
||||
FTAG, &ds);
|
||||
if (err)
|
||||
goto out;
|
||||
err = dsl_dataset_hold_obj(dp, ds->ds_phys->ds_prev_snap_obj,
|
||||
dp, &dp->dp_origin_snap);
|
||||
if (err)
|
||||
goto out;
|
||||
dsl_dataset_rele(ds, FTAG);
|
||||
if (err == 0) {
|
||||
err = dsl_dataset_hold_obj(dp,
|
||||
ds->ds_phys->ds_prev_snap_obj, dp,
|
||||
&dp->dp_origin_snap);
|
||||
dsl_dataset_rele(ds, FTAG);
|
||||
}
|
||||
dsl_dir_close(dd, dp);
|
||||
if (err)
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* get scrub status */
|
||||
|
@ -226,6 +230,7 @@ dsl_pool_close(dsl_pool_t *dp)
|
|||
rw_destroy(&dp->dp_config_rwlock);
|
||||
mutex_destroy(&dp->dp_lock);
|
||||
mutex_destroy(&dp->dp_scrub_cancel_lock);
|
||||
taskq_destroy(dp->dp_vnrele_taskq);
|
||||
if (dp->dp_blkstats)
|
||||
kmem_free(dp->dp_blkstats, sizeof (zfs_all_blkstats_t));
|
||||
kmem_free(dp, sizeof (dsl_pool_t));
|
||||
|
@ -296,24 +301,52 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg)
|
|||
tx = dmu_tx_create_assigned(dp, txg);
|
||||
|
||||
dp->dp_read_overhead = 0;
|
||||
start = gethrtime();
|
||||
|
||||
zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
|
||||
while (ds = txg_list_remove(&dp->dp_dirty_datasets, txg)) {
|
||||
if (!list_link_active(&ds->ds_synced_link))
|
||||
list_insert_tail(&dp->dp_synced_datasets, ds);
|
||||
else
|
||||
dmu_buf_rele(ds->ds_dbuf, ds);
|
||||
/*
|
||||
* We must not sync any non-MOS datasets twice, because
|
||||
* we may have taken a snapshot of them. However, we
|
||||
* may sync newly-created datasets on pass 2.
|
||||
*/
|
||||
ASSERT(!list_link_active(&ds->ds_synced_link));
|
||||
list_insert_tail(&dp->dp_synced_datasets, ds);
|
||||
dsl_dataset_sync(ds, zio, tx);
|
||||
}
|
||||
DTRACE_PROBE(pool_sync__1setup);
|
||||
|
||||
start = gethrtime();
|
||||
err = zio_wait(zio);
|
||||
|
||||
write_time = gethrtime() - start;
|
||||
ASSERT(err == 0);
|
||||
DTRACE_PROBE(pool_sync__2rootzio);
|
||||
|
||||
while (dstg = txg_list_remove(&dp->dp_sync_tasks, txg))
|
||||
for (ds = list_head(&dp->dp_synced_datasets); ds;
|
||||
ds = list_next(&dp->dp_synced_datasets, ds))
|
||||
dmu_objset_do_userquota_callbacks(ds->ds_user_ptr, tx);
|
||||
|
||||
/*
|
||||
* Sync the datasets again to push out the changes due to
|
||||
* userquota updates. This must be done before we process the
|
||||
* sync tasks, because that could cause a snapshot of a dataset
|
||||
* whose ds_bp will be rewritten when we do this 2nd sync.
|
||||
*/
|
||||
zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
|
||||
while (ds = txg_list_remove(&dp->dp_dirty_datasets, txg)) {
|
||||
ASSERT(list_link_active(&ds->ds_synced_link));
|
||||
dmu_buf_rele(ds->ds_dbuf, ds);
|
||||
dsl_dataset_sync(ds, zio, tx);
|
||||
}
|
||||
err = zio_wait(zio);
|
||||
|
||||
while (dstg = txg_list_remove(&dp->dp_sync_tasks, txg)) {
|
||||
/*
|
||||
* No more sync tasks should have been added while we
|
||||
* were syncing.
|
||||
*/
|
||||
ASSERT(spa_sync_pass(dp->dp_spa) == 1);
|
||||
dsl_sync_task_group_sync(dstg, tx);
|
||||
}
|
||||
DTRACE_PROBE(pool_sync__3task);
|
||||
|
||||
start = gethrtime();
|
||||
|
@ -611,3 +644,9 @@ dsl_pool_create_origin(dsl_pool_t *dp, dmu_tx_t *tx)
|
|||
dsl_dataset_rele(ds, FTAG);
|
||||
rw_exit(&dp->dp_config_rwlock);
|
||||
}
|
||||
|
||||
taskq_t *
|
||||
dsl_pool_vnrele_taskq(dsl_pool_t *dp)
|
||||
{
|
||||
return (dp->dp_vnrele_taskq);
|
||||
}
|
||||
|
|
|
@ -19,12 +19,10 @@
|
|||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/dmu_objset.h>
|
||||
#include <sys/dmu_tx.h>
|
||||
|
@ -415,6 +413,34 @@ dsl_prop_set_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
|
|||
"%s=%s dataset = %llu", psa->name, valstr, ds->ds_object);
|
||||
}
|
||||
|
||||
void
|
||||
dsl_props_set_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
|
||||
{
|
||||
dsl_dataset_t *ds = arg1;
|
||||
nvlist_t *nvl = arg2;
|
||||
nvpair_t *elem = NULL;
|
||||
|
||||
while ((elem = nvlist_next_nvpair(nvl, elem)) != NULL) {
|
||||
struct prop_set_arg psa;
|
||||
|
||||
psa.name = nvpair_name(elem);
|
||||
|
||||
if (nvpair_type(elem) == DATA_TYPE_STRING) {
|
||||
VERIFY(nvpair_value_string(elem,
|
||||
(char **)&psa.buf) == 0);
|
||||
psa.intsz = 1;
|
||||
psa.numints = strlen(psa.buf) + 1;
|
||||
} else {
|
||||
uint64_t intval;
|
||||
VERIFY(nvpair_value_uint64(elem, &intval) == 0);
|
||||
psa.intsz = sizeof (intval);
|
||||
psa.numints = 1;
|
||||
psa.buf = &intval;
|
||||
}
|
||||
dsl_prop_set_sync(ds, &psa, cr, tx);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
dsl_prop_set_uint64_sync(dsl_dir_t *dd, const char *name, uint64_t val,
|
||||
cred_t *cr, dmu_tx_t *tx)
|
||||
|
@ -438,6 +464,7 @@ dsl_prop_set(const char *dsname, const char *propname,
|
|||
int intsz, int numints, const void *buf)
|
||||
{
|
||||
dsl_dataset_t *ds;
|
||||
uint64_t version;
|
||||
int err;
|
||||
struct prop_set_arg psa;
|
||||
|
||||
|
@ -447,15 +474,19 @@ dsl_prop_set(const char *dsname, const char *propname,
|
|||
*/
|
||||
if (strlen(propname) >= ZAP_MAXNAMELEN)
|
||||
return (ENAMETOOLONG);
|
||||
if (intsz * numints >= ZAP_MAXVALUELEN)
|
||||
return (E2BIG);
|
||||
|
||||
err = dsl_dataset_hold(dsname, FTAG, &ds);
|
||||
if (err)
|
||||
return (err);
|
||||
|
||||
version = spa_version(ds->ds_dir->dd_pool->dp_spa);
|
||||
if (intsz * numints >= (version < SPA_VERSION_STMF_PROP ?
|
||||
ZAP_OLDMAXVALUELEN : ZAP_MAXVALUELEN)) {
|
||||
dsl_dataset_rele(ds, FTAG);
|
||||
return (E2BIG);
|
||||
}
|
||||
if (dsl_dataset_is_snapshot(ds) &&
|
||||
spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_SNAP_PROPS) {
|
||||
version < SPA_VERSION_SNAP_PROPS) {
|
||||
dsl_dataset_rele(ds, FTAG);
|
||||
return (ENOTSUP);
|
||||
}
|
||||
|
@ -471,6 +502,50 @@ dsl_prop_set(const char *dsname, const char *propname,
|
|||
return (err);
|
||||
}
|
||||
|
||||
int
|
||||
dsl_props_set(const char *dsname, nvlist_t *nvl)
|
||||
{
|
||||
dsl_dataset_t *ds;
|
||||
uint64_t version;
|
||||
nvpair_t *elem = NULL;
|
||||
int err;
|
||||
|
||||
if (err = dsl_dataset_hold(dsname, FTAG, &ds))
|
||||
return (err);
|
||||
/*
|
||||
* Do these checks before the syncfunc, since it can't fail.
|
||||
*/
|
||||
version = spa_version(ds->ds_dir->dd_pool->dp_spa);
|
||||
while ((elem = nvlist_next_nvpair(nvl, elem)) != NULL) {
|
||||
if (strlen(nvpair_name(elem)) >= ZAP_MAXNAMELEN) {
|
||||
dsl_dataset_rele(ds, FTAG);
|
||||
return (ENAMETOOLONG);
|
||||
}
|
||||
if (nvpair_type(elem) == DATA_TYPE_STRING) {
|
||||
char *valstr;
|
||||
VERIFY(nvpair_value_string(elem, &valstr) == 0);
|
||||
if (strlen(valstr) >= (version <
|
||||
SPA_VERSION_STMF_PROP ?
|
||||
ZAP_OLDMAXVALUELEN : ZAP_MAXVALUELEN)) {
|
||||
dsl_dataset_rele(ds, FTAG);
|
||||
return (E2BIG);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (dsl_dataset_is_snapshot(ds) &&
|
||||
version < SPA_VERSION_SNAP_PROPS) {
|
||||
dsl_dataset_rele(ds, FTAG);
|
||||
return (ENOTSUP);
|
||||
}
|
||||
|
||||
err = dsl_sync_task_do(ds->ds_dir->dd_pool,
|
||||
NULL, dsl_props_set_sync, ds, nvl, 2);
|
||||
|
||||
dsl_dataset_rele(ds, FTAG);
|
||||
return (err);
|
||||
}
|
||||
|
||||
/*
|
||||
* Iterate over all properties for this dataset and return them in an nvlist.
|
||||
*/
|
||||
|
|
|
@ -45,6 +45,8 @@ typedef int (scrub_cb_t)(dsl_pool_t *, const blkptr_t *, const zbookmark_t *);
|
|||
|
||||
static scrub_cb_t dsl_pool_scrub_clean_cb;
|
||||
static dsl_syncfunc_t dsl_pool_scrub_cancel_sync;
|
||||
static void scrub_visitdnode(dsl_pool_t *dp, dnode_phys_t *dnp, arc_buf_t *buf,
|
||||
uint64_t objset, uint64_t object);
|
||||
|
||||
int zfs_scrub_min_time = 1; /* scrub for at least 1 sec each txg */
|
||||
int zfs_resilver_min_time = 3; /* resilver for at least 3 sec each txg */
|
||||
|
@ -348,6 +350,12 @@ traverse_zil_block(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t claim_txg)
|
|||
if (bp->blk_birth <= dp->dp_scrub_min_txg)
|
||||
return;
|
||||
|
||||
/*
|
||||
* One block ("stubby") can be allocated a long time ago; we
|
||||
* want to visit that one because it has been allocated
|
||||
* (on-disk) even if it hasn't been claimed (even though for
|
||||
* plain scrub there's nothing to do to it).
|
||||
*/
|
||||
if (claim_txg == 0 && bp->blk_birth >= spa_first_txg(dp->dp_spa))
|
||||
return;
|
||||
|
||||
|
@ -373,6 +381,11 @@ traverse_zil_record(zilog_t *zilog, lr_t *lrc, void *arg, uint64_t claim_txg)
|
|||
if (bp->blk_birth <= dp->dp_scrub_min_txg)
|
||||
return;
|
||||
|
||||
/*
|
||||
* birth can be < claim_txg if this record's txg is
|
||||
* already txg sync'ed (but this log block contains
|
||||
* other records that are not synced)
|
||||
*/
|
||||
if (claim_txg == 0 || bp->blk_birth < claim_txg)
|
||||
return;
|
||||
|
||||
|
@ -472,7 +485,7 @@ scrub_visitbp(dsl_pool_t *dp, dnode_phys_t *dnp,
|
|||
} else if (BP_GET_TYPE(bp) == DMU_OT_DNODE) {
|
||||
uint32_t flags = ARC_WAIT;
|
||||
dnode_phys_t *child_dnp;
|
||||
int i, j;
|
||||
int i;
|
||||
int epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT;
|
||||
|
||||
err = arc_read(NULL, dp->dp_spa, bp, pbuf,
|
||||
|
@ -487,20 +500,12 @@ scrub_visitbp(dsl_pool_t *dp, dnode_phys_t *dnp,
|
|||
child_dnp = buf->b_data;
|
||||
|
||||
for (i = 0; i < epb; i++, child_dnp++) {
|
||||
for (j = 0; j < child_dnp->dn_nblkptr; j++) {
|
||||
zbookmark_t czb;
|
||||
|
||||
SET_BOOKMARK(&czb, zb->zb_objset,
|
||||
zb->zb_blkid * epb + i,
|
||||
child_dnp->dn_nlevels - 1, j);
|
||||
scrub_visitbp(dp, child_dnp, buf,
|
||||
&child_dnp->dn_blkptr[j], &czb);
|
||||
}
|
||||
scrub_visitdnode(dp, child_dnp, buf, zb->zb_objset,
|
||||
zb->zb_blkid * epb + i);
|
||||
}
|
||||
} else if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) {
|
||||
uint32_t flags = ARC_WAIT;
|
||||
objset_phys_t *osp;
|
||||
int j;
|
||||
|
||||
err = arc_read_nolock(NULL, dp->dp_spa, bp,
|
||||
arc_getbuf_func, &buf,
|
||||
|
@ -516,13 +521,13 @@ scrub_visitbp(dsl_pool_t *dp, dnode_phys_t *dnp,
|
|||
|
||||
traverse_zil(dp, &osp->os_zil_header);
|
||||
|
||||
for (j = 0; j < osp->os_meta_dnode.dn_nblkptr; j++) {
|
||||
zbookmark_t czb;
|
||||
|
||||
SET_BOOKMARK(&czb, zb->zb_objset, 0,
|
||||
osp->os_meta_dnode.dn_nlevels - 1, j);
|
||||
scrub_visitbp(dp, &osp->os_meta_dnode, buf,
|
||||
&osp->os_meta_dnode.dn_blkptr[j], &czb);
|
||||
scrub_visitdnode(dp, &osp->os_meta_dnode,
|
||||
buf, zb->zb_objset, 0);
|
||||
if (arc_buf_size(buf) >= sizeof (objset_phys_t)) {
|
||||
scrub_visitdnode(dp, &osp->os_userused_dnode,
|
||||
buf, zb->zb_objset, 0);
|
||||
scrub_visitdnode(dp, &osp->os_groupused_dnode,
|
||||
buf, zb->zb_objset, 0);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -531,6 +536,21 @@ scrub_visitbp(dsl_pool_t *dp, dnode_phys_t *dnp,
|
|||
(void) arc_buf_remove_ref(buf, &buf);
|
||||
}
|
||||
|
||||
static void
|
||||
scrub_visitdnode(dsl_pool_t *dp, dnode_phys_t *dnp, arc_buf_t *buf,
|
||||
uint64_t objset, uint64_t object)
|
||||
{
|
||||
int j;
|
||||
|
||||
for (j = 0; j < dnp->dn_nblkptr; j++) {
|
||||
zbookmark_t czb;
|
||||
|
||||
SET_BOOKMARK(&czb, objset, object, dnp->dn_nlevels - 1, j);
|
||||
scrub_visitbp(dp, dnp, buf, &dnp->dn_blkptr[j], &czb);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static void
|
||||
scrub_visit_rootbp(dsl_pool_t *dp, dsl_dataset_t *ds, blkptr_t *bp)
|
||||
{
|
||||
|
|
|
@ -19,11 +19,111 @@
|
|||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
/*
|
||||
* Fletcher Checksums
|
||||
* ------------------
|
||||
*
|
||||
* ZFS's 2nd and 4th order Fletcher checksums are defined by the following
|
||||
* recurrence relations:
|
||||
*
|
||||
* a = a + f
|
||||
* i i-1 i-1
|
||||
*
|
||||
* b = b + a
|
||||
* i i-1 i
|
||||
*
|
||||
* c = c + b (fletcher-4 only)
|
||||
* i i-1 i
|
||||
*
|
||||
* d = d + c (fletcher-4 only)
|
||||
* i i-1 i
|
||||
*
|
||||
* Where
|
||||
* a_0 = b_0 = c_0 = d_0 = 0
|
||||
* and
|
||||
* f_0 .. f_(n-1) are the input data.
|
||||
*
|
||||
* Using standard techniques, these translate into the following series:
|
||||
*
|
||||
* __n_ __n_
|
||||
* \ | \ |
|
||||
* a = > f b = > i * f
|
||||
* n /___| n - i n /___| n - i
|
||||
* i = 1 i = 1
|
||||
*
|
||||
*
|
||||
* __n_ __n_
|
||||
* \ | i*(i+1) \ | i*(i+1)*(i+2)
|
||||
* c = > ------- f d = > ------------- f
|
||||
* n /___| 2 n - i n /___| 6 n - i
|
||||
* i = 1 i = 1
|
||||
*
|
||||
* For fletcher-2, the f_is are 64-bit, and [ab]_i are 64-bit accumulators.
|
||||
* Since the additions are done mod (2^64), errors in the high bits may not
|
||||
* be noticed. For this reason, fletcher-2 is deprecated.
|
||||
*
|
||||
* For fletcher-4, the f_is are 32-bit, and [abcd]_i are 64-bit accumulators.
|
||||
* A conservative estimate of how big the buffer can get before we overflow
|
||||
* can be estimated using f_i = 0xffffffff for all i:
|
||||
*
|
||||
* % bc
|
||||
* f=2^32-1;d=0; for (i = 1; d<2^64; i++) { d += f*i*(i+1)*(i+2)/6 }; (i-1)*4
|
||||
* 2264
|
||||
* quit
|
||||
* %
|
||||
*
|
||||
* So blocks of up to 2k will not overflow. Our largest block size is
|
||||
* 128k, which has 32k 4-byte words, so we can compute the largest possible
|
||||
* accumulators, then divide by 2^64 to figure the max amount of overflow:
|
||||
*
|
||||
* % bc
|
||||
* a=b=c=d=0; f=2^32-1; for (i=1; i<=32*1024; i++) { a+=f; b+=a; c+=b; d+=c }
|
||||
* a/2^64;b/2^64;c/2^64;d/2^64
|
||||
* 0
|
||||
* 0
|
||||
* 1365
|
||||
* 11186858
|
||||
* quit
|
||||
* %
|
||||
*
|
||||
* So a and b cannot overflow. To make sure each bit of input has some
|
||||
* effect on the contents of c and d, we can look at what the factors of
|
||||
* the coefficients in the equations for c_n and d_n are. The number of 2s
|
||||
* in the factors determines the lowest set bit in the multiplier. Running
|
||||
* through the cases for n*(n+1)/2 reveals that the highest power of 2 is
|
||||
* 2^14, and for n*(n+1)*(n+2)/6 it is 2^15. So while some data may overflow
|
||||
* the 64-bit accumulators, every bit of every f_i effects every accumulator,
|
||||
* even for 128k blocks.
|
||||
*
|
||||
* If we wanted to make a stronger version of fletcher4 (fletcher4c?),
|
||||
* we could do our calculations mod (2^32 - 1) by adding in the carries
|
||||
* periodically, and store the number of carries in the top 32-bits.
|
||||
*
|
||||
* --------------------
|
||||
* Checksum Performance
|
||||
* --------------------
|
||||
*
|
||||
* There are two interesting components to checksum performance: cached and
|
||||
* uncached performance. With cached data, fletcher-2 is about four times
|
||||
* faster than fletcher-4. With uncached data, the performance difference is
|
||||
* negligible, since the cost of a cache fill dominates the processing time.
|
||||
* Even though fletcher-4 is slower than fletcher-2, it is still a pretty
|
||||
* efficient pass over the data.
|
||||
*
|
||||
* In normal operation, the data which is being checksummed is in a buffer
|
||||
* which has been filled either by:
|
||||
*
|
||||
* 1. a compression step, which will be mostly cached, or
|
||||
* 2. a bcopy() or copyin(), which will be uncached (because the
|
||||
* copy is cache-bypassing).
|
||||
*
|
||||
* For both cached and uncached data, both fletcher checksums are much faster
|
||||
* than sha-256, and slower than 'off', which doesn't touch the data at all.
|
||||
*/
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/sysmacros.h>
|
||||
|
|
|
@ -85,6 +85,8 @@ void *arc_data_buf_alloc(uint64_t space);
|
|||
void arc_data_buf_free(void *buf, uint64_t space);
|
||||
arc_buf_t *arc_buf_alloc(spa_t *spa, int size, void *tag,
|
||||
arc_buf_contents_t type);
|
||||
arc_buf_t *arc_loan_buf(spa_t *spa, int size);
|
||||
void arc_return_buf(arc_buf_t *buf, void *tag);
|
||||
void arc_buf_add_ref(arc_buf_t *buf, void *tag);
|
||||
int arc_buf_remove_ref(arc_buf_t *buf, void *tag);
|
||||
int arc_buf_size(arc_buf_t *buf);
|
||||
|
@ -134,7 +136,7 @@ void arc_fini(void);
|
|||
* Level 2 ARC
|
||||
*/
|
||||
|
||||
void l2arc_add_vdev(spa_t *spa, vdev_t *vd, uint64_t start, uint64_t end);
|
||||
void l2arc_add_vdev(spa_t *spa, vdev_t *vd);
|
||||
void l2arc_remove_vdev(vdev_t *vd);
|
||||
boolean_t l2arc_vdev_present(vdev_t *vd);
|
||||
void l2arc_init(void);
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
|
@ -264,6 +264,7 @@ void dbuf_fill_done(dmu_buf_impl_t *db, dmu_tx_t *tx);
|
|||
void dmu_buf_will_not_fill(dmu_buf_t *db, dmu_tx_t *tx);
|
||||
void dmu_buf_will_fill(dmu_buf_t *db, dmu_tx_t *tx);
|
||||
void dmu_buf_fill_done(dmu_buf_t *db, dmu_tx_t *tx);
|
||||
void dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx);
|
||||
dbuf_dirty_record_t *dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
|
||||
|
||||
void dbuf_clear(dmu_buf_impl_t *db);
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
|
@ -60,6 +60,7 @@ struct zbookmark;
|
|||
struct spa;
|
||||
struct nvlist;
|
||||
struct objset_impl;
|
||||
struct arc_buf;
|
||||
|
||||
typedef struct objset objset_t;
|
||||
typedef struct dmu_tx dmu_tx_t;
|
||||
|
@ -114,6 +115,8 @@ typedef enum dmu_object_type {
|
|||
DMU_OT_FUID_SIZE, /* FUID table size UINT64 */
|
||||
DMU_OT_NEXT_CLONES, /* ZAP */
|
||||
DMU_OT_SCRUB_QUEUE, /* ZAP */
|
||||
DMU_OT_USERGROUP_USED, /* ZAP */
|
||||
DMU_OT_USERGROUP_QUOTA, /* ZAP */
|
||||
DMU_OT_NUMTYPES
|
||||
} dmu_object_type_t;
|
||||
|
||||
|
@ -156,6 +159,9 @@ void zfs_znode_byteswap(void *buf, size_t size);
|
|||
#define DMU_MAX_ACCESS (10<<20) /* 10MB */
|
||||
#define DMU_MAX_DELETEBLKCNT (20480) /* ~5MB of indirect blocks */
|
||||
|
||||
#define DMU_USERUSED_OBJECT (-1ULL)
|
||||
#define DMU_GROUPUSED_OBJECT (-2ULL)
|
||||
|
||||
/*
|
||||
* Public routines to create, destroy, open, and close objsets.
|
||||
*/
|
||||
|
@ -171,7 +177,8 @@ int dmu_objset_create(const char *name, dmu_objset_type_t type,
|
|||
int dmu_objset_destroy(const char *name);
|
||||
int dmu_snapshots_destroy(char *fsname, char *snapname);
|
||||
int dmu_objset_rollback(objset_t *os);
|
||||
int dmu_objset_snapshot(char *fsname, char *snapname, boolean_t recursive);
|
||||
int dmu_objset_snapshot(char *fsname, char *snapname, struct nvlist *props,
|
||||
boolean_t recursive);
|
||||
int dmu_objset_rename(const char *name, const char *newname,
|
||||
boolean_t recursive);
|
||||
int dmu_objset_find(char *name, int func(char *, void *), void *arg,
|
||||
|
@ -235,7 +242,7 @@ uint64_t dmu_object_alloc(objset_t *os, dmu_object_type_t ot,
|
|||
int dmu_object_claim(objset_t *os, uint64_t object, dmu_object_type_t ot,
|
||||
int blocksize, dmu_object_type_t bonus_type, int bonus_len, dmu_tx_t *tx);
|
||||
int dmu_object_reclaim(objset_t *os, uint64_t object, dmu_object_type_t ot,
|
||||
int blocksize, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
|
||||
int blocksize, dmu_object_type_t bonustype, int bonuslen);
|
||||
|
||||
/*
|
||||
* Free an object from this objset.
|
||||
|
@ -397,6 +404,11 @@ void *dmu_buf_get_user(dmu_buf_t *db);
|
|||
*/
|
||||
void dmu_buf_will_dirty(dmu_buf_t *db, dmu_tx_t *tx);
|
||||
|
||||
/*
|
||||
* Tells if the given dbuf is freeable.
|
||||
*/
|
||||
boolean_t dmu_buf_freeable(dmu_buf_t *);
|
||||
|
||||
/*
|
||||
* You must create a transaction, then hold the objects which you will
|
||||
* (or might) modify as part of this transaction. Then you must assign
|
||||
|
@ -422,7 +434,7 @@ dmu_tx_t *dmu_tx_create(objset_t *os);
|
|||
void dmu_tx_hold_write(dmu_tx_t *tx, uint64_t object, uint64_t off, int len);
|
||||
void dmu_tx_hold_free(dmu_tx_t *tx, uint64_t object, uint64_t off,
|
||||
uint64_t len);
|
||||
void dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, char *name);
|
||||
void dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, const char *name);
|
||||
void dmu_tx_hold_bonus(dmu_tx_t *tx, uint64_t object);
|
||||
void dmu_tx_abort(dmu_tx_t *tx);
|
||||
int dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how);
|
||||
|
@ -445,8 +457,10 @@ int dmu_free_object(objset_t *os, uint64_t object);
|
|||
* Canfail routines will return 0 on success, or an errno if there is a
|
||||
* nonrecoverable I/O error.
|
||||
*/
|
||||
#define DMU_READ_PREFETCH 0 /* prefetch */
|
||||
#define DMU_READ_NO_PREFETCH 1 /* don't prefetch */
|
||||
int dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
|
||||
void *buf);
|
||||
void *buf, uint32_t flags);
|
||||
void dmu_write(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
|
||||
const void *buf, dmu_tx_t *tx);
|
||||
void dmu_prealloc(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
|
||||
|
@ -456,6 +470,10 @@ int dmu_write_uio(objset_t *os, uint64_t object, struct uio *uio, uint64_t size,
|
|||
dmu_tx_t *tx);
|
||||
int dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset,
|
||||
uint64_t size, struct page *pp, dmu_tx_t *tx);
|
||||
struct arc_buf *dmu_request_arcbuf(dmu_buf_t *handle, int size);
|
||||
void dmu_return_arcbuf(struct arc_buf *buf);
|
||||
void dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, struct arc_buf *buf,
|
||||
dmu_tx_t *tx);
|
||||
|
||||
extern int zfs_prefetch_disable;
|
||||
|
||||
|
@ -562,6 +580,12 @@ extern int dmu_snapshot_realname(objset_t *os, char *name, char *real,
|
|||
int maxlen, boolean_t *conflict);
|
||||
extern int dmu_dir_list_next(objset_t *os, int namelen, char *name,
|
||||
uint64_t *idp, uint64_t *offp);
|
||||
|
||||
typedef void objset_used_cb_t(objset_t *os, dmu_object_type_t bonustype,
|
||||
void *oldbonus, void *newbonus, uint64_t oldused, uint64_t newused,
|
||||
dmu_tx_t *tx);
|
||||
extern void dmu_objset_register_type(dmu_objset_type_t ost,
|
||||
objset_used_cb_t *cb);
|
||||
extern void dmu_objset_set_user(objset_t *os, void *user_ptr);
|
||||
extern void *dmu_objset_get_user(objset_t *os);
|
||||
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
|
@ -42,12 +42,20 @@ struct dsl_dataset;
|
|||
struct dmu_tx;
|
||||
struct objset_impl;
|
||||
|
||||
#define OBJSET_PHYS_SIZE 2048
|
||||
#define OBJSET_OLD_PHYS_SIZE 1024
|
||||
|
||||
#define OBJSET_FLAG_USERACCOUNTING_COMPLETE (1ULL<<0)
|
||||
|
||||
typedef struct objset_phys {
|
||||
dnode_phys_t os_meta_dnode;
|
||||
zil_header_t os_zil_header;
|
||||
uint64_t os_type;
|
||||
char os_pad[1024 - sizeof (dnode_phys_t) - sizeof (zil_header_t) -
|
||||
sizeof (uint64_t)];
|
||||
uint64_t os_flags;
|
||||
char os_pad[OBJSET_PHYS_SIZE - sizeof (dnode_phys_t)*3 -
|
||||
sizeof (zil_header_t) - sizeof (uint64_t)*2];
|
||||
dnode_phys_t os_userused_dnode;
|
||||
dnode_phys_t os_groupused_dnode;
|
||||
} objset_phys_t;
|
||||
|
||||
struct objset {
|
||||
|
@ -62,6 +70,8 @@ typedef struct objset_impl {
|
|||
arc_buf_t *os_phys_buf;
|
||||
objset_phys_t *os_phys;
|
||||
dnode_t *os_meta_dnode;
|
||||
dnode_t *os_userused_dnode;
|
||||
dnode_t *os_groupused_dnode;
|
||||
zilog_t *os_zil;
|
||||
objset_t os;
|
||||
uint8_t os_checksum; /* can change, under dsl_dir's locks */
|
||||
|
@ -74,6 +84,8 @@ typedef struct objset_impl {
|
|||
struct dmu_tx *os_synctx; /* XXX sketchy */
|
||||
blkptr_t *os_rootbp;
|
||||
zil_header_t os_zil_header;
|
||||
list_t os_synced_dnodes;
|
||||
uint64_t os_flags;
|
||||
|
||||
/* Protected by os_obj_lock */
|
||||
kmutex_t os_obj_lock;
|
||||
|
@ -92,6 +104,7 @@ typedef struct objset_impl {
|
|||
} objset_impl_t;
|
||||
|
||||
#define DMU_META_DNODE_OBJECT 0
|
||||
#define DMU_OBJECT_IS_SPECIAL(obj) ((int64_t)(obj) <= 0)
|
||||
|
||||
#define DMU_OS_IS_L2CACHEABLE(os) \
|
||||
((os)->os_secondary_cache == ZFS_CACHE_ALL || \
|
||||
|
@ -106,7 +119,8 @@ int dmu_objset_create(const char *name, dmu_objset_type_t type,
|
|||
void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg);
|
||||
int dmu_objset_destroy(const char *name);
|
||||
int dmu_objset_rollback(objset_t *os);
|
||||
int dmu_objset_snapshot(char *fsname, char *snapname, boolean_t recursive);
|
||||
int dmu_objset_snapshot(char *fsname, char *snapname, nvlist_t *props,
|
||||
boolean_t recursive);
|
||||
void dmu_objset_stats(objset_t *os, nvlist_t *nv);
|
||||
void dmu_objset_fast_stat(objset_t *os, dmu_objset_stats_t *stat);
|
||||
void dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp,
|
||||
|
@ -127,6 +141,10 @@ objset_impl_t *dmu_objset_create_impl(spa_t *spa, struct dsl_dataset *ds,
|
|||
int dmu_objset_open_impl(spa_t *spa, struct dsl_dataset *ds, blkptr_t *bp,
|
||||
objset_impl_t **osip);
|
||||
void dmu_objset_evict(struct dsl_dataset *ds, void *arg);
|
||||
void dmu_objset_do_userquota_callbacks(objset_impl_t *os, dmu_tx_t *tx);
|
||||
boolean_t dmu_objset_userused_enabled(objset_impl_t *os);
|
||||
int dmu_objset_userspace_upgrade(objset_t *os);
|
||||
boolean_t dmu_objset_userspace_present(objset_t *os);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
@ -98,7 +98,8 @@ enum dnode_dirtycontext {
|
|||
};
|
||||
|
||||
/* Is dn_used in bytes? if not, it's in multiples of SPA_MINBLOCKSIZE */
|
||||
#define DNODE_FLAG_USED_BYTES (1<<0)
|
||||
#define DNODE_FLAG_USED_BYTES (1<<0)
|
||||
#define DNODE_FLAG_USERUSED_ACCOUNTED (1<<1)
|
||||
|
||||
typedef struct dnode_phys {
|
||||
uint8_t dn_type; /* dmu_object_type_t */
|
||||
|
@ -131,10 +132,7 @@ typedef struct dnode {
|
|||
*/
|
||||
krwlock_t dn_struct_rwlock;
|
||||
|
||||
/*
|
||||
* Our link on dataset's dd_dnodes list.
|
||||
* Protected by dd_accounting_mtx.
|
||||
*/
|
||||
/* Our link on dn_objset->os_dnodes list; protected by os_lock. */
|
||||
list_node_t dn_link;
|
||||
|
||||
/* immutable: */
|
||||
|
@ -191,6 +189,9 @@ typedef struct dnode {
|
|||
/* parent IO for current sync write */
|
||||
zio_t *dn_zio;
|
||||
|
||||
/* used in syncing context */
|
||||
dnode_phys_t *dn_oldphys;
|
||||
|
||||
/* holds prefetch structure */
|
||||
struct zfetch dn_zfetch;
|
||||
} dnode_t;
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
|
@ -195,7 +195,7 @@ void dsl_dataset_sync(dsl_dataset_t *os, zio_t *zio, dmu_tx_t *tx);
|
|||
void dsl_dataset_block_born(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx);
|
||||
int dsl_dataset_block_kill(dsl_dataset_t *ds, blkptr_t *bp, zio_t *pio,
|
||||
dmu_tx_t *tx);
|
||||
int dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth);
|
||||
boolean_t dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth);
|
||||
uint64_t dsl_dataset_prev_snap_txg(dsl_dataset_t *ds);
|
||||
|
||||
void dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx);
|
||||
|
|
|
@ -19,15 +19,13 @@
|
|||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_DSL_DELEG_H
|
||||
#define _SYS_DSL_DELEG_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/dsl_pool.h>
|
||||
#include <sys/zfs_context.h>
|
||||
|
@ -51,6 +49,10 @@ extern "C" {
|
|||
#define ZFS_DELEG_PERM_ALLOW "allow"
|
||||
#define ZFS_DELEG_PERM_USERPROP "userprop"
|
||||
#define ZFS_DELEG_PERM_VSCAN "vscan"
|
||||
#define ZFS_DELEG_PERM_USERQUOTA "userquota"
|
||||
#define ZFS_DELEG_PERM_GROUPQUOTA "groupquota"
|
||||
#define ZFS_DELEG_PERM_USERUSED "userused"
|
||||
#define ZFS_DELEG_PERM_GROUPUSED "groupused"
|
||||
|
||||
/*
|
||||
* Note: the names of properties that are marked delegatable are also
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
|
@ -107,7 +107,6 @@ int dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj,
|
|||
const char *tail, void *tag, dsl_dir_t **);
|
||||
void dsl_dir_name(dsl_dir_t *dd, char *buf);
|
||||
int dsl_dir_namelen(dsl_dir_t *dd);
|
||||
int dsl_dir_is_private(dsl_dir_t *dd);
|
||||
uint64_t dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds,
|
||||
const char *name, dmu_tx_t *tx);
|
||||
dsl_checkfunc_t dsl_dir_destroy_check;
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
|
@ -77,6 +77,7 @@ typedef struct dsl_pool {
|
|||
struct dsl_dir *dp_mos_dir;
|
||||
struct dsl_dataset *dp_origin_snap;
|
||||
uint64_t dp_root_dir_obj;
|
||||
struct taskq *dp_vnrele_taskq;
|
||||
|
||||
/* No lock needed - sync context only */
|
||||
blkptr_t dp_meta_rootbp;
|
||||
|
@ -143,6 +144,8 @@ int dsl_pool_scrub_clean(dsl_pool_t *dp);
|
|||
void dsl_pool_scrub_sync(dsl_pool_t *dp, dmu_tx_t *tx);
|
||||
void dsl_pool_scrub_restart(dsl_pool_t *dp);
|
||||
|
||||
taskq_t *dsl_pool_vnrele_taskq(dsl_pool_t *dp);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -19,18 +19,17 @@
|
|||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_DSL_PROP_H
|
||||
#define _SYS_DSL_PROP_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/dsl_pool.h>
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/dsl_synctask.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
|
@ -66,8 +65,10 @@ int dsl_prop_get_ds(struct dsl_dataset *ds, const char *propname,
|
|||
int dsl_prop_get_dd(struct dsl_dir *dd, const char *propname,
|
||||
int intsz, int numints, void *buf, char *setpoint);
|
||||
|
||||
dsl_syncfunc_t dsl_props_set_sync;
|
||||
int dsl_prop_set(const char *ddname, const char *propname,
|
||||
int intsz, int numints, const void *buf);
|
||||
int dsl_props_set(const char *dsname, nvlist_t *nvl);
|
||||
void dsl_prop_set_uint64_sync(dsl_dir_t *dd, const char *name, uint64_t val,
|
||||
cred_t *cr, dmu_tx_t *tx);
|
||||
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
|
@ -39,6 +39,8 @@ extern "C" {
|
|||
typedef struct metaslab_class metaslab_class_t;
|
||||
typedef struct metaslab_group metaslab_group_t;
|
||||
|
||||
extern space_map_ops_t *zfs_metaslab_ops;
|
||||
|
||||
extern metaslab_t *metaslab_init(metaslab_group_t *mg, space_map_obj_t *smo,
|
||||
uint64_t start, uint64_t size, uint64_t txg);
|
||||
extern void metaslab_fini(metaslab_t *msp);
|
||||
|
@ -55,7 +57,7 @@ extern void metaslab_free(spa_t *spa, const blkptr_t *bp, uint64_t txg,
|
|||
boolean_t now);
|
||||
extern int metaslab_claim(spa_t *spa, const blkptr_t *bp, uint64_t txg);
|
||||
|
||||
extern metaslab_class_t *metaslab_class_create(void);
|
||||
extern metaslab_class_t *metaslab_class_create(space_map_ops_t *ops);
|
||||
extern void metaslab_class_destroy(metaslab_class_t *mc);
|
||||
extern void metaslab_class_add(metaslab_class_t *mc, metaslab_group_t *mg);
|
||||
extern void metaslab_class_remove(metaslab_class_t *mc, metaslab_group_t *mg);
|
||||
|
|
|
@ -19,15 +19,13 @@
|
|||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_METASLAB_IMPL_H
|
||||
#define _SYS_METASLAB_IMPL_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/metaslab.h>
|
||||
#include <sys/space_map.h>
|
||||
#include <sys/vdev.h>
|
||||
|
@ -41,6 +39,7 @@ extern "C" {
|
|||
struct metaslab_class {
|
||||
metaslab_group_t *mc_rotor;
|
||||
uint64_t mc_allocated;
|
||||
space_map_ops_t *mc_ops;
|
||||
};
|
||||
|
||||
struct metaslab_group {
|
||||
|
|
|
@ -324,12 +324,9 @@ extern int spa_get_stats(const char *pool, nvlist_t **config,
|
|||
char *altroot, size_t buflen);
|
||||
extern int spa_create(const char *pool, nvlist_t *config, nvlist_t *props,
|
||||
const char *history_str, nvlist_t *zplprops);
|
||||
extern int spa_check_rootconf(char *devpath, char *devid,
|
||||
nvlist_t **bestconf, uint64_t *besttxg);
|
||||
extern boolean_t spa_rootdev_validate(nvlist_t *nv);
|
||||
extern int spa_import_rootpool(char *devpath, char *devid);
|
||||
extern int spa_import(const char *pool, nvlist_t *config, nvlist_t *props);
|
||||
extern int spa_import_faulted(const char *, nvlist_t *, nvlist_t *);
|
||||
extern int spa_import_verbatim(const char *, nvlist_t *, nvlist_t *);
|
||||
extern nvlist_t *spa_tryimport(nvlist_t *tryconfig);
|
||||
extern int spa_destroy(char *pool);
|
||||
extern int spa_export(char *pool, nvlist_t **oldconfig, boolean_t force,
|
||||
|
@ -347,6 +344,7 @@ extern void spa_inject_delref(spa_t *spa);
|
|||
#define SPA_ASYNC_PROBE 0x04
|
||||
#define SPA_ASYNC_RESILVER_DONE 0x08
|
||||
#define SPA_ASYNC_RESILVER 0x10
|
||||
#define SPA_ASYNC_AUTOEXPAND 0x20
|
||||
|
||||
/* device manipulation */
|
||||
extern int spa_vdev_add(spa_t *spa, nvlist_t *nvroot);
|
||||
|
@ -356,6 +354,7 @@ extern int spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid,
|
|||
int replace_done);
|
||||
extern int spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare);
|
||||
extern int spa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath);
|
||||
extern int spa_vdev_setfru(spa_t *spa, uint64_t guid, const char *newfru);
|
||||
|
||||
/* spare state (which is global across all pools) */
|
||||
extern void spa_spare_add(vdev_t *vd);
|
||||
|
|
|
@ -19,15 +19,13 @@
|
|||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_SPA_BOOT_H
|
||||
#define _SYS_SPA_BOOT_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/nvpair.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
@ -36,7 +34,6 @@ extern "C" {
|
|||
|
||||
extern char *spa_get_bootprop(char *prop);
|
||||
extern void spa_free_bootprop(char *prop);
|
||||
extern int spa_get_rootconf(char *devpath, char *devid, nvlist_t **bestconf_p);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
|
@ -141,9 +141,6 @@ struct spa {
|
|||
int spa_async_suspended; /* async tasks suspended */
|
||||
kcondvar_t spa_async_cv; /* wait for thread_exit() */
|
||||
uint16_t spa_async_tasks; /* async task mask */
|
||||
kmutex_t spa_async_root_lock; /* protects async root count */
|
||||
uint64_t spa_async_root_count; /* number of async root zios */
|
||||
kcondvar_t spa_async_root_cv; /* notify when count == 0 */
|
||||
char *spa_root; /* alternate root directory */
|
||||
uint64_t spa_ena; /* spa-wide ereport ENA */
|
||||
boolean_t spa_last_open_failed; /* true if last open failed */
|
||||
|
@ -163,15 +160,16 @@ struct spa {
|
|||
uint64_t spa_failmode; /* failure mode for the pool */
|
||||
uint64_t spa_delegation; /* delegation on/off */
|
||||
list_t spa_config_list; /* previous cache file(s) */
|
||||
zio_t *spa_async_zio_root; /* root of all async I/O */
|
||||
zio_t *spa_suspend_zio_root; /* root of all suspended I/O */
|
||||
kmutex_t spa_suspend_lock; /* protects suspend_zio_root */
|
||||
kcondvar_t spa_suspend_cv; /* notification of resume */
|
||||
uint8_t spa_suspended; /* pool is suspended */
|
||||
boolean_t spa_import_faulted; /* allow faulted vdevs */
|
||||
boolean_t spa_is_root; /* pool is root */
|
||||
int spa_minref; /* num refs when first opened */
|
||||
int spa_mode; /* FREAD | FWRITE */
|
||||
spa_log_state_t spa_log_state; /* log state */
|
||||
uint64_t spa_autoexpand; /* lun expansion on/off */
|
||||
/*
|
||||
* spa_refcnt & spa_config_lock must be the last elements
|
||||
* because refcount_t changes size based on compilation options.
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
|
@ -46,12 +46,14 @@ typedef struct space_map {
|
|||
uint8_t sm_loading; /* map loading? */
|
||||
kcondvar_t sm_load_cv; /* map load completion */
|
||||
space_map_ops_t *sm_ops; /* space map block picker ops vector */
|
||||
avl_tree_t *sm_pp_root; /* picker-private AVL tree */
|
||||
void *sm_ppd; /* picker-private data */
|
||||
kmutex_t *sm_lock; /* pointer to lock that protects map */
|
||||
} space_map_t;
|
||||
|
||||
typedef struct space_seg {
|
||||
avl_node_t ss_node; /* AVL node */
|
||||
avl_node_t ss_pp_node; /* AVL picker-private node */
|
||||
uint64_t ss_start; /* starting offset of this segment */
|
||||
uint64_t ss_end; /* ending offset (non-inclusive) */
|
||||
} space_seg_t;
|
||||
|
@ -74,6 +76,7 @@ struct space_map_ops {
|
|||
uint64_t (*smop_alloc)(space_map_t *sm, uint64_t size);
|
||||
void (*smop_claim)(space_map_t *sm, uint64_t start, uint64_t size);
|
||||
void (*smop_free)(space_map_t *sm, uint64_t start, uint64_t size);
|
||||
uint64_t (*smop_max)(space_map_t *sm);
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -152,6 +155,7 @@ extern void space_map_unload(space_map_t *sm);
|
|||
extern uint64_t space_map_alloc(space_map_t *sm, uint64_t size);
|
||||
extern void space_map_claim(space_map_t *sm, uint64_t start, uint64_t size);
|
||||
extern void space_map_free(space_map_t *sm, uint64_t start, uint64_t size);
|
||||
extern uint64_t space_map_maxsize(space_map_t *sm);
|
||||
|
||||
extern void space_map_sync(space_map_t *sm, uint8_t maptype,
|
||||
space_map_obj_t *smo, objset_t *os, dmu_tx_t *tx);
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
|
@ -50,7 +50,6 @@ extern int vdev_open(vdev_t *);
|
|||
extern int vdev_validate(vdev_t *);
|
||||
extern void vdev_close(vdev_t *);
|
||||
extern int vdev_create(vdev_t *, uint64_t txg, boolean_t isreplace);
|
||||
extern void vdev_init(vdev_t *, uint64_t txg);
|
||||
extern void vdev_reopen(vdev_t *);
|
||||
extern int vdev_validate_aux(vdev_t *vd);
|
||||
extern zio_t *vdev_probe(vdev_t *vd, zio_t *pio);
|
||||
|
@ -71,6 +70,8 @@ extern boolean_t vdev_resilver_needed(vdev_t *vd,
|
|||
|
||||
extern int vdev_metaslab_init(vdev_t *vd, uint64_t txg);
|
||||
extern void vdev_metaslab_fini(vdev_t *vd);
|
||||
extern void vdev_metaslab_set_size(vdev_t *);
|
||||
extern void vdev_expand(vdev_t *vd, uint64_t txg);
|
||||
|
||||
extern void vdev_get_stats(vdev_t *vd, vdev_stat_t *vs);
|
||||
extern void vdev_clear_stats(vdev_t *vd);
|
||||
|
@ -113,7 +114,8 @@ extern void vdev_queue_io_done(zio_t *zio);
|
|||
|
||||
extern void vdev_config_dirty(vdev_t *vd);
|
||||
extern void vdev_config_clean(vdev_t *vd);
|
||||
extern int vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg);
|
||||
extern int vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg,
|
||||
boolean_t);
|
||||
|
||||
extern void vdev_state_dirty(vdev_t *vd);
|
||||
extern void vdev_state_clean(vdev_t *vd);
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
|
@ -113,6 +113,7 @@ struct vdev {
|
|||
uint64_t vdev_guid; /* unique ID for this vdev */
|
||||
uint64_t vdev_guid_sum; /* self guid + all child guids */
|
||||
uint64_t vdev_asize; /* allocatable device capacity */
|
||||
uint64_t vdev_min_asize; /* min acceptable asize */
|
||||
uint64_t vdev_ashift; /* block alignment shift */
|
||||
uint64_t vdev_state; /* see VDEV_STATE_* #defines */
|
||||
uint64_t vdev_prevstate; /* used when reopening a vdev */
|
||||
|
@ -125,6 +126,7 @@ struct vdev {
|
|||
uint64_t vdev_children; /* number of children */
|
||||
space_map_t vdev_dtl[DTL_TYPES]; /* in-core dirty time logs */
|
||||
vdev_stat_t vdev_stat; /* virtual device statistics */
|
||||
boolean_t vdev_expanding; /* expand the vdev? */
|
||||
|
||||
/*
|
||||
* Top-level vdev state.
|
||||
|
@ -159,6 +161,7 @@ struct vdev {
|
|||
char *vdev_path; /* vdev path (if any) */
|
||||
char *vdev_devid; /* vdev devid (if any) */
|
||||
char *vdev_physpath; /* vdev device path (if any) */
|
||||
char *vdev_fru; /* physical FRU location */
|
||||
uint64_t vdev_not_present; /* not present during import */
|
||||
uint64_t vdev_unspare; /* unspare when resilvering done */
|
||||
hrtime_t vdev_last_try; /* last reopen time */
|
||||
|
@ -188,8 +191,9 @@ struct vdev {
|
|||
kmutex_t vdev_probe_lock; /* protects vdev_probe_zio */
|
||||
};
|
||||
|
||||
#define VDEV_SKIP_SIZE (8 << 10)
|
||||
#define VDEV_BOOT_HEADER_SIZE (8 << 10)
|
||||
#define VDEV_PAD_SIZE (8 << 10)
|
||||
/* 2 padding areas (vl_pad1 and vl_pad2) to skip */
|
||||
/* Parenthesized so the expansion stays one operand inside any expression. */
#define VDEV_SKIP_SIZE (VDEV_PAD_SIZE * 2)
|
||||
#define VDEV_PHYS_SIZE (112 << 10)
|
||||
#define VDEV_UBERBLOCK_RING (128 << 10)
|
||||
|
||||
|
@ -201,26 +205,14 @@ struct vdev {
|
|||
offsetof(vdev_label_t, vl_uberblock[(n) << VDEV_UBERBLOCK_SHIFT(vd)])
|
||||
#define VDEV_UBERBLOCK_SIZE(vd) (1ULL << VDEV_UBERBLOCK_SHIFT(vd))
|
||||
|
||||
/* ZFS boot block */
|
||||
#define VDEV_BOOT_MAGIC 0x2f5b007b10cULL
|
||||
#define VDEV_BOOT_VERSION 1 /* version number */
|
||||
|
||||
typedef struct vdev_boot_header {
|
||||
uint64_t vb_magic; /* VDEV_BOOT_MAGIC */
|
||||
uint64_t vb_version; /* VDEV_BOOT_VERSION */
|
||||
uint64_t vb_offset; /* start offset (bytes) */
|
||||
uint64_t vb_size; /* size (bytes) */
|
||||
char vb_pad[VDEV_BOOT_HEADER_SIZE - 4 * sizeof (uint64_t)];
|
||||
} vdev_boot_header_t;
|
||||
|
||||
typedef struct vdev_phys {
|
||||
char vp_nvlist[VDEV_PHYS_SIZE - sizeof (zio_block_tail_t)];
|
||||
zio_block_tail_t vp_zbt;
|
||||
} vdev_phys_t;
|
||||
|
||||
typedef struct vdev_label {
|
||||
char vl_pad[VDEV_SKIP_SIZE]; /* 8K */
|
||||
vdev_boot_header_t vl_boot_header; /* 8K */
|
||||
char vl_pad1[VDEV_PAD_SIZE]; /* 8K */
|
||||
char vl_pad2[VDEV_PAD_SIZE]; /* 8K */
|
||||
vdev_phys_t vl_vdev_phys; /* 112K */
|
||||
char vl_uberblock[VDEV_UBERBLOCK_RING]; /* 128K */
|
||||
} vdev_label_t; /* 256K total */
|
||||
|
@ -249,6 +241,7 @@ typedef struct vdev_label {
|
|||
#define VDEV_ALLOC_ADD 1
|
||||
#define VDEV_ALLOC_SPARE 2
|
||||
#define VDEV_ALLOC_L2CACHE 3
|
||||
#define VDEV_ALLOC_ROOTPOOL 4
|
||||
|
||||
/*
|
||||
* Allocate or free a vdev
|
||||
|
@ -269,6 +262,7 @@ extern void vdev_remove_parent(vdev_t *cvd);
|
|||
/*
|
||||
* vdev sync load and sync
|
||||
*/
|
||||
extern void vdev_load_log_state(vdev_t *vd, nvlist_t *nv);
|
||||
extern void vdev_load(vdev_t *vd);
|
||||
extern void vdev_sync(vdev_t *vd, uint64_t txg);
|
||||
extern void vdev_sync_done(vdev_t *vd, uint64_t txg);
|
||||
|
@ -290,7 +284,8 @@ extern vdev_ops_t vdev_spare_ops;
|
|||
* Common size functions
|
||||
*/
|
||||
extern uint64_t vdev_default_asize(vdev_t *vd, uint64_t psize);
|
||||
extern uint64_t vdev_get_rsize(vdev_t *vd);
|
||||
extern uint64_t vdev_get_min_asize(vdev_t *vd);
|
||||
extern void vdev_set_min_asize(vdev_t *vd);
|
||||
|
||||
/*
|
||||
* zdb uses this tunable, so it must be declared here to make lint happy.
|
||||
|
|
|
@ -19,15 +19,13 @@
|
|||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_ZAP_H
|
||||
#define _SYS_ZAP_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
/*
|
||||
* ZAP - ZFS Attribute Processor
|
||||
*
|
||||
|
@ -87,9 +85,6 @@
|
|||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define ZAP_MAXNAMELEN 256
|
||||
#define ZAP_MAXVALUELEN 1024
|
||||
|
||||
/*
|
||||
* The matchtype specifies which entry will be accessed.
|
||||
* MT_EXACT: only find an exact match (non-normalized)
|
||||
|
@ -186,6 +181,10 @@ int zap_lookup_norm(objset_t *ds, uint64_t zapobj, const char *name,
|
|||
matchtype_t mt, char *realname, int rn_len,
|
||||
boolean_t *normalization_conflictp);
|
||||
|
||||
int zap_count_write(objset_t *os, uint64_t zapobj, const char *name,
|
||||
int add, uint64_t *towrite, uint64_t *tooverwrite,
|
||||
uint64_t dn_datablkshift);
|
||||
|
||||
/*
|
||||
* Create an attribute with the given name and value.
|
||||
*
|
||||
|
|
|
@ -19,15 +19,13 @@
|
|||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_ZAP_IMPL_H
|
||||
#define _SYS_ZAP_IMPL_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/zap.h>
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/avl.h>
|
||||
|
@ -195,6 +193,8 @@ int fzap_count(zap_t *zap, uint64_t *count);
|
|||
int fzap_lookup(zap_name_t *zn,
|
||||
uint64_t integer_size, uint64_t num_integers, void *buf,
|
||||
char *realname, int rn_len, boolean_t *normalization_conflictp);
|
||||
int fzap_count_write(zap_name_t *zn, int add, uint64_t *towrite,
|
||||
uint64_t *tooverwrite);
|
||||
int fzap_add(zap_name_t *zn, uint64_t integer_size, uint64_t num_integers,
|
||||
const void *val, dmu_tx_t *tx);
|
||||
int fzap_update(zap_name_t *zn,
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
|
@ -114,8 +114,6 @@ typedef struct zfs_acl_phys {
|
|||
uint8_t z_ace_data[ZFS_ACE_SPACE]; /* space for embedded ACEs */
|
||||
} zfs_acl_phys_t;
|
||||
|
||||
|
||||
|
||||
typedef struct acl_ops {
|
||||
uint32_t (*ace_mask_get) (void *acep); /* get access mask */
|
||||
void (*ace_mask_set) (void *acep,
|
||||
|
@ -161,12 +159,21 @@ typedef struct zfs_acl {
|
|||
zfs_acl_node_t *z_curr_node; /* current node iterator is handling */
|
||||
list_t z_acl; /* chunks of ACE data */
|
||||
acl_ops_t z_ops; /* ACL operations */
|
||||
boolean_t z_has_fuids; /* FUIDs present in ACL? */
|
||||
} zfs_acl_t;
|
||||
|
||||
#define ACL_DATA_ALLOCED 0x1
|
||||
#define ZFS_ACL_SIZE(aclcnt) (sizeof (ace_t) * (aclcnt))
|
||||
|
||||
struct zfs_fuid_info;
|
||||
|
||||
typedef struct zfs_acl_ids {
|
||||
uint64_t z_fuid; /* file owner fuid */
|
||||
uint64_t z_fgid; /* file group owner fuid */
|
||||
uint64_t z_mode; /* mode to set on create */
|
||||
zfs_acl_t *z_aclp; /* ACL to create with file */
|
||||
struct zfs_fuid_info *z_fuidp; /* for tracking fuids for log */
|
||||
} zfs_acl_ids_t;
|
||||
|
||||
/*
|
||||
* Property values for acl_mode and acl_inherit.
|
||||
*
|
||||
|
@ -183,16 +190,18 @@ typedef struct zfs_acl {
|
|||
|
||||
struct znode;
|
||||
struct zfsvfs;
|
||||
struct zfs_fuid_info;
|
||||
|
||||
#ifdef _KERNEL
|
||||
void zfs_perm_init(struct znode *, struct znode *, int, vattr_t *,
|
||||
dmu_tx_t *, cred_t *, zfs_acl_t *, zfs_fuid_info_t **);
|
||||
int zfs_acl_ids_create(struct znode *, int, vattr_t *,
|
||||
cred_t *, vsecattr_t *, zfs_acl_ids_t *);
|
||||
void zfs_acl_ids_free(zfs_acl_ids_t *);
|
||||
boolean_t zfs_acl_ids_overquota(struct zfsvfs *, zfs_acl_ids_t *);
|
||||
int zfs_getacl(struct znode *, vsecattr_t *, boolean_t, cred_t *);
|
||||
int zfs_setacl(struct znode *, vsecattr_t *, boolean_t, cred_t *);
|
||||
void zfs_acl_rele(void *);
|
||||
void zfs_oldace_byteswap(ace_t *, int);
|
||||
void zfs_ace_byteswap(void *, size_t, boolean_t);
|
||||
extern boolean_t zfs_has_access(struct znode *zp, cred_t *cr);
|
||||
extern int zfs_zaccess(struct znode *, int, int, boolean_t, cred_t *);
|
||||
extern int zfs_zaccess_rwx(struct znode *, mode_t, int, cred_t *);
|
||||
extern int zfs_zaccess_unix(struct znode *, mode_t, cred_t *);
|
||||
|
@ -202,9 +211,9 @@ int zfs_zaccess_delete(struct znode *, struct znode *, cred_t *);
|
|||
int zfs_zaccess_rename(struct znode *, struct znode *,
|
||||
struct znode *, struct znode *, cred_t *cr);
|
||||
void zfs_acl_free(zfs_acl_t *);
|
||||
int zfs_vsec_2_aclp(struct zfsvfs *, vtype_t, vsecattr_t *, zfs_acl_t **);
|
||||
int zfs_aclset_common(struct znode *, zfs_acl_t *, cred_t *,
|
||||
struct zfs_fuid_info **, dmu_tx_t *);
|
||||
int zfs_vsec_2_aclp(struct zfsvfs *, vtype_t, vsecattr_t *, cred_t *,
|
||||
struct zfs_fuid_info **, zfs_acl_t **);
|
||||
int zfs_aclset_common(struct znode *, zfs_acl_t *, cred_t *, dmu_tx_t *);
|
||||
|
||||
#endif
|
||||
|
||||
|
|
|
@ -19,15 +19,13 @@
|
|||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_ZFS_CONTEXT_H
|
||||
#define _SYS_ZFS_CONTEXT_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
@ -62,6 +60,7 @@ extern "C" {
|
|||
#include <sys/zfs_debug.h>
|
||||
#include <sys/sysevent.h>
|
||||
#include <sys/sysevent/eventdefs.h>
|
||||
#include <sys/sysevent/dev.h>
|
||||
#include <sys/fm/util.h>
|
||||
|
||||
#define CPU_SEQID (CPU->cpu_seqid)
|
||||
|
|
|
@ -19,15 +19,13 @@
|
|||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _ZFS_CTLDIR_H
|
||||
#define _ZFS_CTLDIR_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/pathname.h>
|
||||
#include <sys/vnode.h>
|
||||
#include <sys/zfs_vfsops.h>
|
||||
|
@ -66,6 +64,7 @@ int zfsctl_lookup_objset(vfs_t *vfsp, uint64_t objsetid, zfsvfs_t **zfsvfsp);
|
|||
|
||||
#define ZFSCTL_INO_ROOT 0x1
|
||||
#define ZFSCTL_INO_SNAPDIR 0x2
|
||||
#define ZFSCTL_INO_SHARES 0x3
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
@ -19,15 +19,13 @@
|
|||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_FS_ZFS_DIR_H
|
||||
#define _SYS_FS_ZFS_DIR_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/pathname.h>
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/zfs_znode.h>
|
||||
|
@ -59,7 +57,7 @@ extern int zfs_link_destroy(zfs_dirlock_t *, znode_t *, dmu_tx_t *, int,
|
|||
extern int zfs_dirlook(znode_t *, char *, vnode_t **, int, int *,
|
||||
pathname_t *);
|
||||
extern void zfs_mknode(znode_t *, vattr_t *, dmu_tx_t *, cred_t *,
|
||||
uint_t, znode_t **, int, zfs_acl_t *, zfs_fuid_info_t **);
|
||||
uint_t, znode_t **, int, zfs_acl_ids_t *);
|
||||
extern void zfs_rmnode(znode_t *);
|
||||
extern void zfs_dl_name_switch(zfs_dirlock_t *dl, char *new, char **old);
|
||||
extern boolean_t zfs_dirempty(znode_t *);
|
||||
|
|
|
@ -19,15 +19,13 @@
|
|||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_FS_ZFS_FUID_H
|
||||
#define _SYS_FS_ZFS_FUID_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#ifdef _KERNEL
|
||||
#include <sys/kidmap.h>
|
||||
#include <sys/sid.h>
|
||||
|
@ -51,11 +49,11 @@ typedef enum {
|
|||
* Estimate space needed for one more fuid table entry.
|
||||
* for now assume its current size + 1K
|
||||
*/
|
||||
#define FUID_SIZE_ESTIMATE(z) (z->z_fuid_size + (SPA_MINBLOCKSIZE << 1))
|
||||
#define FUID_SIZE_ESTIMATE(z) ((z)->z_fuid_size + (SPA_MINBLOCKSIZE << 1))
|
||||
|
||||
#define FUID_INDEX(x) (x >> 32)
|
||||
#define FUID_RID(x) (x & 0xffffffff)
|
||||
#define FUID_ENCODE(idx, rid) ((idx << 32) | rid)
|
||||
#define FUID_INDEX(x) ((x) >> 32)
|
||||
#define FUID_RID(x) ((x) & 0xffffffff)
|
||||
#define FUID_ENCODE(idx, rid) (((uint64_t)(idx) << 32) | (rid))
|
||||
/*
|
||||
* FUIDs cause problems for the intent log
|
||||
* we need to replay the creation of the FUID,
|
||||
|
@ -104,17 +102,23 @@ struct znode;
|
|||
extern uid_t zfs_fuid_map_id(zfsvfs_t *, uint64_t, cred_t *, zfs_fuid_type_t);
|
||||
extern void zfs_fuid_destroy(zfsvfs_t *);
|
||||
extern uint64_t zfs_fuid_create_cred(zfsvfs_t *, zfs_fuid_type_t,
|
||||
dmu_tx_t *, cred_t *, zfs_fuid_info_t **);
|
||||
cred_t *, zfs_fuid_info_t **);
|
||||
extern uint64_t zfs_fuid_create(zfsvfs_t *, uint64_t, cred_t *, zfs_fuid_type_t,
|
||||
dmu_tx_t *, zfs_fuid_info_t **);
|
||||
extern void zfs_fuid_map_ids(struct znode *zp, cred_t *cr, uid_t *uid,
|
||||
uid_t *gid);
|
||||
zfs_fuid_info_t **);
|
||||
extern void zfs_fuid_map_ids(struct znode *zp, cred_t *cr,
|
||||
uid_t *uid, uid_t *gid);
|
||||
extern zfs_fuid_info_t *zfs_fuid_info_alloc(void);
|
||||
extern void zfs_fuid_info_free();
|
||||
extern void zfs_fuid_info_free(zfs_fuid_info_t *);
|
||||
extern boolean_t zfs_groupmember(zfsvfs_t *, uint64_t, cred_t *);
|
||||
void zfs_fuid_sync(zfsvfs_t *, dmu_tx_t *);
|
||||
extern int zfs_fuid_find_by_domain(zfsvfs_t *, const char *domain,
|
||||
char **retdomain, boolean_t addok);
|
||||
extern const char *zfs_fuid_find_by_idx(zfsvfs_t *zfsvfs, uint32_t idx);
|
||||
extern void zfs_fuid_txhold(zfsvfs_t *zfsvfs, dmu_tx_t *tx);
|
||||
#endif
|
||||
|
||||
char *zfs_fuid_idx_domain(avl_tree_t *, uint32_t);
|
||||
void zfs_fuid_avl_tree_create(avl_tree_t *, avl_tree_t *);
|
||||
uint64_t zfs_fuid_table_load(objset_t *, uint64_t, avl_tree_t *, avl_tree_t *);
|
||||
void zfs_fuid_table_destroy(avl_tree_t *, avl_tree_t *);
|
||||
|
||||
|
|
|
@ -19,15 +19,13 @@
|
|||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_ZFS_IOCTL_H
|
||||
#define _SYS_ZFS_IOCTL_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/cred.h>
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/zio.h>
|
||||
|
@ -118,7 +116,7 @@ typedef struct zinject_record {
|
|||
uint32_t zi_error;
|
||||
uint64_t zi_type;
|
||||
uint32_t zi_freq;
|
||||
uint32_t zi_pad; /* pad out to 64 bit alignment */
|
||||
uint32_t zi_failfast;
|
||||
} zinject_record_t;
|
||||
|
||||
#define ZINJECT_NULL 0x1
|
||||
|
@ -162,12 +160,20 @@ typedef struct zfs_cmd {
|
|||
uint64_t zc_history_len;
|
||||
uint64_t zc_history_offset;
|
||||
uint64_t zc_obj;
|
||||
uint64_t zc_iflags; /* internal to zfs(7fs) */
|
||||
zfs_share_t zc_share;
|
||||
dmu_objset_stats_t zc_objset_stats;
|
||||
struct drr_begin zc_begin_record;
|
||||
zinject_record_t zc_inject_record;
|
||||
} zfs_cmd_t;
|
||||
|
||||
typedef struct zfs_useracct {
|
||||
char zu_domain[256];
|
||||
uid_t zu_rid;
|
||||
uint32_t zu_pad;
|
||||
uint64_t zu_space;
|
||||
} zfs_useracct_t;
|
||||
|
||||
#define ZVOL_MAX_MINOR (1 << 16)
|
||||
#define ZFS_MIN_MINOR (ZVOL_MAX_MINOR + 1)
|
||||
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
|
@ -53,6 +53,7 @@ struct zfsvfs {
|
|||
avl_tree_t z_fuid_domain; /* fuid tree keyed by domain */
|
||||
krwlock_t z_fuid_lock; /* fuid lock */
|
||||
boolean_t z_fuid_loaded; /* fuid tables are loaded */
|
||||
boolean_t z_fuid_dirty; /* need to sync fuid table ? */
|
||||
struct zfs_fuid_info *z_fuid_replay; /* fuid info for replay */
|
||||
zilog_t *z_log; /* intent log pointer */
|
||||
uint_t z_acl_mode; /* acl chmod/mode behavior */
|
||||
|
@ -72,8 +73,12 @@ struct zfsvfs {
|
|||
boolean_t z_vscan; /* virus scan on/off */
|
||||
boolean_t z_use_fuids; /* version allows fuids */
|
||||
boolean_t z_replay; /* set during ZIL replay */
|
||||
kmutex_t z_online_recv_lock; /* recv in prog grabs as WRITER */
|
||||
kmutex_t z_online_recv_lock; /* held while recv in progress */
|
||||
uint64_t z_version; /* ZPL version */
|
||||
uint64_t z_shares_dir; /* hidden shares dir */
|
||||
kmutex_t z_lock;
|
||||
uint64_t z_userquota_obj;
|
||||
uint64_t z_groupquota_obj;
|
||||
#define ZFS_OBJ_MTX_SZ 64
|
||||
kmutex_t z_hold_mtx[ZFS_OBJ_MTX_SZ]; /* znode hold locks */
|
||||
};
|
||||
|
@ -130,6 +135,17 @@ extern uint_t zfs_fsyncer_key;
|
|||
|
||||
extern int zfs_suspend_fs(zfsvfs_t *zfsvfs, char *osname, int *mode);
|
||||
extern int zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname, int mode);
|
||||
extern int zfs_userspace_one(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
|
||||
const char *domain, uint64_t rid, uint64_t *valuep);
|
||||
extern int zfs_userspace_many(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
|
||||
uint64_t *cookiep, void *vbuf, uint64_t *bufsizep);
|
||||
extern int zfs_set_userquota(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
|
||||
const char *domain, uint64_t rid, uint64_t quota);
|
||||
extern boolean_t zfs_usergroup_overquota(zfsvfs_t *zfsvfs,
|
||||
boolean_t isgroup, uint64_t fuid);
|
||||
extern int zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers);
|
||||
extern int zfsvfs_create(const char *name, int mode, zfsvfs_t **zvp);
|
||||
extern void zfsvfs_free(zfsvfs_t *zfsvfs);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
@ -93,12 +93,15 @@ extern "C" {
|
|||
|
||||
/*
|
||||
* Special attributes for master node.
|
||||
* "userquota@" and "groupquota@" are also valid (from
|
||||
* zfs_userquota_prop_prefixes[]).
|
||||
*/
|
||||
#define ZFS_FSID "FSID"
|
||||
#define ZFS_UNLINKED_SET "DELETE_QUEUE"
|
||||
#define ZFS_ROOT_OBJ "ROOT"
|
||||
#define ZPL_VERSION_STR "VERSION"
|
||||
#define ZFS_FUID_TABLES "FUID"
|
||||
#define ZFS_SHARES_DIR "SHARES"
|
||||
|
||||
#define ZFS_MAX_BLOCKSIZE (SPA_MAXBLOCKSIZE)
|
||||
|
||||
|
@ -309,7 +312,6 @@ extern int zfs_create_op_tables();
|
|||
extern int zfs_sync(vfs_t *vfsp, short flag, cred_t *cr);
|
||||
extern dev_t zfs_cmpldev(uint64_t);
|
||||
extern int zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value);
|
||||
extern int zfs_set_version(const char *name, uint64_t newvers);
|
||||
extern int zfs_get_stats(objset_t *os, nvlist_t *nv);
|
||||
extern void zfs_znode_dmu_fini(znode_t *);
|
||||
|
||||
|
@ -336,6 +338,7 @@ extern void zfs_log_acl(zilog_t *zilog, dmu_tx_t *tx, znode_t *zp,
|
|||
vsecattr_t *vsecp, zfs_fuid_info_t *fuidp);
|
||||
extern void zfs_xvattr_set(znode_t *zp, xvattr_t *xvap);
|
||||
extern void zfs_upgrade(zfsvfs_t *zfsvfs, dmu_tx_t *tx);
|
||||
extern int zfs_create_share_dir(zfsvfs_t *zfsvfs, dmu_tx_t *tx);
|
||||
|
||||
extern caddr_t zfs_map_page(page_t *, enum seg_rw);
|
||||
extern void zfs_unmap_page(page_t *, caddr_t);
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
|
@ -56,9 +56,15 @@ typedef struct zil_header {
|
|||
uint64_t zh_replay_seq; /* highest replayed sequence number */
|
||||
blkptr_t zh_log; /* log chain */
|
||||
uint64_t zh_claim_seq; /* highest claimed sequence number */
|
||||
uint64_t zh_pad[5];
|
||||
uint64_t zh_flags; /* header flags */
|
||||
uint64_t zh_pad[4];
|
||||
} zil_header_t;
|
||||
|
||||
/*
|
||||
* zh_flags bit settings
|
||||
*/
|
||||
#define ZIL_REPLAY_NEEDED 0x1 /* replay needed - internal only */
|
||||
|
||||
/*
|
||||
* Log block trailer - structure at the end of the header and each log block
|
||||
*
|
||||
|
@ -299,7 +305,27 @@ typedef struct {
|
|||
*/
|
||||
|
||||
/*
|
||||
* ZFS intent log transaction structure
|
||||
* Writes are handled in three different ways:
|
||||
*
|
||||
* WR_INDIRECT:
|
||||
* In this mode, if we need to commit the write later, then the block
|
||||
* is immediately written into the file system (using dmu_sync),
|
||||
* and a pointer to the block is put into the log record.
|
||||
* When the txg commits the block is linked in.
|
||||
* This saves additionally writing the data into the log record.
|
||||
* There are a few requirements for this to occur:
|
||||
* - write is greater than zfs/zvol_immediate_write_sz
|
||||
* - not using slogs (as slogs are assumed to always be faster
|
||||
* than writing into the main pool)
|
||||
* - the write occupies only one block
|
||||
* WR_COPIED:
|
||||
* If we know we'll immediately be committing the
|
||||
* transaction (FSYNC or FDSYNC), then we allocate a larger
|
||||
* log record here for the data and copy the data in.
|
||||
* WR_NEED_COPY:
|
||||
* Otherwise we don't allocate a buffer, and *if* we need to
|
||||
* flush the write later then a buffer is allocated and
|
||||
* we retrieve the data using the dmu.
|
||||
*/
|
||||
typedef enum {
|
||||
WR_INDIRECT, /* indirect - a large write (dmu_sync() data */
|
||||
|
@ -359,9 +385,9 @@ extern uint64_t zil_itx_assign(zilog_t *zilog, itx_t *itx, dmu_tx_t *tx);
|
|||
|
||||
extern void zil_commit(zilog_t *zilog, uint64_t seq, uint64_t oid);
|
||||
|
||||
extern int zil_vdev_offline(char *osname, void *txarg);
|
||||
extern int zil_claim(char *osname, void *txarg);
|
||||
extern int zil_check_log_chain(char *osname, void *txarg);
|
||||
extern int zil_clear_log_chain(char *osname, void *txarg);
|
||||
extern void zil_sync(zilog_t *zilog, dmu_tx_t *tx);
|
||||
extern void zil_clean(zilog_t *zilog);
|
||||
extern int zil_is_committed(zilog_t *zilog);
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
|
@ -101,6 +101,9 @@ typedef struct zil_dva_node {
|
|||
avl_node_t zn_node;
|
||||
} zil_dva_node_t;
|
||||
|
||||
#define ZIL_MAX_LOG_DATA (SPA_MAXBLOCKSIZE - sizeof (zil_trailer_t) - \
|
||||
sizeof (lr_write_t))
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -76,7 +76,7 @@ enum zio_checksum {
|
|||
ZIO_CHECKSUM_FUNCTIONS
|
||||
};
|
||||
|
||||
#define ZIO_CHECKSUM_ON_VALUE ZIO_CHECKSUM_FLETCHER_2
|
||||
#define ZIO_CHECKSUM_ON_VALUE ZIO_CHECKSUM_FLETCHER_4
|
||||
#define ZIO_CHECKSUM_DEFAULT ZIO_CHECKSUM_ON
|
||||
|
||||
enum zio_compress {
|
||||
|
@ -116,30 +116,33 @@ enum zio_compress {
|
|||
#define ZIO_PRIORITY_SCRUB (zio_priority_table[9])
|
||||
#define ZIO_PRIORITY_TABLE_SIZE 10
|
||||
|
||||
#define ZIO_FLAG_MUSTSUCCEED 0x00000
|
||||
#define ZIO_FLAG_CANFAIL 0x00001
|
||||
#define ZIO_FLAG_SPECULATIVE 0x00002
|
||||
#define ZIO_FLAG_CONFIG_WRITER 0x00004
|
||||
#define ZIO_FLAG_DONT_RETRY 0x00008
|
||||
#define ZIO_FLAG_MUSTSUCCEED 0x000000
|
||||
#define ZIO_FLAG_CANFAIL 0x000001
|
||||
#define ZIO_FLAG_SPECULATIVE 0x000002
|
||||
#define ZIO_FLAG_CONFIG_WRITER 0x000004
|
||||
#define ZIO_FLAG_DONT_RETRY 0x000008
|
||||
|
||||
#define ZIO_FLAG_DONT_CACHE 0x00010
|
||||
#define ZIO_FLAG_DONT_QUEUE 0x00020
|
||||
#define ZIO_FLAG_DONT_AGGREGATE 0x00040
|
||||
#define ZIO_FLAG_DONT_PROPAGATE 0x00080
|
||||
#define ZIO_FLAG_DONT_CACHE 0x000010
|
||||
#define ZIO_FLAG_DONT_QUEUE 0x000020
|
||||
#define ZIO_FLAG_DONT_AGGREGATE 0x000040
|
||||
#define ZIO_FLAG_DONT_PROPAGATE 0x000080
|
||||
|
||||
#define ZIO_FLAG_IO_BYPASS 0x00100
|
||||
#define ZIO_FLAG_IO_REPAIR 0x00200
|
||||
#define ZIO_FLAG_IO_RETRY 0x00400
|
||||
#define ZIO_FLAG_IO_REWRITE 0x00800
|
||||
#define ZIO_FLAG_IO_BYPASS 0x000100
|
||||
#define ZIO_FLAG_IO_REPAIR 0x000200
|
||||
#define ZIO_FLAG_IO_RETRY 0x000400
|
||||
#define ZIO_FLAG_IO_REWRITE 0x000800
|
||||
|
||||
#define ZIO_FLAG_SELF_HEAL 0x01000
|
||||
#define ZIO_FLAG_RESILVER 0x02000
|
||||
#define ZIO_FLAG_SCRUB 0x04000
|
||||
#define ZIO_FLAG_SCRUB_THREAD 0x08000
|
||||
#define ZIO_FLAG_SELF_HEAL 0x001000
|
||||
#define ZIO_FLAG_RESILVER 0x002000
|
||||
#define ZIO_FLAG_SCRUB 0x004000
|
||||
#define ZIO_FLAG_SCRUB_THREAD 0x008000
|
||||
|
||||
#define ZIO_FLAG_PROBE 0x10000
|
||||
#define ZIO_FLAG_GANG_CHILD 0x20000
|
||||
#define ZIO_FLAG_RAW 0x40000
|
||||
#define ZIO_FLAG_PROBE 0x010000
|
||||
#define ZIO_FLAG_GANG_CHILD 0x020000
|
||||
#define ZIO_FLAG_RAW 0x040000
|
||||
#define ZIO_FLAG_GODFATHER 0x080000
|
||||
|
||||
#define ZIO_FLAG_TRYHARD 0x100000
|
||||
|
||||
#define ZIO_FLAG_GANG_INHERIT \
|
||||
(ZIO_FLAG_CANFAIL | \
|
||||
|
@ -157,7 +160,8 @@ enum zio_compress {
|
|||
(ZIO_FLAG_GANG_INHERIT | \
|
||||
ZIO_FLAG_IO_REPAIR | \
|
||||
ZIO_FLAG_IO_RETRY | \
|
||||
ZIO_FLAG_PROBE)
|
||||
ZIO_FLAG_PROBE | \
|
||||
ZIO_FLAG_TRYHARD)
|
||||
|
||||
#define ZIO_FLAG_AGG_INHERIT \
|
||||
(ZIO_FLAG_DONT_AGGREGATE | \
|
||||
|
@ -281,7 +285,6 @@ struct zio {
|
|||
int io_cmd;
|
||||
uint8_t io_priority;
|
||||
uint8_t io_reexecute;
|
||||
uint8_t io_async_root;
|
||||
uint8_t io_state[ZIO_WAIT_TYPES];
|
||||
uint64_t io_txg;
|
||||
spa_t *io_spa;
|
||||
|
@ -324,6 +327,7 @@ struct zio {
|
|||
int io_child_error[ZIO_CHILD_TYPES];
|
||||
uint64_t io_children[ZIO_CHILD_TYPES][ZIO_WAIT_TYPES];
|
||||
uint64_t *io_stall;
|
||||
zio_t *io_gang_leader;
|
||||
zio_gang_node_t *io_gang_tree;
|
||||
void *io_executor;
|
||||
void *io_waiter;
|
||||
|
@ -415,7 +419,7 @@ extern uint8_t zio_checksum_select(uint8_t child, uint8_t parent);
|
|||
extern uint8_t zio_compress_select(uint8_t child, uint8_t parent);
|
||||
|
||||
extern void zio_suspend(spa_t *spa, zio_t *zio);
|
||||
extern void zio_resume(spa_t *spa);
|
||||
extern int zio_resume(spa_t *spa);
|
||||
extern void zio_resume_wait(spa_t *spa);
|
||||
|
||||
/*
|
||||
|
@ -435,7 +439,7 @@ extern int zio_inject_list_next(int *id, char *name, size_t buflen,
|
|||
struct zinject_record *record);
|
||||
extern int zio_clear_fault(int id);
|
||||
extern int zio_handle_fault_injection(zio_t *zio, int error);
|
||||
extern int zio_handle_device_injection(vdev_t *vd, int error);
|
||||
extern int zio_handle_device_injection(vdev_t *vd, zio_t *zio, int error);
|
||||
extern int zio_handle_label_injection(zio_t *zio, int error);
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
|
@ -35,19 +35,36 @@
|
|||
uint64_t metaslab_aliquot = 512ULL << 10;
|
||||
uint64_t metaslab_gang_bang = SPA_MAXBLOCKSIZE + 1; /* force gang blocks */
|
||||
|
||||
/*
|
||||
* Minimum size which forces the dynamic allocator to change
|
||||
* its allocation strategy. Once the space map cannot satisfy
|
||||
* an allocation of this size then it switches to using a more
|
||||
* aggressive strategy (i.e., search by size rather than offset).
|
||||
*/
|
||||
uint64_t metaslab_df_alloc_threshold = SPA_MAXBLOCKSIZE;
|
||||
|
||||
/*
|
||||
* The minimum free space, in percent, which must be available
|
||||
* in a space map to continue allocations in a first-fit fashion.
|
||||
* Once the space_map's free space drops below this level we dynamically
|
||||
* switch to using best-fit allocations.
|
||||
*/
|
||||
int metaslab_df_free_pct = 30;
|
||||
|
||||
/*
|
||||
* ==========================================================================
|
||||
* Metaslab classes
|
||||
* ==========================================================================
|
||||
*/
|
||||
metaslab_class_t *
|
||||
metaslab_class_create(void)
|
||||
metaslab_class_create(space_map_ops_t *ops)
|
||||
{
|
||||
metaslab_class_t *mc;
|
||||
|
||||
mc = kmem_zalloc(sizeof (metaslab_class_t), KM_SLEEP);
|
||||
|
||||
mc->mc_rotor = NULL;
|
||||
mc->mc_ops = ops;
|
||||
|
||||
return (mc);
|
||||
}
|
||||
|
@ -202,30 +219,14 @@ metaslab_group_sort(metaslab_group_t *mg, metaslab_t *msp, uint64_t weight)
|
|||
}
|
||||
|
||||
/*
|
||||
* ==========================================================================
|
||||
* The first-fit block allocator
|
||||
* ==========================================================================
|
||||
* This is a helper function that can be used by the allocator to find
|
||||
* a suitable block to allocate. This will search the specified AVL
|
||||
* tree looking for a block that matches the specified criteria.
|
||||
*/
|
||||
static void
|
||||
metaslab_ff_load(space_map_t *sm)
|
||||
{
|
||||
ASSERT(sm->sm_ppd == NULL);
|
||||
sm->sm_ppd = kmem_zalloc(64 * sizeof (uint64_t), KM_SLEEP);
|
||||
}
|
||||
|
||||
static void
|
||||
metaslab_ff_unload(space_map_t *sm)
|
||||
{
|
||||
kmem_free(sm->sm_ppd, 64 * sizeof (uint64_t));
|
||||
sm->sm_ppd = NULL;
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
metaslab_ff_alloc(space_map_t *sm, uint64_t size)
|
||||
metaslab_block_picker(avl_tree_t *t, uint64_t *cursor, uint64_t size,
|
||||
uint64_t align)
|
||||
{
|
||||
avl_tree_t *t = &sm->sm_root;
|
||||
uint64_t align = size & -size;
|
||||
uint64_t *cursor = (uint64_t *)sm->sm_ppd + highbit(align) - 1;
|
||||
space_seg_t *ss, ssearch;
|
||||
avl_index_t where;
|
||||
|
||||
|
@ -254,7 +255,37 @@ metaslab_ff_alloc(space_map_t *sm, uint64_t size)
|
|||
return (-1ULL);
|
||||
|
||||
*cursor = 0;
|
||||
return (metaslab_ff_alloc(sm, size));
|
||||
return (metaslab_block_picker(t, cursor, size, align));
|
||||
}
|
||||
|
||||
/*
|
||||
* ==========================================================================
|
||||
* The first-fit block allocator
|
||||
* ==========================================================================
|
||||
*/
|
||||
static void
|
||||
metaslab_ff_load(space_map_t *sm)
|
||||
{
|
||||
ASSERT(sm->sm_ppd == NULL);
|
||||
sm->sm_ppd = kmem_zalloc(64 * sizeof (uint64_t), KM_SLEEP);
|
||||
sm->sm_pp_root = NULL;
|
||||
}
|
||||
|
||||
static void
|
||||
metaslab_ff_unload(space_map_t *sm)
|
||||
{
|
||||
kmem_free(sm->sm_ppd, 64 * sizeof (uint64_t));
|
||||
sm->sm_ppd = NULL;
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
metaslab_ff_alloc(space_map_t *sm, uint64_t size)
|
||||
{
|
||||
avl_tree_t *t = &sm->sm_root;
|
||||
uint64_t align = size & -size;
|
||||
uint64_t *cursor = (uint64_t *)sm->sm_ppd + highbit(align) - 1;
|
||||
|
||||
return (metaslab_block_picker(t, cursor, size, align));
|
||||
}
|
||||
|
||||
/* ARGSUSED */
|
||||
|
@ -276,9 +307,136 @@ static space_map_ops_t metaslab_ff_ops = {
|
|||
metaslab_ff_unload,
|
||||
metaslab_ff_alloc,
|
||||
metaslab_ff_claim,
|
||||
metaslab_ff_free
|
||||
metaslab_ff_free,
|
||||
NULL /* maxsize */
|
||||
};
|
||||
|
||||
/*
|
||||
* Dynamic block allocator -
|
||||
* Uses the first fit allocation scheme until space gets low and then
|
||||
* adjusts to a best fit allocation method. Uses metaslab_df_alloc_threshold
|
||||
* and metaslab_df_free_pct to determine when to switch the allocation scheme.
|
||||
*/
|
||||
|
||||
uint64_t
|
||||
metaslab_df_maxsize(space_map_t *sm)
|
||||
{
|
||||
avl_tree_t *t = sm->sm_pp_root;
|
||||
space_seg_t *ss;
|
||||
|
||||
if (t == NULL || (ss = avl_last(t)) == NULL)
|
||||
return (0ULL);
|
||||
|
||||
return (ss->ss_end - ss->ss_start);
|
||||
}
|
||||
|
||||
static int
|
||||
metaslab_df_seg_compare(const void *x1, const void *x2)
|
||||
{
|
||||
const space_seg_t *s1 = x1;
|
||||
const space_seg_t *s2 = x2;
|
||||
uint64_t ss_size1 = s1->ss_end - s1->ss_start;
|
||||
uint64_t ss_size2 = s2->ss_end - s2->ss_start;
|
||||
|
||||
if (ss_size1 < ss_size2)
|
||||
return (-1);
|
||||
if (ss_size1 > ss_size2)
|
||||
return (1);
|
||||
|
||||
if (s1->ss_start < s2->ss_start)
|
||||
return (-1);
|
||||
if (s1->ss_start > s2->ss_start)
|
||||
return (1);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static void
|
||||
metaslab_df_load(space_map_t *sm)
|
||||
{
|
||||
space_seg_t *ss;
|
||||
|
||||
ASSERT(sm->sm_ppd == NULL);
|
||||
sm->sm_ppd = kmem_zalloc(64 * sizeof (uint64_t), KM_SLEEP);
|
||||
|
||||
sm->sm_pp_root = kmem_alloc(sizeof (avl_tree_t), KM_SLEEP);
|
||||
avl_create(sm->sm_pp_root, metaslab_df_seg_compare,
|
||||
sizeof (space_seg_t), offsetof(struct space_seg, ss_pp_node));
|
||||
|
||||
for (ss = avl_first(&sm->sm_root); ss; ss = AVL_NEXT(&sm->sm_root, ss))
|
||||
avl_add(sm->sm_pp_root, ss);
|
||||
}
|
||||
|
||||
static void
|
||||
metaslab_df_unload(space_map_t *sm)
|
||||
{
|
||||
void *cookie = NULL;
|
||||
|
||||
kmem_free(sm->sm_ppd, 64 * sizeof (uint64_t));
|
||||
sm->sm_ppd = NULL;
|
||||
|
||||
while (avl_destroy_nodes(sm->sm_pp_root, &cookie) != NULL) {
|
||||
/* tear down the tree */
|
||||
}
|
||||
|
||||
avl_destroy(sm->sm_pp_root);
|
||||
kmem_free(sm->sm_pp_root, sizeof (avl_tree_t));
|
||||
sm->sm_pp_root = NULL;
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
metaslab_df_alloc(space_map_t *sm, uint64_t size)
|
||||
{
|
||||
avl_tree_t *t = &sm->sm_root;
|
||||
uint64_t align = size & -size;
|
||||
uint64_t *cursor = (uint64_t *)sm->sm_ppd + highbit(align) - 1;
|
||||
uint64_t max_size = metaslab_df_maxsize(sm);
|
||||
int free_pct = sm->sm_space * 100 / sm->sm_size;
|
||||
|
||||
ASSERT(MUTEX_HELD(sm->sm_lock));
|
||||
ASSERT3U(avl_numnodes(&sm->sm_root), ==, avl_numnodes(sm->sm_pp_root));
|
||||
|
||||
if (max_size < size)
|
||||
return (-1ULL);
|
||||
|
||||
/*
|
||||
* If we're running low on space switch to using the size
|
||||
* sorted AVL tree (best-fit).
|
||||
*/
|
||||
if (max_size < metaslab_df_alloc_threshold ||
|
||||
free_pct < metaslab_df_free_pct) {
|
||||
t = sm->sm_pp_root;
|
||||
*cursor = 0;
|
||||
}
|
||||
|
||||
return (metaslab_block_picker(t, cursor, size, 1ULL));
|
||||
}
|
||||
|
||||
/* ARGSUSED */
|
||||
static void
|
||||
metaslab_df_claim(space_map_t *sm, uint64_t start, uint64_t size)
|
||||
{
|
||||
/* No need to update cursor */
|
||||
}
|
||||
|
||||
/* ARGSUSED */
|
||||
static void
|
||||
metaslab_df_free(space_map_t *sm, uint64_t start, uint64_t size)
|
||||
{
|
||||
/* No need to update cursor */
|
||||
}
|
||||
|
||||
static space_map_ops_t metaslab_df_ops = {
|
||||
metaslab_df_load,
|
||||
metaslab_df_unload,
|
||||
metaslab_df_alloc,
|
||||
metaslab_df_claim,
|
||||
metaslab_df_free,
|
||||
metaslab_df_maxsize
|
||||
};
|
||||
|
||||
space_map_ops_t *zfs_metaslab_ops = &metaslab_df_ops;
|
||||
|
||||
/*
|
||||
* ==========================================================================
|
||||
* Metaslabs
|
||||
|
@ -414,20 +572,28 @@ metaslab_weight(metaslab_t *msp)
|
|||
}
|
||||
|
||||
static int
|
||||
metaslab_activate(metaslab_t *msp, uint64_t activation_weight)
|
||||
metaslab_activate(metaslab_t *msp, uint64_t activation_weight, uint64_t size)
|
||||
{
|
||||
space_map_t *sm = &msp->ms_map;
|
||||
space_map_ops_t *sm_ops = msp->ms_group->mg_class->mc_ops;
|
||||
|
||||
ASSERT(MUTEX_HELD(&msp->ms_lock));
|
||||
|
||||
if ((msp->ms_weight & METASLAB_ACTIVE_MASK) == 0) {
|
||||
int error = space_map_load(sm, &metaslab_ff_ops,
|
||||
SM_FREE, &msp->ms_smo,
|
||||
int error = space_map_load(sm, sm_ops, SM_FREE, &msp->ms_smo,
|
||||
msp->ms_group->mg_vd->vdev_spa->spa_meta_objset);
|
||||
if (error) {
|
||||
metaslab_group_sort(msp->ms_group, msp, 0);
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* If we were able to load the map then make sure
|
||||
* that this map is still able to satisfy our request.
|
||||
*/
|
||||
if (msp->ms_weight < size)
|
||||
return (ENOSPC);
|
||||
|
||||
metaslab_group_sort(msp->ms_group, msp,
|
||||
msp->ms_weight | activation_weight);
|
||||
}
|
||||
|
@ -636,11 +802,16 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t size, uint64_t txg,
|
|||
int i;
|
||||
|
||||
activation_weight = METASLAB_WEIGHT_PRIMARY;
|
||||
for (i = 0; i < d; i++)
|
||||
if (DVA_GET_VDEV(&dva[i]) == mg->mg_vd->vdev_id)
|
||||
for (i = 0; i < d; i++) {
|
||||
if (DVA_GET_VDEV(&dva[i]) == mg->mg_vd->vdev_id) {
|
||||
activation_weight = METASLAB_WEIGHT_SECONDARY;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
for (;;) {
|
||||
boolean_t was_active;
|
||||
|
||||
mutex_enter(&mg->mg_lock);
|
||||
for (msp = avl_first(t); msp; msp = AVL_NEXT(t, msp)) {
|
||||
if (msp->ms_weight < size) {
|
||||
|
@ -648,6 +819,7 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t size, uint64_t txg,
|
|||
return (-1ULL);
|
||||
}
|
||||
|
||||
was_active = msp->ms_weight & METASLAB_ACTIVE_MASK;
|
||||
if (activation_weight == METASLAB_WEIGHT_PRIMARY)
|
||||
break;
|
||||
|
||||
|
@ -673,7 +845,9 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t size, uint64_t txg,
|
|||
* another thread may have changed the weight while we
|
||||
* were blocked on the metaslab lock.
|
||||
*/
|
||||
if (msp->ms_weight < size) {
|
||||
if (msp->ms_weight < size || (was_active &&
|
||||
!(msp->ms_weight & METASLAB_ACTIVE_MASK) &&
|
||||
activation_weight == METASLAB_WEIGHT_PRIMARY)) {
|
||||
mutex_exit(&msp->ms_lock);
|
||||
continue;
|
||||
}
|
||||
|
@ -686,7 +860,7 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t size, uint64_t txg,
|
|||
continue;
|
||||
}
|
||||
|
||||
if (metaslab_activate(msp, activation_weight) != 0) {
|
||||
if (metaslab_activate(msp, activation_weight, size) != 0) {
|
||||
mutex_exit(&msp->ms_lock);
|
||||
continue;
|
||||
}
|
||||
|
@ -869,7 +1043,7 @@ next:
|
|||
goto top;
|
||||
}
|
||||
|
||||
if (!zio_lock) {
|
||||
if (!allocatable && !zio_lock) {
|
||||
dshift = 3;
|
||||
zio_lock = B_TRUE;
|
||||
goto top;
|
||||
|
@ -955,7 +1129,7 @@ metaslab_claim_dva(spa_t *spa, const dva_t *dva, uint64_t txg)
|
|||
|
||||
mutex_enter(&msp->ms_lock);
|
||||
|
||||
error = metaslab_activate(msp, METASLAB_WEIGHT_SECONDARY);
|
||||
error = metaslab_activate(msp, METASLAB_WEIGHT_SECONDARY, 0);
|
||||
if (error || txg == 0) { /* txg == 0 indicates dry run */
|
||||
mutex_exit(&msp->ms_lock);
|
||||
return (error);
|
||||
|
|
971
module/zfs/spa.c
971
module/zfs/spa.c
File diff suppressed because it is too large
Load Diff
|
@ -432,10 +432,9 @@ spa_config_update_common(spa_t *spa, int what, boolean_t isroot)
|
|||
*/
|
||||
for (c = 0; c < rvd->vdev_children; c++) {
|
||||
vdev_t *tvd = rvd->vdev_child[c];
|
||||
if (tvd->vdev_ms_array == 0) {
|
||||
vdev_init(tvd, txg);
|
||||
vdev_config_dirty(tvd);
|
||||
}
|
||||
if (tvd->vdev_ms_array == 0)
|
||||
vdev_metaslab_set_size(tvd);
|
||||
vdev_expand(tvd, txg);
|
||||
}
|
||||
}
|
||||
spa_config_exit(spa, SCL_ALL, FTAG);
|
||||
|
|
|
@ -19,12 +19,10 @@
|
|||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
/*
|
||||
* Routines to manage the on-disk persistent error log.
|
||||
*
|
||||
|
@ -61,8 +59,8 @@
|
|||
* lowercase hexadecimal numbers that don't overflow.
|
||||
*/
|
||||
#ifdef _KERNEL
|
||||
static uint64_t
|
||||
strtonum(char *str, char **nptr)
|
||||
uint64_t
|
||||
strtonum(const char *str, char **nptr)
|
||||
{
|
||||
uint64_t val = 0;
|
||||
char c;
|
||||
|
@ -82,7 +80,8 @@ strtonum(char *str, char **nptr)
|
|||
str++;
|
||||
}
|
||||
|
||||
*nptr = str;
|
||||
if (nptr)
|
||||
*nptr = (char *)str;
|
||||
|
||||
return (val);
|
||||
}
|
||||
|
|
|
@ -20,12 +20,10 @@
|
|||
*/
|
||||
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/spa.h>
|
||||
#include <sys/spa_impl.h>
|
||||
#include <sys/zap.h>
|
||||
|
@ -127,12 +125,12 @@ spa_history_advance_bof(spa_t *spa, spa_history_phys_t *shpp)
|
|||
firstread = MIN(sizeof (reclen), shpp->sh_phys_max_off - phys_bof);
|
||||
|
||||
if ((err = dmu_read(mos, spa->spa_history, phys_bof, firstread,
|
||||
buf)) != 0)
|
||||
buf, DMU_READ_PREFETCH)) != 0)
|
||||
return (err);
|
||||
if (firstread != sizeof (reclen)) {
|
||||
if ((err = dmu_read(mos, spa->spa_history,
|
||||
shpp->sh_pool_create_len, sizeof (reclen) - firstread,
|
||||
buf + firstread)) != 0)
|
||||
buf + firstread, DMU_READ_PREFETCH)) != 0)
|
||||
return (err);
|
||||
}
|
||||
|
||||
|
@ -380,10 +378,11 @@ spa_history_get(spa_t *spa, uint64_t *offp, uint64_t *len, char *buf)
|
|||
return (0);
|
||||
}
|
||||
|
||||
err = dmu_read(mos, spa->spa_history, phys_read_off, read_len, buf);
|
||||
err = dmu_read(mos, spa->spa_history, phys_read_off, read_len, buf,
|
||||
DMU_READ_PREFETCH);
|
||||
if (leftover && err == 0) {
|
||||
err = dmu_read(mos, spa->spa_history, shpp->sh_pool_create_len,
|
||||
leftover, buf + read_len);
|
||||
leftover, buf + read_len, DMU_READ_PREFETCH);
|
||||
}
|
||||
mutex_exit(&spa->spa_history_lock);
|
||||
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
|
@ -425,7 +425,6 @@ spa_add(const char *name, const char *altroot)
|
|||
spa = kmem_zalloc(sizeof (spa_t), KM_SLEEP);
|
||||
|
||||
mutex_init(&spa->spa_async_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
mutex_init(&spa->spa_async_root_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
mutex_init(&spa->spa_scrub_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
mutex_init(&spa->spa_errlog_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
mutex_init(&spa->spa_errlist_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
|
@ -434,7 +433,6 @@ spa_add(const char *name, const char *altroot)
|
|||
mutex_init(&spa->spa_props_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
|
||||
cv_init(&spa->spa_async_cv, NULL, CV_DEFAULT, NULL);
|
||||
cv_init(&spa->spa_async_root_cv, NULL, CV_DEFAULT, NULL);
|
||||
cv_init(&spa->spa_scrub_io_cv, NULL, CV_DEFAULT, NULL);
|
||||
cv_init(&spa->spa_suspend_cv, NULL, CV_DEFAULT, NULL);
|
||||
|
||||
|
@ -508,12 +506,10 @@ spa_remove(spa_t *spa)
|
|||
spa_config_lock_destroy(spa);
|
||||
|
||||
cv_destroy(&spa->spa_async_cv);
|
||||
cv_destroy(&spa->spa_async_root_cv);
|
||||
cv_destroy(&spa->spa_scrub_io_cv);
|
||||
cv_destroy(&spa->spa_suspend_cv);
|
||||
|
||||
mutex_destroy(&spa->spa_async_lock);
|
||||
mutex_destroy(&spa->spa_async_root_lock);
|
||||
mutex_destroy(&spa->spa_scrub_lock);
|
||||
mutex_destroy(&spa->spa_errlog_lock);
|
||||
mutex_destroy(&spa->spa_errlist_lock);
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
|
@ -116,12 +116,23 @@ space_map_add(space_map_t *sm, uint64_t start, uint64_t size)
|
|||
|
||||
if (merge_before && merge_after) {
|
||||
avl_remove(&sm->sm_root, ss_before);
|
||||
if (sm->sm_pp_root) {
|
||||
avl_remove(sm->sm_pp_root, ss_before);
|
||||
avl_remove(sm->sm_pp_root, ss_after);
|
||||
}
|
||||
ss_after->ss_start = ss_before->ss_start;
|
||||
kmem_free(ss_before, sizeof (*ss_before));
|
||||
ss = ss_after;
|
||||
} else if (merge_before) {
|
||||
ss_before->ss_end = end;
|
||||
if (sm->sm_pp_root)
|
||||
avl_remove(sm->sm_pp_root, ss_before);
|
||||
ss = ss_before;
|
||||
} else if (merge_after) {
|
||||
ss_after->ss_start = start;
|
||||
if (sm->sm_pp_root)
|
||||
avl_remove(sm->sm_pp_root, ss_after);
|
||||
ss = ss_after;
|
||||
} else {
|
||||
ss = kmem_alloc(sizeof (*ss), KM_SLEEP);
|
||||
ss->ss_start = start;
|
||||
|
@ -129,6 +140,9 @@ space_map_add(space_map_t *sm, uint64_t start, uint64_t size)
|
|||
avl_insert(&sm->sm_root, ss, where);
|
||||
}
|
||||
|
||||
if (sm->sm_pp_root)
|
||||
avl_add(sm->sm_pp_root, ss);
|
||||
|
||||
sm->sm_space += size;
|
||||
}
|
||||
|
||||
|
@ -163,12 +177,17 @@ space_map_remove(space_map_t *sm, uint64_t start, uint64_t size)
|
|||
left_over = (ss->ss_start != start);
|
||||
right_over = (ss->ss_end != end);
|
||||
|
||||
if (sm->sm_pp_root)
|
||||
avl_remove(sm->sm_pp_root, ss);
|
||||
|
||||
if (left_over && right_over) {
|
||||
newseg = kmem_alloc(sizeof (*newseg), KM_SLEEP);
|
||||
newseg->ss_start = end;
|
||||
newseg->ss_end = ss->ss_end;
|
||||
ss->ss_end = start;
|
||||
avl_insert_here(&sm->sm_root, newseg, ss, AVL_AFTER);
|
||||
if (sm->sm_pp_root)
|
||||
avl_add(sm->sm_pp_root, newseg);
|
||||
} else if (left_over) {
|
||||
ss->ss_end = start;
|
||||
} else if (right_over) {
|
||||
|
@ -176,8 +195,12 @@ space_map_remove(space_map_t *sm, uint64_t start, uint64_t size)
|
|||
} else {
|
||||
avl_remove(&sm->sm_root, ss);
|
||||
kmem_free(ss, sizeof (*ss));
|
||||
ss = NULL;
|
||||
}
|
||||
|
||||
if (sm->sm_pp_root && ss != NULL)
|
||||
avl_add(sm->sm_pp_root, ss);
|
||||
|
||||
sm->sm_space -= size;
|
||||
}
|
||||
|
||||
|
@ -288,7 +311,8 @@ space_map_load(space_map_t *sm, space_map_ops_t *ops, uint8_t maptype,
|
|||
smo->smo_object, offset, size);
|
||||
|
||||
mutex_exit(sm->sm_lock);
|
||||
error = dmu_read(os, smo->smo_object, offset, size, entry_map);
|
||||
error = dmu_read(os, smo->smo_object, offset, size, entry_map,
|
||||
DMU_READ_PREFETCH);
|
||||
mutex_enter(sm->sm_lock);
|
||||
if (error != 0)
|
||||
break;
|
||||
|
@ -341,6 +365,15 @@ space_map_unload(space_map_t *sm)
|
|||
space_map_vacate(sm, NULL, NULL);
|
||||
}
|
||||
|
||||
uint64_t
|
||||
space_map_maxsize(space_map_t *sm)
|
||||
{
|
||||
if (sm->sm_loaded && sm->sm_ops != NULL)
|
||||
return (sm->sm_ops->smop_max(sm));
|
||||
else
|
||||
return (-1ULL);
|
||||
}
|
||||
|
||||
uint64_t
|
||||
space_map_alloc(space_map_t *sm, uint64_t size)
|
||||
{
|
||||
|
|
|
@ -39,6 +39,7 @@
|
|||
#include <sys/zap.h>
|
||||
#include <sys/fs/zfs.h>
|
||||
#include <sys/arc.h>
|
||||
#include <sys/zil.h>
|
||||
|
||||
/*
|
||||
* Virtual device management.
|
||||
|
@ -83,9 +84,8 @@ vdev_default_asize(vdev_t *vd, uint64_t psize)
|
|||
{
|
||||
uint64_t asize = P2ROUNDUP(psize, 1ULL << vd->vdev_top->vdev_ashift);
|
||||
uint64_t csize;
|
||||
uint64_t c;
|
||||
|
||||
for (c = 0; c < vd->vdev_children; c++) {
|
||||
for (int c = 0; c < vd->vdev_children; c++) {
|
||||
csize = vdev_psize_to_asize(vd->vdev_child[c], psize);
|
||||
asize = MAX(asize, csize);
|
||||
}
|
||||
|
@ -94,40 +94,47 @@ vdev_default_asize(vdev_t *vd, uint64_t psize)
|
|||
}
|
||||
|
||||
/*
|
||||
* Get the replaceable or attachable device size.
|
||||
* If the parent is a mirror or raidz, the replaceable size is the minimum
|
||||
* psize of all its children. For the rest, just return our own psize.
|
||||
*
|
||||
* e.g.
|
||||
* psize rsize
|
||||
* root - -
|
||||
* mirror/raidz - -
|
||||
* disk1 20g 20g
|
||||
* disk2 40g 20g
|
||||
* disk3 80g 80g
|
||||
* Get the minimum allocatable size. We define the allocatable size as
|
||||
* the vdev's asize rounded to the nearest metaslab. This allows us to
|
||||
* replace or attach devices which don't have the same physical size but
|
||||
* can still satisfy the same number of allocations.
|
||||
*/
|
||||
uint64_t
|
||||
vdev_get_rsize(vdev_t *vd)
|
||||
vdev_get_min_asize(vdev_t *vd)
|
||||
{
|
||||
vdev_t *pvd, *cvd;
|
||||
uint64_t c, rsize;
|
||||
|
||||
pvd = vd->vdev_parent;
|
||||
vdev_t *pvd = vd->vdev_parent;
|
||||
|
||||
/*
|
||||
* If our parent is NULL or the root, just return our own psize.
|
||||
* If our parent is NULL (inactive spare or cache) or is the root,
|
||||
* just return our own asize.
|
||||
*/
|
||||
if (pvd == NULL || pvd->vdev_parent == NULL)
|
||||
return (vd->vdev_psize);
|
||||
if (pvd == NULL)
|
||||
return (vd->vdev_asize);
|
||||
|
||||
rsize = 0;
|
||||
/*
|
||||
* The top-level vdev just returns the allocatable size rounded
|
||||
* to the nearest metaslab.
|
||||
*/
|
||||
if (vd == vd->vdev_top)
|
||||
return (P2ALIGN(vd->vdev_asize, 1ULL << vd->vdev_ms_shift));
|
||||
|
||||
for (c = 0; c < pvd->vdev_children; c++) {
|
||||
cvd = pvd->vdev_child[c];
|
||||
rsize = MIN(rsize - 1, cvd->vdev_psize - 1) + 1;
|
||||
}
|
||||
/*
|
||||
* The allocatable space for a raidz vdev is N * sizeof(smallest child),
|
||||
* so each child must provide at least 1/Nth of its asize.
|
||||
*/
|
||||
if (pvd->vdev_ops == &vdev_raidz_ops)
|
||||
return (pvd->vdev_min_asize / pvd->vdev_children);
|
||||
|
||||
return (rsize);
|
||||
return (pvd->vdev_min_asize);
|
||||
}
|
||||
|
||||
void
|
||||
vdev_set_min_asize(vdev_t *vd)
|
||||
{
|
||||
vd->vdev_min_asize = vdev_get_min_asize(vd);
|
||||
|
||||
for (int c = 0; c < vd->vdev_children; c++)
|
||||
vdev_set_min_asize(vd->vdev_child[c]);
|
||||
}
|
||||
|
||||
vdev_t *
|
||||
|
@ -148,13 +155,12 @@ vdev_lookup_top(spa_t *spa, uint64_t vdev)
|
|||
vdev_t *
|
||||
vdev_lookup_by_guid(vdev_t *vd, uint64_t guid)
|
||||
{
|
||||
int c;
|
||||
vdev_t *mvd;
|
||||
|
||||
if (vd->vdev_guid == guid)
|
||||
return (vd);
|
||||
|
||||
for (c = 0; c < vd->vdev_children; c++)
|
||||
for (int c = 0; c < vd->vdev_children; c++)
|
||||
if ((mvd = vdev_lookup_by_guid(vd->vdev_child[c], guid)) !=
|
||||
NULL)
|
||||
return (mvd);
|
||||
|
@ -250,17 +256,17 @@ vdev_compact_children(vdev_t *pvd)
|
|||
{
|
||||
vdev_t **newchild, *cvd;
|
||||
int oldc = pvd->vdev_children;
|
||||
int newc, c;
|
||||
int newc;
|
||||
|
||||
ASSERT(spa_config_held(pvd->vdev_spa, SCL_ALL, RW_WRITER) == SCL_ALL);
|
||||
|
||||
for (c = newc = 0; c < oldc; c++)
|
||||
for (int c = newc = 0; c < oldc; c++)
|
||||
if (pvd->vdev_child[c])
|
||||
newc++;
|
||||
|
||||
newchild = kmem_alloc(newc * sizeof (vdev_t *), KM_SLEEP);
|
||||
|
||||
for (c = newc = 0; c < oldc; c++) {
|
||||
for (int c = newc = 0; c < oldc; c++) {
|
||||
if ((cvd = pvd->vdev_child[c]) != NULL) {
|
||||
newchild[newc] = cvd;
|
||||
cvd->vdev_id = newc++;
|
||||
|
@ -372,6 +378,9 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
|
|||
} else if (alloctype == VDEV_ALLOC_L2CACHE) {
|
||||
if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) != 0)
|
||||
return (EINVAL);
|
||||
} else if (alloctype == VDEV_ALLOC_ROOTPOOL) {
|
||||
if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) != 0)
|
||||
return (EINVAL);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -435,6 +444,8 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
|
|||
if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PHYS_PATH,
|
||||
&vd->vdev_physpath) == 0)
|
||||
vd->vdev_physpath = spa_strdup(vd->vdev_physpath);
|
||||
if (nvlist_lookup_string(nv, ZPOOL_CONFIG_FRU, &vd->vdev_fru) == 0)
|
||||
vd->vdev_fru = spa_strdup(vd->vdev_fru);
|
||||
|
||||
/*
|
||||
* Set the whole_disk property. If it's not specified, leave the value
|
||||
|
@ -448,9 +459,8 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
|
|||
* Look for the 'not present' flag. This will only be set if the device
|
||||
* was not present at the time of import.
|
||||
*/
|
||||
if (!spa->spa_import_faulted)
|
||||
(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT,
|
||||
&vd->vdev_not_present);
|
||||
(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT,
|
||||
&vd->vdev_not_present);
|
||||
|
||||
/*
|
||||
* Get the alignment requirement.
|
||||
|
@ -473,13 +483,23 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
|
|||
* If we're a leaf vdev, try to load the DTL object and other state.
|
||||
*/
|
||||
if (vd->vdev_ops->vdev_op_leaf &&
|
||||
(alloctype == VDEV_ALLOC_LOAD || alloctype == VDEV_ALLOC_L2CACHE)) {
|
||||
(alloctype == VDEV_ALLOC_LOAD || alloctype == VDEV_ALLOC_L2CACHE ||
|
||||
alloctype == VDEV_ALLOC_ROOTPOOL)) {
|
||||
if (alloctype == VDEV_ALLOC_LOAD) {
|
||||
(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_DTL,
|
||||
&vd->vdev_dtl_smo.smo_object);
|
||||
(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_UNSPARE,
|
||||
&vd->vdev_unspare);
|
||||
}
|
||||
|
||||
if (alloctype == VDEV_ALLOC_ROOTPOOL) {
|
||||
uint64_t spare = 0;
|
||||
|
||||
if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_SPARE,
|
||||
&spare) == 0 && spare)
|
||||
spa_spare_add(vd);
|
||||
}
|
||||
|
||||
(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_OFFLINE,
|
||||
&vd->vdev_offline);
|
||||
|
||||
|
@ -511,7 +531,6 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
|
|||
void
|
||||
vdev_free(vdev_t *vd)
|
||||
{
|
||||
int c;
|
||||
spa_t *spa = vd->vdev_spa;
|
||||
|
||||
/*
|
||||
|
@ -525,7 +544,7 @@ vdev_free(vdev_t *vd)
|
|||
/*
|
||||
* Free all children.
|
||||
*/
|
||||
for (c = 0; c < vd->vdev_children; c++)
|
||||
for (int c = 0; c < vd->vdev_children; c++)
|
||||
vdev_free(vd->vdev_child[c]);
|
||||
|
||||
ASSERT(vd->vdev_child == NULL);
|
||||
|
@ -560,6 +579,8 @@ vdev_free(vdev_t *vd)
|
|||
spa_strfree(vd->vdev_devid);
|
||||
if (vd->vdev_physpath)
|
||||
spa_strfree(vd->vdev_physpath);
|
||||
if (vd->vdev_fru)
|
||||
spa_strfree(vd->vdev_fru);
|
||||
|
||||
if (vd->vdev_isspare)
|
||||
spa_spare_remove(vd);
|
||||
|
@ -653,14 +674,12 @@ vdev_top_transfer(vdev_t *svd, vdev_t *tvd)
|
|||
static void
|
||||
vdev_top_update(vdev_t *tvd, vdev_t *vd)
|
||||
{
|
||||
int c;
|
||||
|
||||
if (vd == NULL)
|
||||
return;
|
||||
|
||||
vd->vdev_top = tvd;
|
||||
|
||||
for (c = 0; c < vd->vdev_children; c++)
|
||||
for (int c = 0; c < vd->vdev_children; c++)
|
||||
vdev_top_update(tvd, vd->vdev_child[c]);
|
||||
}
|
||||
|
||||
|
@ -679,6 +698,7 @@ vdev_add_parent(vdev_t *cvd, vdev_ops_t *ops)
|
|||
mvd = vdev_alloc_common(spa, cvd->vdev_id, 0, ops);
|
||||
|
||||
mvd->vdev_asize = cvd->vdev_asize;
|
||||
mvd->vdev_min_asize = cvd->vdev_min_asize;
|
||||
mvd->vdev_ashift = cvd->vdev_ashift;
|
||||
mvd->vdev_state = cvd->vdev_state;
|
||||
|
||||
|
@ -751,6 +771,15 @@ vdev_metaslab_init(vdev_t *vd, uint64_t txg)
|
|||
if (vd->vdev_ms_shift == 0) /* not being allocated from yet */
|
||||
return (0);
|
||||
|
||||
/*
|
||||
* Compute the raidz-deflation ratio. Note, we hard-code
|
||||
* in 128k (1 << 17) because it is the current "typical" blocksize.
|
||||
* Even if SPA_MAXBLOCKSIZE changes, this algorithm must never change,
|
||||
* or we will inconsistently account for existing bp's.
|
||||
*/
|
||||
vd->vdev_deflate_ratio = (1 << 17) /
|
||||
(vdev_psize_to_asize(vd, 1 << 17) >> SPA_MINBLOCKSHIFT);
|
||||
|
||||
ASSERT(oldc <= newc);
|
||||
|
||||
if (vd->vdev_islog)
|
||||
|
@ -776,7 +805,8 @@ vdev_metaslab_init(vdev_t *vd, uint64_t txg)
|
|||
if (txg == 0) {
|
||||
uint64_t object = 0;
|
||||
error = dmu_read(mos, vd->vdev_ms_array,
|
||||
m * sizeof (uint64_t), sizeof (uint64_t), &object);
|
||||
m * sizeof (uint64_t), sizeof (uint64_t), &object,
|
||||
DMU_READ_PREFETCH);
|
||||
if (error)
|
||||
return (error);
|
||||
if (object != 0) {
|
||||
|
@ -903,7 +933,7 @@ vdev_probe(vdev_t *vd, zio_t *zio)
|
|||
|
||||
vps->vps_flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_PROBE |
|
||||
ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_AGGREGATE |
|
||||
ZIO_FLAG_DONT_RETRY;
|
||||
ZIO_FLAG_TRYHARD;
|
||||
|
||||
if (spa_config_held(spa, SCL_ZIO, RW_WRITER)) {
|
||||
/*
|
||||
|
@ -950,8 +980,8 @@ vdev_probe(vdev_t *vd, zio_t *zio)
|
|||
for (int l = 1; l < VDEV_LABELS; l++) {
|
||||
zio_nowait(zio_read_phys(pio, vd,
|
||||
vdev_label_offset(vd->vdev_psize, l,
|
||||
offsetof(vdev_label_t, vl_pad)),
|
||||
VDEV_SKIP_SIZE, zio_buf_alloc(VDEV_SKIP_SIZE),
|
||||
offsetof(vdev_label_t, vl_pad2)),
|
||||
VDEV_PAD_SIZE, zio_buf_alloc(VDEV_PAD_SIZE),
|
||||
ZIO_CHECKSUM_OFF, vdev_probe_done, vps,
|
||||
ZIO_PRIORITY_SYNC_READ, vps->vps_flags, B_TRUE));
|
||||
}
|
||||
|
@ -971,7 +1001,6 @@ vdev_open(vdev_t *vd)
|
|||
{
|
||||
spa_t *spa = vd->vdev_spa;
|
||||
int error;
|
||||
int c;
|
||||
uint64_t osize = 0;
|
||||
uint64_t asize, psize;
|
||||
uint64_t ashift = 0;
|
||||
|
@ -983,6 +1012,9 @@ vdev_open(vdev_t *vd)
|
|||
vd->vdev_state == VDEV_STATE_OFFLINE);
|
||||
|
||||
vd->vdev_stat.vs_aux = VDEV_AUX_NONE;
|
||||
vd->vdev_cant_read = B_FALSE;
|
||||
vd->vdev_cant_write = B_FALSE;
|
||||
vd->vdev_min_asize = vdev_get_min_asize(vd);
|
||||
|
||||
if (!vd->vdev_removed && vd->vdev_faulted) {
|
||||
ASSERT(vd->vdev_children == 0);
|
||||
|
@ -998,7 +1030,7 @@ vdev_open(vdev_t *vd)
|
|||
error = vd->vdev_ops->vdev_op_open(vd, &osize, &ashift);
|
||||
|
||||
if (zio_injection_enabled && error == 0)
|
||||
error = zio_handle_device_injection(vd, ENXIO);
|
||||
error = zio_handle_device_injection(vd, NULL, ENXIO);
|
||||
|
||||
if (error) {
|
||||
if (vd->vdev_removed &&
|
||||
|
@ -1020,12 +1052,13 @@ vdev_open(vdev_t *vd)
|
|||
vd->vdev_state = VDEV_STATE_HEALTHY;
|
||||
}
|
||||
|
||||
for (c = 0; c < vd->vdev_children; c++)
|
||||
for (int c = 0; c < vd->vdev_children; c++) {
|
||||
if (vd->vdev_child[c]->vdev_state != VDEV_STATE_HEALTHY) {
|
||||
vdev_set_state(vd, B_TRUE, VDEV_STATE_DEGRADED,
|
||||
VDEV_AUX_NONE);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
osize = P2ALIGN(osize, (uint64_t)sizeof (vdev_label_t));
|
||||
|
||||
|
@ -1050,6 +1083,15 @@ vdev_open(vdev_t *vd)
|
|||
|
||||
vd->vdev_psize = psize;
|
||||
|
||||
/*
|
||||
* Make sure the allocatable size hasn't shrunk.
|
||||
*/
|
||||
if (asize < vd->vdev_min_asize) {
|
||||
vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
|
||||
VDEV_AUX_BAD_LABEL);
|
||||
return (EINVAL);
|
||||
}
|
||||
|
||||
if (vd->vdev_asize == 0) {
|
||||
/*
|
||||
* This is the first-ever open, so use the computed values.
|
||||
|
@ -1066,26 +1108,19 @@ vdev_open(vdev_t *vd)
|
|||
VDEV_AUX_BAD_LABEL);
|
||||
return (EINVAL);
|
||||
}
|
||||
|
||||
/*
|
||||
* Make sure the device hasn't shrunk.
|
||||
*/
|
||||
if (asize < vd->vdev_asize) {
|
||||
vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
|
||||
VDEV_AUX_BAD_LABEL);
|
||||
return (EINVAL);
|
||||
}
|
||||
|
||||
/*
|
||||
* If all children are healthy and the asize has increased,
|
||||
* then we've experienced dynamic LUN growth.
|
||||
*/
|
||||
if (vd->vdev_state == VDEV_STATE_HEALTHY &&
|
||||
asize > vd->vdev_asize) {
|
||||
vd->vdev_asize = asize;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* If all children are healthy and the asize has increased,
|
||||
* then we've experienced dynamic LUN growth. If automatic
|
||||
* expansion is enabled then use the additional space.
|
||||
*/
|
||||
if (vd->vdev_state == VDEV_STATE_HEALTHY && asize > vd->vdev_asize &&
|
||||
(vd->vdev_expanding || spa->spa_autoexpand))
|
||||
vd->vdev_asize = asize;
|
||||
|
||||
vdev_set_min_asize(vd);
|
||||
|
||||
/*
|
||||
* Ensure we can issue some IO before declaring the
|
||||
* vdev open for business.
|
||||
|
@ -1097,18 +1132,6 @@ vdev_open(vdev_t *vd)
|
|||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* If this is a top-level vdev, compute the raidz-deflation
|
||||
* ratio. Note, we hard-code in 128k (1<<17) because it is the
|
||||
* current "typical" blocksize. Even if SPA_MAXBLOCKSIZE
|
||||
* changes, this algorithm must never change, or we will
|
||||
* inconsistently account for existing bp's.
|
||||
*/
|
||||
if (vd->vdev_top == vd) {
|
||||
vd->vdev_deflate_ratio = (1<<17) /
|
||||
(vdev_psize_to_asize(vd, 1<<17) >> SPA_MINBLOCKSHIFT);
|
||||
}
|
||||
|
||||
/*
|
||||
* If a leaf vdev has a DTL, and seems healthy, then kick off a
|
||||
* resilver. But don't do this if we are doing a reopen for a scrub,
|
||||
|
@ -1135,12 +1158,11 @@ int
|
|||
vdev_validate(vdev_t *vd)
|
||||
{
|
||||
spa_t *spa = vd->vdev_spa;
|
||||
int c;
|
||||
nvlist_t *label;
|
||||
uint64_t guid, top_guid;
|
||||
uint64_t state;
|
||||
|
||||
for (c = 0; c < vd->vdev_children; c++)
|
||||
for (int c = 0; c < vd->vdev_children; c++)
|
||||
if (vdev_validate(vd->vdev_child[c]) != 0)
|
||||
return (EBADF);
|
||||
|
||||
|
@ -1226,7 +1248,7 @@ vdev_close(vdev_t *vd)
|
|||
vdev_cache_purge(vd);
|
||||
|
||||
/*
|
||||
* We record the previous state before we close it, so that if we are
|
||||
* We record the previous state before we close it, so that if we are
|
||||
* doing a reopen(), we don't generate FMA ereports if we notice that
|
||||
* it's still faulted.
|
||||
*/
|
||||
|
@ -1257,12 +1279,9 @@ vdev_reopen(vdev_t *vd)
|
|||
if (vd->vdev_aux) {
|
||||
(void) vdev_validate_aux(vd);
|
||||
if (vdev_readable(vd) && vdev_writeable(vd) &&
|
||||
!l2arc_vdev_present(vd)) {
|
||||
uint64_t size = vdev_get_rsize(vd);
|
||||
l2arc_add_vdev(spa, vd,
|
||||
VDEV_LABEL_START_SIZE,
|
||||
size - VDEV_LABEL_START_SIZE);
|
||||
}
|
||||
vd->vdev_aux == &spa->spa_l2cache &&
|
||||
!l2arc_vdev_present(vd))
|
||||
l2arc_add_vdev(spa, vd);
|
||||
} else {
|
||||
(void) vdev_validate(vd);
|
||||
}
|
||||
|
@ -1302,26 +1321,14 @@ vdev_create(vdev_t *vd, uint64_t txg, boolean_t isreplacing)
|
|||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* The is the latter half of vdev_create(). It is distinct because it
|
||||
* involves initiating transactions in order to do metaslab creation.
|
||||
* For creation, we want to try to create all vdevs at once and then undo it
|
||||
* if anything fails; this is much harder if we have pending transactions.
|
||||
*/
|
||||
void
|
||||
vdev_init(vdev_t *vd, uint64_t txg)
|
||||
vdev_metaslab_set_size(vdev_t *vd)
|
||||
{
|
||||
/*
|
||||
* Aim for roughly 200 metaslabs per vdev.
|
||||
*/
|
||||
vd->vdev_ms_shift = highbit(vd->vdev_asize / 200);
|
||||
vd->vdev_ms_shift = MAX(vd->vdev_ms_shift, SPA_MAXBLOCKSHIFT);
|
||||
|
||||
/*
|
||||
* Initialize the vdev's metaslabs. This can't fail because
|
||||
* there's nothing to read when creating all new metaslabs.
|
||||
*/
|
||||
VERIFY(vdev_metaslab_init(vd, txg) == 0);
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -1879,7 +1886,7 @@ vdev_degrade(spa_t *spa, uint64_t guid)
|
|||
int
|
||||
vdev_online(spa_t *spa, uint64_t guid, uint64_t flags, vdev_state_t *newstate)
|
||||
{
|
||||
vdev_t *vd;
|
||||
vdev_t *vd, *tvd, *pvd, *rvd = spa->spa_root_vdev;
|
||||
|
||||
spa_vdev_state_enter(spa);
|
||||
|
||||
|
@ -1889,13 +1896,26 @@ vdev_online(spa_t *spa, uint64_t guid, uint64_t flags, vdev_state_t *newstate)
|
|||
if (!vd->vdev_ops->vdev_op_leaf)
|
||||
return (spa_vdev_state_exit(spa, NULL, ENOTSUP));
|
||||
|
||||
tvd = vd->vdev_top;
|
||||
vd->vdev_offline = B_FALSE;
|
||||
vd->vdev_tmpoffline = B_FALSE;
|
||||
vd->vdev_checkremove = !!(flags & ZFS_ONLINE_CHECKREMOVE);
|
||||
vd->vdev_forcefault = !!(flags & ZFS_ONLINE_FORCEFAULT);
|
||||
vdev_reopen(vd->vdev_top);
|
||||
|
||||
/* XXX - L2ARC 1.0 does not support expansion */
|
||||
if (!vd->vdev_aux) {
|
||||
for (pvd = vd; pvd != rvd; pvd = pvd->vdev_parent)
|
||||
pvd->vdev_expanding = !!(flags & ZFS_ONLINE_EXPAND);
|
||||
}
|
||||
|
||||
vdev_reopen(tvd);
|
||||
vd->vdev_checkremove = vd->vdev_forcefault = B_FALSE;
|
||||
|
||||
if (!vd->vdev_aux) {
|
||||
for (pvd = vd; pvd != rvd; pvd = pvd->vdev_parent)
|
||||
pvd->vdev_expanding = B_FALSE;
|
||||
}
|
||||
|
||||
if (newstate)
|
||||
*newstate = vd->vdev_state;
|
||||
if ((flags & ZFS_ONLINE_UNSPARE) &&
|
||||
|
@ -1904,13 +1924,21 @@ vdev_online(spa_t *spa, uint64_t guid, uint64_t flags, vdev_state_t *newstate)
|
|||
vd->vdev_parent->vdev_child[0] == vd)
|
||||
vd->vdev_unspare = B_TRUE;
|
||||
|
||||
if ((flags & ZFS_ONLINE_EXPAND) || spa->spa_autoexpand) {
|
||||
|
||||
/* XXX - L2ARC 1.0 does not support expansion */
|
||||
if (vd->vdev_aux)
|
||||
return (spa_vdev_state_exit(spa, vd, ENOTSUP));
|
||||
spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE);
|
||||
}
|
||||
return (spa_vdev_state_exit(spa, vd, 0));
|
||||
}
|
||||
|
||||
int
|
||||
vdev_offline(spa_t *spa, uint64_t guid, uint64_t flags)
|
||||
{
|
||||
vdev_t *vd;
|
||||
vdev_t *vd, *tvd;
|
||||
int error;
|
||||
|
||||
spa_vdev_state_enter(spa);
|
||||
|
||||
|
@ -1920,34 +1948,58 @@ vdev_offline(spa_t *spa, uint64_t guid, uint64_t flags)
|
|||
if (!vd->vdev_ops->vdev_op_leaf)
|
||||
return (spa_vdev_state_exit(spa, NULL, ENOTSUP));
|
||||
|
||||
tvd = vd->vdev_top;
|
||||
|
||||
/*
|
||||
* If the device isn't already offline, try to offline it.
|
||||
*/
|
||||
if (!vd->vdev_offline) {
|
||||
/*
|
||||
* If this device has the only valid copy of some data,
|
||||
* don't allow it to be offlined.
|
||||
* don't allow it to be offlined. Log devices are always
|
||||
* expendable.
|
||||
*/
|
||||
if (vd->vdev_aux == NULL && vdev_dtl_required(vd))
|
||||
if (!tvd->vdev_islog && vd->vdev_aux == NULL &&
|
||||
vdev_dtl_required(vd))
|
||||
return (spa_vdev_state_exit(spa, NULL, EBUSY));
|
||||
|
||||
/*
|
||||
* Offline this device and reopen its top-level vdev.
|
||||
* If this action results in the top-level vdev becoming
|
||||
* unusable, undo it and fail the request.
|
||||
* If the top-level vdev is a log device then just offline
|
||||
* it. Otherwise, if this action results in the top-level
|
||||
* vdev becoming unusable, undo it and fail the request.
|
||||
*/
|
||||
vd->vdev_offline = B_TRUE;
|
||||
vdev_reopen(vd->vdev_top);
|
||||
if (vd->vdev_aux == NULL && vdev_is_dead(vd->vdev_top)) {
|
||||
vdev_reopen(tvd);
|
||||
|
||||
if (!tvd->vdev_islog && vd->vdev_aux == NULL &&
|
||||
vdev_is_dead(tvd)) {
|
||||
vd->vdev_offline = B_FALSE;
|
||||
vdev_reopen(vd->vdev_top);
|
||||
vdev_reopen(tvd);
|
||||
return (spa_vdev_state_exit(spa, NULL, EBUSY));
|
||||
}
|
||||
}
|
||||
|
||||
vd->vdev_tmpoffline = !!(flags & ZFS_OFFLINE_TEMPORARY);
|
||||
|
||||
return (spa_vdev_state_exit(spa, vd, 0));
|
||||
if (!tvd->vdev_islog || !vdev_is_dead(tvd))
|
||||
return (spa_vdev_state_exit(spa, vd, 0));
|
||||
|
||||
(void) spa_vdev_state_exit(spa, vd, 0);
|
||||
|
||||
error = dmu_objset_find(spa_name(spa), zil_vdev_offline,
|
||||
NULL, DS_FIND_CHILDREN);
|
||||
if (error) {
|
||||
(void) vdev_online(spa, guid, 0, NULL);
|
||||
return (error);
|
||||
}
|
||||
/*
|
||||
* If we successfully offlined the log device then we need to
|
||||
* sync out the current txg so that the "stubby" block can be
|
||||
* removed by zil_sync().
|
||||
*/
|
||||
txg_wait_synced(spa->spa_dsl_pool, 0);
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -2062,7 +2114,9 @@ vdev_get_stats(vdev_t *vd, vdev_stat_t *vs)
|
|||
vs->vs_scrub_errors = vd->vdev_spa->spa_scrub_errors;
|
||||
vs->vs_timestamp = gethrtime() - vs->vs_timestamp;
|
||||
vs->vs_state = vd->vdev_state;
|
||||
vs->vs_rsize = vdev_get_rsize(vd);
|
||||
vs->vs_rsize = vdev_get_min_asize(vd);
|
||||
if (vd->vdev_ops->vdev_op_leaf)
|
||||
vs->vs_rsize += VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE;
|
||||
mutex_exit(&vd->vdev_stat_lock);
|
||||
|
||||
/*
|
||||
|
@ -2155,14 +2209,24 @@ vdev_stat_update(zio_t *zio, uint64_t psize)
|
|||
if (flags & ZIO_FLAG_SPECULATIVE)
|
||||
return;
|
||||
|
||||
/*
|
||||
* If this is an I/O error that is going to be retried, then ignore the
|
||||
* error. Otherwise, the user may interpret B_FAILFAST I/O errors as
|
||||
* hard errors, when in reality they can happen for any number of
|
||||
* innocuous reasons (bus resets, MPxIO link failure, etc).
|
||||
*/
|
||||
if (zio->io_error == EIO &&
|
||||
!(zio->io_flags & ZIO_FLAG_IO_RETRY))
|
||||
return;
|
||||
|
||||
mutex_enter(&vd->vdev_stat_lock);
|
||||
if (type == ZIO_TYPE_READ) {
|
||||
if (type == ZIO_TYPE_READ && !vdev_is_dead(vd)) {
|
||||
if (zio->io_error == ECKSUM)
|
||||
vs->vs_checksum_errors++;
|
||||
else
|
||||
vs->vs_read_errors++;
|
||||
}
|
||||
if (type == ZIO_TYPE_WRITE)
|
||||
if (type == ZIO_TYPE_WRITE && !vdev_is_dead(vd))
|
||||
vs->vs_write_errors++;
|
||||
mutex_exit(&vd->vdev_stat_lock);
|
||||
|
||||
|
@ -2205,10 +2269,9 @@ vdev_stat_update(zio_t *zio, uint64_t psize)
|
|||
void
|
||||
vdev_scrub_stat_update(vdev_t *vd, pool_scrub_type_t type, boolean_t complete)
|
||||
{
|
||||
int c;
|
||||
vdev_stat_t *vs = &vd->vdev_stat;
|
||||
|
||||
for (c = 0; c < vd->vdev_children; c++)
|
||||
for (int c = 0; c < vd->vdev_children; c++)
|
||||
vdev_scrub_stat_update(vd->vdev_child[c], type, complete);
|
||||
|
||||
mutex_enter(&vd->vdev_stat_lock);
|
||||
|
@ -2252,6 +2315,7 @@ vdev_space_update(vdev_t *vd, int64_t space_delta, int64_t alloc_delta,
|
|||
* childrens', thus not accurate enough for us.
|
||||
*/
|
||||
ASSERT((dspace_delta & (SPA_MINBLOCKSIZE-1)) == 0);
|
||||
ASSERT(vd->vdev_deflate_ratio != 0 || vd->vdev_isl2cache);
|
||||
dspace_delta = (dspace_delta >> SPA_MINBLOCKSHIFT) *
|
||||
vd->vdev_deflate_ratio;
|
||||
|
||||
|
@ -2293,8 +2357,8 @@ vdev_config_dirty(vdev_t *vd)
|
|||
int c;
|
||||
|
||||
/*
|
||||
* If this is an aux vdev (as with l2cache devices), then we update the
|
||||
* vdev config manually and set the sync flag.
|
||||
* If this is an aux vdev (as with l2cache and spare devices), then we
|
||||
* update the vdev config manually and set the sync flag.
|
||||
*/
|
||||
if (vd->vdev_aux != NULL) {
|
||||
spa_aux_vdev_t *sav = vd->vdev_aux;
|
||||
|
@ -2316,8 +2380,11 @@ vdev_config_dirty(vdev_t *vd)
|
|||
|
||||
sav->sav_sync = B_TRUE;
|
||||
|
||||
VERIFY(nvlist_lookup_nvlist_array(sav->sav_config,
|
||||
ZPOOL_CONFIG_L2CACHE, &aux, &naux) == 0);
|
||||
if (nvlist_lookup_nvlist_array(sav->sav_config,
|
||||
ZPOOL_CONFIG_L2CACHE, &aux, &naux) != 0) {
|
||||
VERIFY(nvlist_lookup_nvlist_array(sav->sav_config,
|
||||
ZPOOL_CONFIG_SPARES, &aux, &naux) == 0);
|
||||
}
|
||||
|
||||
ASSERT(c < naux);
|
||||
|
||||
|
@ -2415,11 +2482,10 @@ vdev_propagate_state(vdev_t *vd)
|
|||
vdev_t *rvd = spa->spa_root_vdev;
|
||||
int degraded = 0, faulted = 0;
|
||||
int corrupted = 0;
|
||||
int c;
|
||||
vdev_t *child;
|
||||
|
||||
if (vd->vdev_children > 0) {
|
||||
for (c = 0; c < vd->vdev_children; c++) {
|
||||
for (int c = 0; c < vd->vdev_children; c++) {
|
||||
child = vd->vdev_child[c];
|
||||
|
||||
if (!vdev_readable(child) ||
|
||||
|
@ -2523,7 +2589,6 @@ vdev_set_state(vdev_t *vd, boolean_t isopen, vdev_state_t state, vdev_aux_t aux)
|
|||
* an error.
|
||||
*/
|
||||
if (spa->spa_load_state == SPA_LOAD_IMPORT &&
|
||||
!spa->spa_import_faulted &&
|
||||
vd->vdev_ops->vdev_op_leaf)
|
||||
vd->vdev_not_present = 1;
|
||||
|
||||
|
@ -2582,8 +2647,8 @@ vdev_set_state(vdev_t *vd, boolean_t isopen, vdev_state_t state, vdev_aux_t aux)
|
|||
vd->vdev_removed = B_FALSE;
|
||||
}
|
||||
|
||||
if (!isopen)
|
||||
vdev_propagate_state(vd);
|
||||
if (!isopen && vd->vdev_parent)
|
||||
vdev_propagate_state(vd->vdev_parent);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -2595,8 +2660,6 @@ vdev_set_state(vdev_t *vd, boolean_t isopen, vdev_state_t state, vdev_aux_t aux)
|
|||
boolean_t
|
||||
vdev_is_bootable(vdev_t *vd)
|
||||
{
|
||||
int c;
|
||||
|
||||
if (!vd->vdev_ops->vdev_op_leaf) {
|
||||
char *vdev_type = vd->vdev_ops->vdev_op_type;
|
||||
|
||||
|
@ -2611,9 +2674,53 @@ vdev_is_bootable(vdev_t *vd)
|
|||
return (B_FALSE);
|
||||
}
|
||||
|
||||
for (c = 0; c < vd->vdev_children; c++) {
|
||||
for (int c = 0; c < vd->vdev_children; c++) {
|
||||
if (!vdev_is_bootable(vd->vdev_child[c]))
|
||||
return (B_FALSE);
|
||||
}
|
||||
return (B_TRUE);
|
||||
}
|
||||
|
||||
void
|
||||
vdev_load_log_state(vdev_t *vd, nvlist_t *nv)
|
||||
{
|
||||
uint_t children;
|
||||
nvlist_t **child;
|
||||
uint64_t val;
|
||||
spa_t *spa = vd->vdev_spa;
|
||||
|
||||
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
|
||||
&child, &children) == 0) {
|
||||
for (int c = 0; c < children; c++)
|
||||
vdev_load_log_state(vd->vdev_child[c], child[c]);
|
||||
}
|
||||
|
||||
if (vd->vdev_ops->vdev_op_leaf && nvlist_lookup_uint64(nv,
|
||||
ZPOOL_CONFIG_OFFLINE, &val) == 0 && val) {
|
||||
|
||||
/*
|
||||
* It would be nice to call vdev_offline()
|
||||
* directly but the pool isn't fully loaded and
|
||||
* the txg threads have not been started yet.
|
||||
*/
|
||||
spa_config_enter(spa, SCL_STATE_ALL, FTAG, RW_WRITER);
|
||||
vd->vdev_offline = val;
|
||||
vdev_reopen(vd->vdev_top);
|
||||
spa_config_exit(spa, SCL_STATE_ALL, FTAG);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Expand a vdev if possible.
|
||||
*/
|
||||
void
|
||||
vdev_expand(vdev_t *vd, uint64_t txg)
|
||||
{
|
||||
ASSERT(vd->vdev_top == vd);
|
||||
ASSERT(spa_config_held(vd->vdev_spa, SCL_ALL, RW_WRITER) == SCL_ALL);
|
||||
|
||||
if ((vd->vdev_asize >> vd->vdev_ms_shift) > vd->vdev_ms_count) {
|
||||
VERIFY(vdev_metaslab_init(vd, txg) == 0);
|
||||
vdev_config_dirty(vd);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -233,6 +233,10 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats,
|
|||
VERIFY(nvlist_add_string(nv, ZPOOL_CONFIG_PHYS_PATH,
|
||||
vd->vdev_physpath) == 0);
|
||||
|
||||
if (vd->vdev_fru != NULL)
|
||||
VERIFY(nvlist_add_string(nv, ZPOOL_CONFIG_FRU,
|
||||
vd->vdev_fru) == 0);
|
||||
|
||||
if (vd->vdev_nparity != 0) {
|
||||
ASSERT(strcmp(vd->vdev_ops->vdev_op_type,
|
||||
VDEV_TYPE_RAIDZ) == 0);
|
||||
|
@ -335,8 +339,8 @@ vdev_label_read_config(vdev_t *vd)
|
|||
nvlist_t *config = NULL;
|
||||
vdev_phys_t *vp;
|
||||
zio_t *zio;
|
||||
int flags =
|
||||
ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE;
|
||||
int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL |
|
||||
ZIO_FLAG_SPECULATIVE;
|
||||
|
||||
ASSERT(spa_config_held(spa, SCL_STATE_ALL, RW_WRITER) == SCL_STATE_ALL);
|
||||
|
||||
|
@ -345,6 +349,7 @@ vdev_label_read_config(vdev_t *vd)
|
|||
|
||||
vp = zio_buf_alloc(sizeof (vdev_phys_t));
|
||||
|
||||
retry:
|
||||
for (int l = 0; l < VDEV_LABELS; l++) {
|
||||
|
||||
zio = zio_root(spa, NULL, NULL, flags);
|
||||
|
@ -364,6 +369,11 @@ vdev_label_read_config(vdev_t *vd)
|
|||
}
|
||||
}
|
||||
|
||||
if (config == NULL && !(flags & ZIO_FLAG_TRYHARD)) {
|
||||
flags |= ZIO_FLAG_TRYHARD;
|
||||
goto retry;
|
||||
}
|
||||
|
||||
zio_buf_free(vp, sizeof (vdev_phys_t));
|
||||
|
||||
return (config);
|
||||
|
@ -490,7 +500,7 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason)
|
|||
spa_t *spa = vd->vdev_spa;
|
||||
nvlist_t *label;
|
||||
vdev_phys_t *vp;
|
||||
vdev_boot_header_t *vb;
|
||||
char *pad2;
|
||||
uberblock_t *ub;
|
||||
zio_t *zio;
|
||||
char *buf;
|
||||
|
@ -631,16 +641,6 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason)
|
|||
return (error == EFAULT ? ENAMETOOLONG : EINVAL);
|
||||
}
|
||||
|
||||
/*
|
||||
* Initialize boot block header.
|
||||
*/
|
||||
vb = zio_buf_alloc(sizeof (vdev_boot_header_t));
|
||||
bzero(vb, sizeof (vdev_boot_header_t));
|
||||
vb->vb_magic = VDEV_BOOT_MAGIC;
|
||||
vb->vb_version = VDEV_BOOT_VERSION;
|
||||
vb->vb_offset = VDEV_BOOT_OFFSET;
|
||||
vb->vb_size = VDEV_BOOT_SIZE;
|
||||
|
||||
/*
|
||||
* Initialize uberblock template.
|
||||
*/
|
||||
|
@ -649,9 +649,14 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason)
|
|||
*ub = spa->spa_uberblock;
|
||||
ub->ub_txg = 0;
|
||||
|
||||
/* Initialize the 2nd padding area. */
|
||||
pad2 = zio_buf_alloc(VDEV_PAD_SIZE);
|
||||
bzero(pad2, VDEV_PAD_SIZE);
|
||||
|
||||
/*
|
||||
* Write everything in parallel.
|
||||
*/
|
||||
retry:
|
||||
zio = zio_root(spa, NULL, NULL, flags);
|
||||
|
||||
for (int l = 0; l < VDEV_LABELS; l++) {
|
||||
|
@ -660,9 +665,14 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason)
|
|||
offsetof(vdev_label_t, vl_vdev_phys),
|
||||
sizeof (vdev_phys_t), NULL, NULL, flags);
|
||||
|
||||
vdev_label_write(zio, vd, l, vb,
|
||||
offsetof(vdev_label_t, vl_boot_header),
|
||||
sizeof (vdev_boot_header_t), NULL, NULL, flags);
|
||||
/*
|
||||
* Skip the 1st padding area.
|
||||
* Zero out the 2nd padding area where it might have
|
||||
* left over data from previous filesystem format.
|
||||
*/
|
||||
vdev_label_write(zio, vd, l, pad2,
|
||||
offsetof(vdev_label_t, vl_pad2),
|
||||
VDEV_PAD_SIZE, NULL, NULL, flags);
|
||||
|
||||
for (int n = 0; n < VDEV_UBERBLOCK_COUNT(vd); n++) {
|
||||
vdev_label_write(zio, vd, l, ub,
|
||||
|
@ -673,9 +683,14 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason)
|
|||
|
||||
error = zio_wait(zio);
|
||||
|
||||
if (error != 0 && !(flags & ZIO_FLAG_TRYHARD)) {
|
||||
flags |= ZIO_FLAG_TRYHARD;
|
||||
goto retry;
|
||||
}
|
||||
|
||||
nvlist_free(label);
|
||||
zio_buf_free(pad2, VDEV_PAD_SIZE);
|
||||
zio_buf_free(ub, VDEV_UBERBLOCK_SIZE(vd));
|
||||
zio_buf_free(vb, sizeof (vdev_boot_header_t));
|
||||
zio_buf_free(vp, sizeof (vdev_phys_t));
|
||||
|
||||
/*
|
||||
|
@ -759,8 +774,8 @@ vdev_uberblock_load(zio_t *zio, vdev_t *vd, uberblock_t *ubbest)
|
|||
{
|
||||
spa_t *spa = vd->vdev_spa;
|
||||
vdev_t *rvd = spa->spa_root_vdev;
|
||||
int flags =
|
||||
ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE;
|
||||
int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL |
|
||||
ZIO_FLAG_SPECULATIVE | ZIO_FLAG_TRYHARD;
|
||||
|
||||
if (vd == rvd) {
|
||||
ASSERT(zio == NULL);
|
||||
|
@ -998,7 +1013,7 @@ vdev_label_sync_list(spa_t *spa, int l, uint64_t txg, int flags)
|
|||
* at any time, you can just call it again, and it will resume its work.
|
||||
*/
|
||||
int
|
||||
vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg)
|
||||
vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg, boolean_t tryhard)
|
||||
{
|
||||
spa_t *spa = svd[0]->vdev_spa;
|
||||
uberblock_t *ub = &spa->spa_uberblock;
|
||||
|
@ -1007,6 +1022,16 @@ vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg)
|
|||
int error;
|
||||
int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL;
|
||||
|
||||
/*
|
||||
* Normally, we don't want to try too hard to write every label and
|
||||
* uberblock. If there is a flaky disk, we don't want the rest of the
|
||||
* sync process to block while we retry. But if we can't write a
|
||||
* single label out, we should retry with ZIO_FLAG_TRYHARD before
|
||||
* bailing out and declaring the pool faulted.
|
||||
*/
|
||||
if (tryhard)
|
||||
flags |= ZIO_FLAG_TRYHARD;
|
||||
|
||||
ASSERT(ub->ub_txg <= txg);
|
||||
|
||||
/*
|
||||
|
|
|
@ -48,10 +48,11 @@ int zfs_vdev_time_shift = 6;
|
|||
int zfs_vdev_ramp_rate = 2;
|
||||
|
||||
/*
|
||||
* i/os will be aggregated into a single large i/o up to
|
||||
* zfs_vdev_aggregation_limit bytes long.
|
||||
* To reduce IOPs, we aggregate small adjacent i/os into one large i/o.
|
||||
* For read i/os, we also aggregate across small adjacency gaps.
|
||||
*/
|
||||
int zfs_vdev_aggregation_limit = SPA_MAXBLOCKSIZE;
|
||||
int zfs_vdev_read_gap_limit = 32 << 10;
|
||||
|
||||
/*
|
||||
* Virtual device vector for disk I/O scheduling.
|
||||
|
@ -159,16 +160,23 @@ vdev_queue_agg_io_done(zio_t *aio)
|
|||
zio_buf_free(aio->io_data, aio->io_size);
|
||||
}
|
||||
|
||||
#define IS_ADJACENT(io, nio) \
|
||||
((io)->io_offset + (io)->io_size == (nio)->io_offset)
|
||||
/*
|
||||
* Compute the range spanned by two i/os, which is the endpoint of the last
|
||||
* (lio->io_offset + lio->io_size) minus start of the first (fio->io_offset).
|
||||
* Conveniently, the gap between fio and lio is given by -IO_SPAN(lio, fio);
|
||||
* thus fio and lio are adjacent if and only if IO_SPAN(lio, fio) == 0.
|
||||
*/
|
||||
#define IO_SPAN(fio, lio) ((lio)->io_offset + (lio)->io_size - (fio)->io_offset)
|
||||
#define IO_GAP(fio, lio) (-IO_SPAN(lio, fio))
|
||||
|
||||
static zio_t *
|
||||
vdev_queue_io_to_issue(vdev_queue_t *vq, uint64_t pending_limit)
|
||||
{
|
||||
zio_t *fio, *lio, *aio, *dio, *nio;
|
||||
avl_tree_t *t;
|
||||
uint64_t size;
|
||||
int flags;
|
||||
uint64_t maxspan = zfs_vdev_aggregation_limit;
|
||||
uint64_t maxgap;
|
||||
|
||||
ASSERT(MUTEX_HELD(&vq->vq_lock));
|
||||
|
||||
|
@ -179,8 +187,8 @@ vdev_queue_io_to_issue(vdev_queue_t *vq, uint64_t pending_limit)
|
|||
fio = lio = avl_first(&vq->vq_deadline_tree);
|
||||
|
||||
t = fio->io_vdev_tree;
|
||||
size = fio->io_size;
|
||||
flags = fio->io_flags & ZIO_FLAG_AGG_INHERIT;
|
||||
maxgap = (t == &vq->vq_read_tree) ? zfs_vdev_read_gap_limit : 0;
|
||||
|
||||
if (!(flags & ZIO_FLAG_DONT_AGGREGATE)) {
|
||||
/*
|
||||
|
@ -191,22 +199,18 @@ vdev_queue_io_to_issue(vdev_queue_t *vq, uint64_t pending_limit)
|
|||
* scrub/resilver, can be preserved in the aggregate.
|
||||
*/
|
||||
while ((dio = AVL_PREV(t, fio)) != NULL &&
|
||||
IS_ADJACENT(dio, fio) &&
|
||||
(dio->io_flags & ZIO_FLAG_AGG_INHERIT) == flags &&
|
||||
size + dio->io_size <= zfs_vdev_aggregation_limit) {
|
||||
IO_SPAN(dio, lio) <= maxspan && IO_GAP(dio, fio) <= maxgap)
|
||||
fio = dio;
|
||||
size += dio->io_size;
|
||||
}
|
||||
|
||||
while ((dio = AVL_NEXT(t, lio)) != NULL &&
|
||||
IS_ADJACENT(lio, dio) &&
|
||||
(dio->io_flags & ZIO_FLAG_AGG_INHERIT) == flags &&
|
||||
size + dio->io_size <= zfs_vdev_aggregation_limit) {
|
||||
IO_SPAN(fio, dio) <= maxspan && IO_GAP(lio, dio) <= maxgap)
|
||||
lio = dio;
|
||||
size += dio->io_size;
|
||||
}
|
||||
}
|
||||
|
||||
if (fio != lio) {
|
||||
uint64_t size = IO_SPAN(fio, lio);
|
||||
ASSERT(size <= zfs_vdev_aggregation_limit);
|
||||
|
||||
aio = zio_vdev_delegated_io(fio->io_vd, fio->io_offset,
|
||||
|
@ -214,9 +218,10 @@ vdev_queue_io_to_issue(vdev_queue_t *vq, uint64_t pending_limit)
|
|||
flags | ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE,
|
||||
vdev_queue_agg_io_done, NULL);
|
||||
|
||||
/* We want to process lio, then stop */
|
||||
lio = AVL_NEXT(t, lio);
|
||||
for (dio = fio; dio != lio; dio = nio) {
|
||||
nio = fio;
|
||||
do {
|
||||
dio = nio;
|
||||
nio = AVL_NEXT(t, dio);
|
||||
ASSERT(dio->io_type == aio->io_type);
|
||||
ASSERT(dio->io_vdev_tree == t);
|
||||
|
||||
|
@ -224,13 +229,12 @@ vdev_queue_io_to_issue(vdev_queue_t *vq, uint64_t pending_limit)
|
|||
bcopy(dio->io_data, (char *)aio->io_data +
|
||||
(dio->io_offset - aio->io_offset),
|
||||
dio->io_size);
|
||||
nio = AVL_NEXT(t, dio);
|
||||
|
||||
zio_add_child(dio, aio);
|
||||
vdev_queue_io_remove(vq, dio);
|
||||
zio_vdev_io_bypass(dio);
|
||||
zio_execute(dio);
|
||||
}
|
||||
} while (dio != lio);
|
||||
|
||||
avl_add(&vq->vq_pending_tree, aio);
|
||||
|
||||
|
|
|
@ -20,7 +20,7 @@
|
|||
*/
|
||||
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
|
@ -697,7 +697,7 @@ vdev_raidz_io_start(zio_t *zio)
|
|||
continue;
|
||||
}
|
||||
if (c >= rm->rm_firstdatacol || rm->rm_missingdata > 0 ||
|
||||
(zio->io_flags & ZIO_FLAG_SCRUB)) {
|
||||
(zio->io_flags & (ZIO_FLAG_SCRUB | ZIO_FLAG_RESILVER))) {
|
||||
zio_nowait(zio_vdev_child_io(zio, NULL, cvd,
|
||||
rc->rc_offset, rc->rc_data, rc->rc_size,
|
||||
zio->io_type, zio->io_priority, 0,
|
||||
|
|
|
@ -19,13 +19,10 @@
|
|||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
|
||||
/*
|
||||
* This file contains the top half of the zfs directory structure
|
||||
* implementation. The bottom half is in zap_leaf.c.
|
||||
|
@ -45,6 +42,7 @@
|
|||
#include <sys/dmu.h>
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/zfs_znode.h>
|
||||
#include <sys/fs/zfs.h>
|
||||
#include <sys/zap.h>
|
||||
#include <sys/refcount.h>
|
||||
#include <sys/zap_impl.h>
|
||||
|
@ -1134,3 +1132,58 @@ fzap_get_stats(zap_t *zap, zap_stats_t *zs)
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
fzap_count_write(zap_name_t *zn, int add, uint64_t *towrite,
|
||||
uint64_t *tooverwrite)
|
||||
{
|
||||
zap_t *zap = zn->zn_zap;
|
||||
zap_leaf_t *l;
|
||||
int err;
|
||||
|
||||
/*
|
||||
* Account for the header block of the fatzap.
|
||||
*/
|
||||
if (!add && dmu_buf_freeable(zap->zap_dbuf)) {
|
||||
tooverwrite += zap->zap_dbuf->db_size;
|
||||
} else {
|
||||
towrite += zap->zap_dbuf->db_size;
|
||||
}
|
||||
|
||||
/*
|
||||
* Account for the pointer table blocks.
|
||||
* If we are adding we need to account for the following cases :
|
||||
* - If the pointer table is embedded, this operation could force an
|
||||
* external pointer table.
|
||||
* - If this already has an external pointer table this operation
|
||||
* could extend the table.
|
||||
*/
|
||||
if (add) {
|
||||
if (zap->zap_f.zap_phys->zap_ptrtbl.zt_blk == 0)
|
||||
towrite += zap->zap_dbuf->db_size;
|
||||
else
|
||||
towrite += (zap->zap_dbuf->db_size * 3);
|
||||
}
|
||||
|
||||
/*
|
||||
* Now, check if the block containing leaf is freeable
|
||||
* and account accordingly.
|
||||
*/
|
||||
err = zap_deref_leaf(zap, zn->zn_hash, NULL, RW_READER, &l);
|
||||
if (err != 0) {
|
||||
return (err);
|
||||
}
|
||||
|
||||
if (!add && dmu_buf_freeable(l->l_dbuf)) {
|
||||
tooverwrite += l->l_dbuf->db_size;
|
||||
} else {
|
||||
/*
|
||||
* If this an add operation, the leaf block could split.
|
||||
* Hence, we need to account for an additional leaf block.
|
||||
*/
|
||||
towrite += (add ? 2 : 1) * l->l_dbuf->db_size;
|
||||
}
|
||||
|
||||
zap_put_leaf(l);
|
||||
return (0);
|
||||
}
|
||||
|
|
|
@ -19,24 +19,23 @@
|
|||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
/*
|
||||
* The 512-byte leaf is broken into 32 16-byte chunks.
|
||||
* chunk number n means l_chunk[n], even though the header precedes it.
|
||||
* the names are stored null-terminated.
|
||||
*/
|
||||
|
||||
#include <sys/spa.h>
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/fs/zfs.h>
|
||||
#include <sys/zap.h>
|
||||
#include <sys/zap_impl.h>
|
||||
#include <sys/zap_leaf.h>
|
||||
#include <sys/spa.h>
|
||||
#include <sys/dmu.h>
|
||||
|
||||
static uint16_t *zap_leaf_rehash_entry(zap_leaf_t *l, uint16_t entry);
|
||||
|
||||
|
|
|
@ -19,12 +19,10 @@
|
|||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#include <sys/spa.h>
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/zfs_context.h>
|
||||
|
@ -78,8 +76,8 @@ zap_normalize(zap_t *zap, const char *name, char *namenorm)
|
|||
|
||||
err = 0;
|
||||
(void) u8_textprep_str((char *)name, &inlen, namenorm, &outlen,
|
||||
zap->zap_normflags | U8_TEXTPREP_IGNORE_NULL, U8_UNICODE_LATEST,
|
||||
&err);
|
||||
zap->zap_normflags | U8_TEXTPREP_IGNORE_NULL |
|
||||
U8_TEXTPREP_IGNORE_INVALID, U8_UNICODE_LATEST, &err);
|
||||
|
||||
return (err);
|
||||
}
|
||||
|
@ -1067,3 +1065,79 @@ zap_get_stats(objset_t *os, uint64_t zapobj, zap_stats_t *zs)
|
|||
zap_unlockdir(zap);
|
||||
return (0);
|
||||
}
|
||||
|
||||
int
|
||||
zap_count_write(objset_t *os, uint64_t zapobj, const char *name, int add,
|
||||
uint64_t *towrite, uint64_t *tooverwrite, uint64_t dn_datablkshift)
|
||||
{
|
||||
zap_t *zap;
|
||||
int err = 0;
|
||||
|
||||
|
||||
/*
|
||||
* Since, we don't have a name, we cannot figure out which blocks will
|
||||
* be affected in this operation. So, account for the worst case :
|
||||
* - 3 blocks overwritten: target leaf, ptrtbl block, header block
|
||||
* - 4 new blocks written if adding:
|
||||
* - 2 blocks for possibly split leaves,
|
||||
* - 2 grown ptrtbl blocks
|
||||
*
|
||||
* This also accomodates the case where an add operation to a fairly
|
||||
* large microzap results in a promotion to fatzap.
|
||||
*/
|
||||
if (name == NULL) {
|
||||
*towrite += (3 + (add ? 4 : 0)) * SPA_MAXBLOCKSIZE;
|
||||
return (err);
|
||||
}
|
||||
|
||||
/*
|
||||
* We lock the zap with adding == FALSE. Because, if we pass
|
||||
* the actual value of add, it could trigger a mzap_upgrade().
|
||||
* At present we are just evaluating the possibility of this operation
|
||||
* and hence we donot want to trigger an upgrade.
|
||||
*/
|
||||
err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap);
|
||||
if (err)
|
||||
return (err);
|
||||
|
||||
if (!zap->zap_ismicro) {
|
||||
zap_name_t *zn = zap_name_alloc(zap, name, MT_EXACT);
|
||||
if (zn) {
|
||||
err = fzap_count_write(zn, add, towrite,
|
||||
tooverwrite);
|
||||
zap_name_free(zn);
|
||||
} else {
|
||||
/*
|
||||
* We treat this case as similar to (name == NULL)
|
||||
*/
|
||||
*towrite += (3 + (add ? 4 : 0)) * SPA_MAXBLOCKSIZE;
|
||||
}
|
||||
} else {
|
||||
if (!add) {
|
||||
if (dmu_buf_freeable(zap->zap_dbuf))
|
||||
*tooverwrite += SPA_MAXBLOCKSIZE;
|
||||
else
|
||||
*towrite += SPA_MAXBLOCKSIZE;
|
||||
} else {
|
||||
/*
|
||||
* We are here if we are adding and (name != NULL).
|
||||
* It is hard to find out if this add will promote this
|
||||
* microzap to fatzap. Hence, we assume the worst case
|
||||
* and account for the blocks assuming this microzap
|
||||
* would be promoted to a fatzap.
|
||||
*
|
||||
* 1 block overwritten : header block
|
||||
* 4 new blocks written : 2 new split leaf, 2 grown
|
||||
* ptrtbl blocks
|
||||
*/
|
||||
if (dmu_buf_freeable(zap->zap_dbuf))
|
||||
*tooverwrite += 1 << dn_datablkshift;
|
||||
else
|
||||
*towrite += 1 << dn_datablkshift;
|
||||
*towrite += 4 << dn_datablkshift;
|
||||
}
|
||||
}
|
||||
|
||||
zap_unlockdir(zap);
|
||||
return (err);
|
||||
}
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
|
@ -65,15 +65,16 @@
|
|||
ACE_WRITE_ATTRIBUTES|ACE_WRITE_NAMED_ATTRS)
|
||||
#define OWNER_ALLOW_MASK (ACE_WRITE_ACL | ACE_WRITE_OWNER | \
|
||||
ACE_WRITE_ATTRIBUTES|ACE_WRITE_NAMED_ATTRS)
|
||||
#define WRITE_MASK_DATA (ACE_WRITE_DATA|ACE_APPEND_DATA|ACE_WRITE_NAMED_ATTRS)
|
||||
|
||||
#define ZFS_CHECKED_MASKS (ACE_READ_ACL|ACE_READ_ATTRIBUTES|ACE_READ_DATA| \
|
||||
ACE_READ_NAMED_ATTRS|ACE_WRITE_DATA|ACE_WRITE_ATTRIBUTES| \
|
||||
ACE_WRITE_NAMED_ATTRS|ACE_APPEND_DATA|ACE_EXECUTE|ACE_WRITE_OWNER| \
|
||||
ACE_WRITE_ACL|ACE_DELETE|ACE_DELETE_CHILD|ACE_SYNCHRONIZE)
|
||||
|
||||
#define WRITE_MASK (WRITE_MASK_DATA|ACE_WRITE_ATTRIBUTES|ACE_WRITE_ACL|\
|
||||
ACE_WRITE_OWNER|ACE_DELETE|ACE_DELETE_CHILD)
|
||||
#define WRITE_MASK_DATA (ACE_WRITE_DATA|ACE_APPEND_DATA|ACE_WRITE_NAMED_ATTRS)
|
||||
#define WRITE_MASK_ATTRS (ACE_WRITE_ACL|ACE_WRITE_OWNER|ACE_WRITE_ATTRIBUTES| \
|
||||
ACE_DELETE|ACE_DELETE_CHILD)
|
||||
#define WRITE_MASK (WRITE_MASK_DATA|WRITE_MASK_ATTRS)
|
||||
|
||||
#define OGE_CLEAR (ACE_READ_DATA|ACE_LIST_DIRECTORY|ACE_WRITE_DATA| \
|
||||
ACE_ADD_FILE|ACE_APPEND_DATA|ACE_ADD_SUBDIRECTORY|ACE_EXECUTE)
|
||||
|
@ -538,8 +539,9 @@ zfs_acl_curr_node(zfs_acl_t *aclp)
|
|||
* ACE FUIDs will be created later.
|
||||
*/
|
||||
int
|
||||
zfs_copy_ace_2_fuid(vtype_t obj_type, zfs_acl_t *aclp, void *datap,
|
||||
zfs_ace_t *z_acl, int aclcnt, size_t *size)
|
||||
zfs_copy_ace_2_fuid(zfsvfs_t *zfsvfs, vtype_t obj_type, zfs_acl_t *aclp,
|
||||
void *datap, zfs_ace_t *z_acl, int aclcnt, size_t *size,
|
||||
zfs_fuid_info_t **fuidp, cred_t *cr)
|
||||
{
|
||||
int i;
|
||||
uint16_t entry_type;
|
||||
|
@ -555,9 +557,9 @@ zfs_copy_ace_2_fuid(vtype_t obj_type, zfs_acl_t *aclp, void *datap,
|
|||
entry_type = aceptr->z_hdr.z_flags & ACE_TYPE_FLAGS;
|
||||
if (entry_type != ACE_OWNER && entry_type != OWNING_GROUP &&
|
||||
entry_type != ACE_EVERYONE) {
|
||||
if (!aclp->z_has_fuids)
|
||||
aclp->z_has_fuids = IS_EPHEMERAL(acep->a_who);
|
||||
aceptr->z_fuid = (uint64_t)acep->a_who;
|
||||
aceptr->z_fuid = zfs_fuid_create(zfsvfs, acep->a_who,
|
||||
cr, (entry_type == 0) ?
|
||||
ZFS_ACE_USER : ZFS_ACE_GROUP, fuidp);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -682,7 +684,7 @@ zfs_copy_ace_2_oldace(vtype_t obj_type, zfs_acl_t *aclp, ace_t *acep,
|
|||
* convert old ACL format to new
|
||||
*/
|
||||
void
|
||||
zfs_acl_xform(znode_t *zp, zfs_acl_t *aclp)
|
||||
zfs_acl_xform(znode_t *zp, zfs_acl_t *aclp, cred_t *cr)
|
||||
{
|
||||
zfs_oldace_t *oldaclp;
|
||||
int i;
|
||||
|
@ -714,9 +716,9 @@ zfs_acl_xform(znode_t *zp, zfs_acl_t *aclp)
|
|||
newaclnode = zfs_acl_node_alloc(aclp->z_acl_count *
|
||||
sizeof (zfs_object_ace_t));
|
||||
aclp->z_ops = zfs_acl_fuid_ops;
|
||||
VERIFY(zfs_copy_ace_2_fuid(ZTOV(zp)->v_type, aclp, oldaclp,
|
||||
newaclnode->z_acldata, aclp->z_acl_count,
|
||||
&newaclnode->z_size) == 0);
|
||||
VERIFY(zfs_copy_ace_2_fuid(zp->z_zfsvfs, ZTOV(zp)->v_type, aclp,
|
||||
oldaclp, newaclnode->z_acldata, aclp->z_acl_count,
|
||||
&newaclnode->z_size, NULL, cr) == 0);
|
||||
newaclnode->z_ace_count = aclp->z_acl_count;
|
||||
aclp->z_version = ZFS_ACL_VERSION;
|
||||
kmem_free(oldaclp, aclp->z_acl_count * sizeof (zfs_oldace_t));
|
||||
|
@ -770,8 +772,7 @@ zfs_set_ace(zfs_acl_t *aclp, void *acep, uint32_t access_mask,
|
|||
* Also, create FUIDs for any User/Group ACEs
|
||||
*/
|
||||
static uint64_t
|
||||
zfs_mode_fuid_compute(znode_t *zp, zfs_acl_t *aclp, cred_t *cr,
|
||||
zfs_fuid_info_t **fuidp, dmu_tx_t *tx)
|
||||
zfs_mode_compute(znode_t *zp, zfs_acl_t *aclp)
|
||||
{
|
||||
int entry_type;
|
||||
mode_t mode;
|
||||
|
@ -905,15 +906,6 @@ zfs_mode_fuid_compute(znode_t *zp, zfs_acl_t *aclp, cred_t *cr,
|
|||
}
|
||||
}
|
||||
}
|
||||
/*
|
||||
* Now handle FUID create for user/group ACEs
|
||||
*/
|
||||
if (entry_type == 0 || entry_type == ACE_IDENTIFIER_GROUP) {
|
||||
aclp->z_ops.ace_who_set(acep,
|
||||
zfs_fuid_create(zp->z_zfsvfs, who, cr,
|
||||
(entry_type == 0) ? ZFS_ACE_USER : ZFS_ACE_GROUP,
|
||||
tx, fuidp));
|
||||
}
|
||||
}
|
||||
return (mode);
|
||||
}
|
||||
|
@ -989,7 +981,7 @@ zfs_acl_node_read(znode_t *zp, zfs_acl_t **aclpp, boolean_t will_modify)
|
|||
aclnode = zfs_acl_node_alloc(aclsize);
|
||||
list_insert_head(&aclp->z_acl, aclnode);
|
||||
error = dmu_read(zp->z_zfsvfs->z_os, extacl, 0,
|
||||
aclsize, aclnode->z_acldata);
|
||||
aclsize, aclnode->z_acldata, DMU_READ_PREFETCH);
|
||||
aclnode->z_ace_count = acl_count;
|
||||
aclp->z_acl_count = acl_count;
|
||||
aclp->z_acl_bytes = aclsize;
|
||||
|
@ -1014,8 +1006,7 @@ zfs_acl_node_read(znode_t *zp, zfs_acl_t **aclpp, boolean_t will_modify)
|
|||
* already checked the acl and knows whether to inherit.
|
||||
*/
|
||||
int
|
||||
zfs_aclset_common(znode_t *zp, zfs_acl_t *aclp, cred_t *cr,
|
||||
zfs_fuid_info_t **fuidp, dmu_tx_t *tx)
|
||||
zfs_aclset_common(znode_t *zp, zfs_acl_t *aclp, cred_t *cr, dmu_tx_t *tx)
|
||||
{
|
||||
int error;
|
||||
znode_phys_t *zphys = zp->z_phys;
|
||||
|
@ -1026,12 +1017,9 @@ zfs_aclset_common(znode_t *zp, zfs_acl_t *aclp, cred_t *cr,
|
|||
dmu_object_type_t otype;
|
||||
zfs_acl_node_t *aclnode;
|
||||
|
||||
ASSERT(MUTEX_HELD(&zp->z_lock));
|
||||
ASSERT(MUTEX_HELD(&zp->z_acl_lock));
|
||||
|
||||
dmu_buf_will_dirty(zp->z_dbuf, tx);
|
||||
|
||||
zphys->zp_mode = zfs_mode_fuid_compute(zp, aclp, cr, fuidp, tx);
|
||||
zphys->zp_mode = zfs_mode_compute(zp, aclp);
|
||||
|
||||
/*
|
||||
* Decide which object type to use. If we are forced to
|
||||
|
@ -1043,7 +1031,7 @@ zfs_aclset_common(znode_t *zp, zfs_acl_t *aclp, cred_t *cr,
|
|||
} else {
|
||||
if ((aclp->z_version == ZFS_ACL_VERSION_INITIAL) &&
|
||||
(zfsvfs->z_version >= ZPL_VERSION_FUID))
|
||||
zfs_acl_xform(zp, aclp);
|
||||
zfs_acl_xform(zp, aclp, cr);
|
||||
ASSERT(aclp->z_version >= ZFS_ACL_VERSION_FUID);
|
||||
otype = DMU_OT_ACL;
|
||||
}
|
||||
|
@ -1125,7 +1113,6 @@ zfs_aclset_common(znode_t *zp, zfs_acl_t *aclp, cred_t *cr,
|
|||
if (ace_trivial_common(aclp, 0, zfs_ace_walk) == 0)
|
||||
zp->z_phys->zp_flags |= ZFS_ACL_TRIVIAL;
|
||||
|
||||
zfs_time_stamper_locked(zp, STATE_CHANGED, tx);
|
||||
return (0);
|
||||
}
|
||||
|
||||
|
@ -1336,7 +1323,7 @@ zfs_acl_ace_insert(zfs_acl_t *aclp, void *acep)
|
|||
* Prepend deny ACE
|
||||
*/
|
||||
static void *
|
||||
zfs_acl_prepend_deny(znode_t *zp, zfs_acl_t *aclp, void *acep,
|
||||
zfs_acl_prepend_deny(uint64_t uid, zfs_acl_t *aclp, void *acep,
|
||||
mode_t mode)
|
||||
{
|
||||
zfs_acl_node_t *aclnode;
|
||||
|
@ -1349,7 +1336,7 @@ zfs_acl_prepend_deny(znode_t *zp, zfs_acl_t *aclp, void *acep,
|
|||
fuid = aclp->z_ops.ace_who_get(acep);
|
||||
flags = aclp->z_ops.ace_flags_get(acep);
|
||||
zfs_set_ace(aclp, newacep, 0, DENY, fuid, (flags & ACE_TYPE_FLAGS));
|
||||
zfs_acl_prepend_fixup(aclp, newacep, acep, mode, zp->z_phys->zp_uid);
|
||||
zfs_acl_prepend_fixup(aclp, newacep, acep, mode, uid);
|
||||
|
||||
return (newacep);
|
||||
}
|
||||
|
@ -1473,9 +1460,9 @@ zfs_fixup_group_entries(zfs_acl_t *aclp, void *acep, void *prevacep,
|
|||
* in PSARC/2002/240
|
||||
*/
|
||||
static void
|
||||
zfs_acl_chmod(znode_t *zp, uint64_t mode, zfs_acl_t *aclp)
|
||||
zfs_acl_chmod(zfsvfs_t *zfsvfs, uint64_t uid,
|
||||
uint64_t mode, zfs_acl_t *aclp)
|
||||
{
|
||||
zfsvfs_t *zfsvfs = zp->z_zfsvfs;
|
||||
void *acep = NULL, *prevacep = NULL;
|
||||
uint64_t who;
|
||||
int i;
|
||||
|
@ -1485,11 +1472,6 @@ zfs_acl_chmod(znode_t *zp, uint64_t mode, zfs_acl_t *aclp)
|
|||
uint16_t iflags, type;
|
||||
uint32_t access_mask;
|
||||
|
||||
ASSERT(MUTEX_HELD(&zp->z_acl_lock));
|
||||
ASSERT(MUTEX_HELD(&zp->z_lock));
|
||||
|
||||
aclp->z_hints = (zp->z_phys->zp_flags & V4_ACL_WIDE_FLAGS);
|
||||
|
||||
/*
|
||||
* If discard then just discard all ACL nodes which
|
||||
* represent the ACEs.
|
||||
|
@ -1554,17 +1536,15 @@ zfs_acl_chmod(znode_t *zp, uint64_t mode, zfs_acl_t *aclp)
|
|||
|
||||
if (!reuse_deny) {
|
||||
prevacep =
|
||||
zfs_acl_prepend_deny(zp,
|
||||
zfs_acl_prepend_deny(uid,
|
||||
aclp, acep, mode);
|
||||
} else {
|
||||
zfs_acl_prepend_fixup(
|
||||
aclp, prevacep,
|
||||
acep, mode,
|
||||
zp->z_phys->zp_uid);
|
||||
acep, mode, uid);
|
||||
}
|
||||
zfs_fixup_group_entries(aclp, acep,
|
||||
prevacep, mode);
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1623,8 +1603,10 @@ zfs_acl_chmod_setattr(znode_t *zp, zfs_acl_t **aclp, uint64_t mode)
|
|||
mutex_enter(&zp->z_acl_lock);
|
||||
*aclp = NULL;
|
||||
error = zfs_acl_node_read(zp, aclp, B_TRUE);
|
||||
if (error == 0)
|
||||
zfs_acl_chmod(zp, mode, *aclp);
|
||||
if (error == 0) {
|
||||
(*aclp)->z_hints = zp->z_phys->zp_flags & V4_ACL_WIDE_FLAGS;
|
||||
zfs_acl_chmod(zp->z_zfsvfs, zp->z_phys->zp_uid, mode, *aclp);
|
||||
}
|
||||
mutex_exit(&zp->z_acl_lock);
|
||||
mutex_exit(&zp->z_lock);
|
||||
return (error);
|
||||
|
@ -1649,9 +1631,8 @@ zfs_restricted_update(zfsvfs_t *zfsvfs, zfs_acl_t *aclp, void *acep)
|
|||
* Should ACE be inherited?
|
||||
*/
|
||||
static int
|
||||
zfs_ace_can_use(znode_t *zp, uint16_t acep_flags)
|
||||
zfs_ace_can_use(vtype_t vtype, uint16_t acep_flags)
|
||||
{
|
||||
int vtype = ZTOV(zp)->v_type;
|
||||
int iflags = (acep_flags & 0xf);
|
||||
|
||||
if ((vtype == VDIR) && (iflags & ACE_DIRECTORY_INHERIT_ACE))
|
||||
|
@ -1666,10 +1647,9 @@ zfs_ace_can_use(znode_t *zp, uint16_t acep_flags)
|
|||
* inherit inheritable ACEs from parent
|
||||
*/
|
||||
static zfs_acl_t *
|
||||
zfs_acl_inherit(znode_t *zp, zfs_acl_t *paclp, uint64_t mode,
|
||||
boolean_t *need_chmod)
|
||||
zfs_acl_inherit(zfsvfs_t *zfsvfs, vtype_t vtype, zfs_acl_t *paclp,
|
||||
uint64_t mode, boolean_t *need_chmod)
|
||||
{
|
||||
zfsvfs_t *zfsvfs = zp->z_zfsvfs;
|
||||
void *pacep;
|
||||
void *acep, *acep2;
|
||||
zfs_acl_node_t *aclnode, *aclnode2;
|
||||
|
@ -1680,8 +1660,8 @@ zfs_acl_inherit(znode_t *zp, zfs_acl_t *paclp, uint64_t mode,
|
|||
size_t ace_size;
|
||||
void *data1, *data2;
|
||||
size_t data1sz, data2sz;
|
||||
boolean_t vdir = ZTOV(zp)->v_type == VDIR;
|
||||
boolean_t vreg = ZTOV(zp)->v_type == VREG;
|
||||
boolean_t vdir = vtype == VDIR;
|
||||
boolean_t vreg = vtype == VREG;
|
||||
boolean_t passthrough, passthrough_x, noallow;
|
||||
|
||||
passthrough_x =
|
||||
|
@ -1710,7 +1690,7 @@ zfs_acl_inherit(znode_t *zp, zfs_acl_t *paclp, uint64_t mode,
|
|||
|
||||
ace_size = aclp->z_ops.ace_size(pacep);
|
||||
|
||||
if (!zfs_ace_can_use(zp, iflags))
|
||||
if (!zfs_ace_can_use(vtype, iflags))
|
||||
continue;
|
||||
|
||||
/*
|
||||
|
@ -1806,55 +1786,58 @@ zfs_acl_inherit(znode_t *zp, zfs_acl_t *paclp, uint64_t mode,
|
|||
* Create file system object initial permissions
|
||||
* including inheritable ACEs.
|
||||
*/
|
||||
void
|
||||
zfs_perm_init(znode_t *zp, znode_t *parent, int flag,
|
||||
vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
|
||||
zfs_acl_t *setaclp, zfs_fuid_info_t **fuidp)
|
||||
int
|
||||
zfs_acl_ids_create(znode_t *dzp, int flag, vattr_t *vap, cred_t *cr,
|
||||
vsecattr_t *vsecp, zfs_acl_ids_t *acl_ids)
|
||||
{
|
||||
uint64_t mode, fuid, fgid;
|
||||
int error;
|
||||
zfsvfs_t *zfsvfs = zp->z_zfsvfs;
|
||||
zfs_acl_t *aclp = NULL;
|
||||
zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
|
||||
zfs_acl_t *paclp;
|
||||
xvattr_t *xvap = (xvattr_t *)vap;
|
||||
gid_t gid;
|
||||
boolean_t need_chmod = B_TRUE;
|
||||
|
||||
if (setaclp)
|
||||
aclp = setaclp;
|
||||
bzero(acl_ids, sizeof (zfs_acl_ids_t));
|
||||
acl_ids->z_mode = MAKEIMODE(vap->va_type, vap->va_mode);
|
||||
|
||||
mode = MAKEIMODE(vap->va_type, vap->va_mode);
|
||||
if (vsecp)
|
||||
if ((error = zfs_vsec_2_aclp(zfsvfs, vap->va_type, vsecp, cr,
|
||||
&acl_ids->z_fuidp, &acl_ids->z_aclp)) != 0)
|
||||
return (error);
|
||||
|
||||
/*
|
||||
* Determine uid and gid.
|
||||
*/
|
||||
if ((flag & (IS_ROOT_NODE | IS_REPLAY)) ||
|
||||
((flag & IS_XATTR) && (vap->va_type == VDIR))) {
|
||||
fuid = zfs_fuid_create(zfsvfs, vap->va_uid, cr,
|
||||
ZFS_OWNER, tx, fuidp);
|
||||
fgid = zfs_fuid_create(zfsvfs, vap->va_gid, cr,
|
||||
ZFS_GROUP, tx, fuidp);
|
||||
acl_ids->z_fuid = zfs_fuid_create(zfsvfs,
|
||||
(uint64_t)vap->va_uid, cr,
|
||||
ZFS_OWNER, &acl_ids->z_fuidp);
|
||||
acl_ids->z_fgid = zfs_fuid_create(zfsvfs,
|
||||
(uint64_t)vap->va_gid, cr,
|
||||
ZFS_GROUP, &acl_ids->z_fuidp);
|
||||
gid = vap->va_gid;
|
||||
} else {
|
||||
fuid = zfs_fuid_create_cred(zfsvfs, ZFS_OWNER, tx, cr, fuidp);
|
||||
fgid = 0;
|
||||
acl_ids->z_fuid = zfs_fuid_create_cred(zfsvfs, ZFS_OWNER,
|
||||
cr, &acl_ids->z_fuidp);
|
||||
acl_ids->z_fgid = 0;
|
||||
if (vap->va_mask & AT_GID) {
|
||||
fgid = zfs_fuid_create(zfsvfs, vap->va_gid, cr,
|
||||
ZFS_GROUP, tx, fuidp);
|
||||
acl_ids->z_fgid = zfs_fuid_create(zfsvfs,
|
||||
(uint64_t)vap->va_gid,
|
||||
cr, ZFS_GROUP, &acl_ids->z_fuidp);
|
||||
gid = vap->va_gid;
|
||||
if (fgid != parent->z_phys->zp_gid &&
|
||||
if (acl_ids->z_fgid != dzp->z_phys->zp_gid &&
|
||||
!groupmember(vap->va_gid, cr) &&
|
||||
secpolicy_vnode_create_gid(cr) != 0)
|
||||
fgid = 0;
|
||||
acl_ids->z_fgid = 0;
|
||||
}
|
||||
if (fgid == 0) {
|
||||
if (parent->z_phys->zp_mode & S_ISGID) {
|
||||
fgid = parent->z_phys->zp_gid;
|
||||
gid = zfs_fuid_map_id(zfsvfs, fgid,
|
||||
if (acl_ids->z_fgid == 0) {
|
||||
if (dzp->z_phys->zp_mode & S_ISGID) {
|
||||
acl_ids->z_fgid = dzp->z_phys->zp_gid;
|
||||
gid = zfs_fuid_map_id(zfsvfs, acl_ids->z_fgid,
|
||||
cr, ZFS_GROUP);
|
||||
} else {
|
||||
fgid = zfs_fuid_create_cred(zfsvfs,
|
||||
ZFS_GROUP, tx, cr, fuidp);
|
||||
acl_ids->z_fgid = zfs_fuid_create_cred(zfsvfs,
|
||||
ZFS_GROUP, cr, &acl_ids->z_fuidp);
|
||||
gid = crgetgid(cr);
|
||||
}
|
||||
}
|
||||
|
@ -1867,57 +1850,61 @@ zfs_perm_init(znode_t *zp, znode_t *parent, int flag,
|
|||
* file's new group, clear the file's set-GID bit.
|
||||
*/
|
||||
|
||||
if ((parent->z_phys->zp_mode & S_ISGID) && (vap->va_type == VDIR)) {
|
||||
mode |= S_ISGID;
|
||||
if (!(flag & IS_ROOT_NODE) && (dzp->z_phys->zp_mode & S_ISGID) &&
|
||||
(vap->va_type == VDIR)) {
|
||||
acl_ids->z_mode |= S_ISGID;
|
||||
} else {
|
||||
if ((mode & S_ISGID) &&
|
||||
if ((acl_ids->z_mode & S_ISGID) &&
|
||||
secpolicy_vnode_setids_setgids(cr, gid) != 0)
|
||||
mode &= ~S_ISGID;
|
||||
acl_ids->z_mode &= ~S_ISGID;
|
||||
}
|
||||
|
||||
zp->z_phys->zp_uid = fuid;
|
||||
zp->z_phys->zp_gid = fgid;
|
||||
zp->z_phys->zp_mode = mode;
|
||||
|
||||
if (aclp == NULL) {
|
||||
mutex_enter(&parent->z_lock);
|
||||
if ((ZTOV(parent)->v_type == VDIR &&
|
||||
(parent->z_phys->zp_flags & ZFS_INHERIT_ACE)) &&
|
||||
!(zp->z_phys->zp_flags & ZFS_XATTR)) {
|
||||
mutex_enter(&parent->z_acl_lock);
|
||||
VERIFY(0 == zfs_acl_node_read(parent, &paclp, B_FALSE));
|
||||
mutex_exit(&parent->z_acl_lock);
|
||||
aclp = zfs_acl_inherit(zp, paclp, mode, &need_chmod);
|
||||
if (acl_ids->z_aclp == NULL) {
|
||||
mutex_enter(&dzp->z_lock);
|
||||
if (!(flag & IS_ROOT_NODE) && (ZTOV(dzp)->v_type == VDIR &&
|
||||
(dzp->z_phys->zp_flags & ZFS_INHERIT_ACE)) &&
|
||||
!(dzp->z_phys->zp_flags & ZFS_XATTR)) {
|
||||
mutex_enter(&dzp->z_acl_lock);
|
||||
VERIFY(0 == zfs_acl_node_read(dzp, &paclp, B_FALSE));
|
||||
mutex_exit(&dzp->z_acl_lock);
|
||||
acl_ids->z_aclp = zfs_acl_inherit(zfsvfs,
|
||||
vap->va_type, paclp, acl_ids->z_mode, &need_chmod);
|
||||
zfs_acl_free(paclp);
|
||||
} else {
|
||||
aclp = zfs_acl_alloc(zfs_acl_version_zp(zp));
|
||||
acl_ids->z_aclp =
|
||||
zfs_acl_alloc(zfs_acl_version_zp(dzp));
|
||||
}
|
||||
mutex_exit(&dzp->z_lock);
|
||||
if (need_chmod) {
|
||||
acl_ids->z_aclp->z_hints = (vap->va_type == VDIR) ?
|
||||
ZFS_ACL_AUTO_INHERIT : 0;
|
||||
zfs_acl_chmod(zfsvfs, acl_ids->z_fuid,
|
||||
acl_ids->z_mode, acl_ids->z_aclp);
|
||||
}
|
||||
mutex_exit(&parent->z_lock);
|
||||
mutex_enter(&zp->z_lock);
|
||||
mutex_enter(&zp->z_acl_lock);
|
||||
if (need_chmod)
|
||||
zfs_acl_chmod(zp, mode, aclp);
|
||||
} else {
|
||||
mutex_enter(&zp->z_lock);
|
||||
mutex_enter(&zp->z_acl_lock);
|
||||
}
|
||||
|
||||
/* Force auto_inherit on all new directory objects */
|
||||
if (vap->va_type == VDIR)
|
||||
aclp->z_hints |= ZFS_ACL_AUTO_INHERIT;
|
||||
return (0);
|
||||
}
|
||||
|
||||
error = zfs_aclset_common(zp, aclp, cr, fuidp, tx);
|
||||
/*
|
||||
* Free ACL and fuid_infop, but not the acl_ids structure
|
||||
*/
|
||||
void
|
||||
zfs_acl_ids_free(zfs_acl_ids_t *acl_ids)
|
||||
{
|
||||
if (acl_ids->z_aclp)
|
||||
zfs_acl_free(acl_ids->z_aclp);
|
||||
if (acl_ids->z_fuidp)
|
||||
zfs_fuid_info_free(acl_ids->z_fuidp);
|
||||
acl_ids->z_aclp = NULL;
|
||||
acl_ids->z_fuidp = NULL;
|
||||
}
|
||||
|
||||
/* Set optional attributes if any */
|
||||
if (vap->va_mask & AT_XVATTR)
|
||||
zfs_xvattr_set(zp, xvap);
|
||||
|
||||
mutex_exit(&zp->z_lock);
|
||||
mutex_exit(&zp->z_acl_lock);
|
||||
ASSERT3U(error, ==, 0);
|
||||
|
||||
if (aclp != setaclp)
|
||||
zfs_acl_free(aclp);
|
||||
boolean_t
|
||||
zfs_acl_ids_overquota(zfsvfs_t *zfsvfs, zfs_acl_ids_t *acl_ids)
|
||||
{
|
||||
return (zfs_usergroup_overquota(zfsvfs, B_FALSE, acl_ids->z_fuid) ||
|
||||
zfs_usergroup_overquota(zfsvfs, B_TRUE, acl_ids->z_fgid));
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -2018,7 +2005,7 @@ zfs_getacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr)
|
|||
|
||||
int
|
||||
zfs_vsec_2_aclp(zfsvfs_t *zfsvfs, vtype_t obj_type,
|
||||
vsecattr_t *vsecp, zfs_acl_t **zaclp)
|
||||
vsecattr_t *vsecp, cred_t *cr, zfs_fuid_info_t **fuidp, zfs_acl_t **zaclp)
|
||||
{
|
||||
zfs_acl_t *aclp;
|
||||
zfs_acl_node_t *aclnode;
|
||||
|
@ -2041,9 +2028,9 @@ zfs_vsec_2_aclp(zfsvfs_t *zfsvfs, vtype_t obj_type,
|
|||
return (error);
|
||||
}
|
||||
} else {
|
||||
if ((error = zfs_copy_ace_2_fuid(obj_type, aclp,
|
||||
if ((error = zfs_copy_ace_2_fuid(zfsvfs, obj_type, aclp,
|
||||
vsecp->vsa_aclentp, aclnode->z_acldata, aclcnt,
|
||||
&aclnode->z_size)) != 0) {
|
||||
&aclnode->z_size, fuidp, cr)) != 0) {
|
||||
zfs_acl_free(aclp);
|
||||
zfs_acl_node_free(aclnode);
|
||||
return (error);
|
||||
|
@ -2084,6 +2071,7 @@ zfs_setacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr)
|
|||
int error;
|
||||
zfs_acl_t *aclp;
|
||||
zfs_fuid_info_t *fuidp = NULL;
|
||||
boolean_t fuid_dirtied;
|
||||
|
||||
if (mask == 0)
|
||||
return (ENOSYS);
|
||||
|
@ -2094,7 +2082,8 @@ zfs_setacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr)
|
|||
if (error = zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr))
|
||||
return (error);
|
||||
|
||||
error = zfs_vsec_2_aclp(zfsvfs, ZTOV(zp)->v_type, vsecp, &aclp);
|
||||
error = zfs_vsec_2_aclp(zfsvfs, ZTOV(zp)->v_type, vsecp, cr, &fuidp,
|
||||
&aclp);
|
||||
if (error)
|
||||
return (error);
|
||||
|
||||
|
@ -2135,18 +2124,9 @@ top:
|
|||
} else if (aclp->z_acl_bytes > ZFS_ACE_SPACE) {
|
||||
dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, aclp->z_acl_bytes);
|
||||
}
|
||||
if (aclp->z_has_fuids) {
|
||||
if (zfsvfs->z_fuid_obj == 0) {
|
||||
dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
|
||||
dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
|
||||
FUID_SIZE_ESTIMATE(zfsvfs));
|
||||
dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, FALSE, NULL);
|
||||
} else {
|
||||
dmu_tx_hold_bonus(tx, zfsvfs->z_fuid_obj);
|
||||
dmu_tx_hold_write(tx, zfsvfs->z_fuid_obj, 0,
|
||||
FUID_SIZE_ESTIMATE(zfsvfs));
|
||||
}
|
||||
}
|
||||
fuid_dirtied = zfsvfs->z_fuid_dirty;
|
||||
if (fuid_dirtied)
|
||||
zfs_fuid_txhold(zfsvfs, tx);
|
||||
|
||||
error = dmu_tx_assign(tx, TXG_NOWAIT);
|
||||
if (error) {
|
||||
|
@ -2163,9 +2143,13 @@ top:
|
|||
return (error);
|
||||
}
|
||||
|
||||
error = zfs_aclset_common(zp, aclp, cr, &fuidp, tx);
|
||||
error = zfs_aclset_common(zp, aclp, cr, tx);
|
||||
ASSERT(error == 0);
|
||||
|
||||
if (fuid_dirtied)
|
||||
zfs_fuid_sync(zfsvfs, tx);
|
||||
|
||||
zfs_time_stamper_locked(zp, STATE_CHANGED, tx);
|
||||
zfs_log_acl(zilog, tx, zp, vsecp, fuidp);
|
||||
|
||||
if (fuidp)
|
||||
|
@ -2180,45 +2164,17 @@ done:
|
|||
}
|
||||
|
||||
/*
|
||||
* working_mode returns the permissions that were not granted
|
||||
* Check accesses of interest (AoI) against attributes of the dataset
|
||||
* such as read-only. Returns zero if no AoI conflict with dataset
|
||||
* attributes, otherwise an appropriate errno is returned.
|
||||
*/
|
||||
static int
|
||||
zfs_zaccess_common(znode_t *zp, uint32_t v4_mode, uint32_t *working_mode,
|
||||
boolean_t *check_privs, boolean_t skipaclchk, cred_t *cr)
|
||||
zfs_zaccess_dataset_check(znode_t *zp, uint32_t v4_mode)
|
||||
{
|
||||
zfs_acl_t *aclp;
|
||||
zfsvfs_t *zfsvfs = zp->z_zfsvfs;
|
||||
int error;
|
||||
uid_t uid = crgetuid(cr);
|
||||
uint64_t who;
|
||||
uint16_t type, iflags;
|
||||
uint16_t entry_type;
|
||||
uint32_t access_mask;
|
||||
uint32_t deny_mask = 0;
|
||||
zfs_ace_hdr_t *acep = NULL;
|
||||
boolean_t checkit;
|
||||
uid_t fowner;
|
||||
uid_t gowner;
|
||||
|
||||
/*
|
||||
* Short circuit empty requests
|
||||
*/
|
||||
if (v4_mode == 0)
|
||||
return (0);
|
||||
|
||||
*check_privs = B_TRUE;
|
||||
|
||||
if (zfsvfs->z_replay) {
|
||||
*working_mode = 0;
|
||||
return (0);
|
||||
}
|
||||
|
||||
*working_mode = v4_mode;
|
||||
|
||||
if ((v4_mode & WRITE_MASK) &&
|
||||
(zp->z_zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) &&
|
||||
(!IS_DEVVP(ZTOV(zp)))) {
|
||||
*check_privs = B_FALSE;
|
||||
(!IS_DEVVP(ZTOV(zp)) ||
|
||||
(IS_DEVVP(ZTOV(zp)) && (v4_mode & WRITE_MASK_ATTRS)))) {
|
||||
return (EROFS);
|
||||
}
|
||||
|
||||
|
@ -2230,31 +2186,64 @@ zfs_zaccess_common(znode_t *zp, uint32_t v4_mode, uint32_t *working_mode,
|
|||
(zp->z_phys->zp_flags & (ZFS_READONLY | ZFS_IMMUTABLE))) ||
|
||||
(ZTOV(zp)->v_type == VDIR &&
|
||||
(zp->z_phys->zp_flags & ZFS_IMMUTABLE)))) {
|
||||
*check_privs = B_FALSE;
|
||||
return (EPERM);
|
||||
}
|
||||
|
||||
if ((v4_mode & (ACE_DELETE | ACE_DELETE_CHILD)) &&
|
||||
(zp->z_phys->zp_flags & ZFS_NOUNLINK)) {
|
||||
*check_privs = B_FALSE;
|
||||
return (EPERM);
|
||||
}
|
||||
|
||||
if (((v4_mode & (ACE_READ_DATA|ACE_EXECUTE)) &&
|
||||
(zp->z_phys->zp_flags & ZFS_AV_QUARANTINED))) {
|
||||
*check_privs = B_FALSE;
|
||||
return (EACCES);
|
||||
}
|
||||
|
||||
/*
|
||||
* The caller requested that the ACL check be skipped. This
|
||||
* would only happen if the caller checked VOP_ACCESS() with a
|
||||
* 32 bit ACE mask and already had the appropriate permissions.
|
||||
*/
|
||||
if (skipaclchk) {
|
||||
*working_mode = 0;
|
||||
return (0);
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* The primary usage of this function is to loop through all of the
|
||||
* ACEs in the znode, determining what accesses of interest (AoI) to
|
||||
* the caller are allowed or denied. The AoI are expressed as bits in
|
||||
* the working_mode parameter. As each ACE is processed, bits covered
|
||||
* by that ACE are removed from the working_mode. This removal
|
||||
* facilitates two things. The first is that when the working mode is
|
||||
* empty (= 0), we know we've looked at all the AoI. The second is
|
||||
* that the ACE interpretation rules don't allow a later ACE to undo
|
||||
* something granted or denied by an earlier ACE. Removing the
|
||||
* discovered access or denial enforces this rule. At the end of
|
||||
* processing the ACEs, all AoI that were found to be denied are
|
||||
* placed into the working_mode, giving the caller a mask of denied
|
||||
* accesses. Returns:
|
||||
* 0 if all AoI granted
|
||||
* EACCES if the denied mask is non-zero
|
||||
* other error if abnormal failure (e.g., IO error)
|
||||
*
|
||||
* A secondary usage of the function is to determine if any of the
|
||||
* AoI are granted. If an ACE grants any access in
|
||||
* the working_mode, we immediately short circuit out of the function.
|
||||
* This mode is chosen by setting anyaccess to B_TRUE. The
|
||||
* working_mode is not a denied access mask upon exit if the function
|
||||
* is used in this manner.
|
||||
*/
|
||||
static int
|
||||
zfs_zaccess_aces_check(znode_t *zp, uint32_t *working_mode,
|
||||
boolean_t anyaccess, cred_t *cr)
|
||||
{
|
||||
zfsvfs_t *zfsvfs = zp->z_zfsvfs;
|
||||
zfs_acl_t *aclp;
|
||||
int error;
|
||||
uid_t uid = crgetuid(cr);
|
||||
uint64_t who;
|
||||
uint16_t type, iflags;
|
||||
uint16_t entry_type;
|
||||
uint32_t access_mask;
|
||||
uint32_t deny_mask = 0;
|
||||
zfs_ace_hdr_t *acep = NULL;
|
||||
boolean_t checkit;
|
||||
uid_t fowner;
|
||||
uid_t gowner;
|
||||
|
||||
zfs_fuid_map_ids(zp, cr, &fowner, &gowner);
|
||||
|
||||
|
@ -2268,6 +2257,7 @@ zfs_zaccess_common(znode_t *zp, uint32_t v4_mode, uint32_t *working_mode,
|
|||
|
||||
while (acep = zfs_acl_next_ace(aclp, acep, &who, &access_mask,
|
||||
&iflags, &type)) {
|
||||
uint32_t mask_matched;
|
||||
|
||||
if (!zfs_acl_valid_ace_type(type, iflags))
|
||||
continue;
|
||||
|
@ -2275,6 +2265,11 @@ zfs_zaccess_common(znode_t *zp, uint32_t v4_mode, uint32_t *working_mode,
|
|||
if (ZTOV(zp)->v_type == VDIR && (iflags & ACE_INHERIT_ONLY_ACE))
|
||||
continue;
|
||||
|
||||
/* Skip ACE if it does not affect any AoI */
|
||||
mask_matched = (access_mask & *working_mode);
|
||||
if (!mask_matched)
|
||||
continue;
|
||||
|
||||
entry_type = (iflags & ACE_TYPE_FLAGS);
|
||||
|
||||
checkit = B_FALSE;
|
||||
|
@ -2313,14 +2308,24 @@ zfs_zaccess_common(znode_t *zp, uint32_t v4_mode, uint32_t *working_mode,
|
|||
}
|
||||
|
||||
if (checkit) {
|
||||
uint32_t mask_matched = (access_mask & *working_mode);
|
||||
|
||||
if (mask_matched) {
|
||||
if (type == DENY)
|
||||
deny_mask |= mask_matched;
|
||||
|
||||
*working_mode &= ~mask_matched;
|
||||
if (type == DENY) {
|
||||
DTRACE_PROBE3(zfs__ace__denies,
|
||||
znode_t *, zp,
|
||||
zfs_ace_hdr_t *, acep,
|
||||
uint32_t, mask_matched);
|
||||
deny_mask |= mask_matched;
|
||||
} else {
|
||||
DTRACE_PROBE3(zfs__ace__allows,
|
||||
znode_t *, zp,
|
||||
zfs_ace_hdr_t *, acep,
|
||||
uint32_t, mask_matched);
|
||||
if (anyaccess) {
|
||||
mutex_exit(&zp->z_acl_lock);
|
||||
zfs_acl_free(aclp);
|
||||
return (0);
|
||||
}
|
||||
}
|
||||
*working_mode &= ~mask_matched;
|
||||
}
|
||||
|
||||
/* Are we done? */
|
||||
|
@ -2342,6 +2347,69 @@ zfs_zaccess_common(znode_t *zp, uint32_t v4_mode, uint32_t *working_mode,
|
|||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Return true if any access whatsoever granted, we don't actually
|
||||
* care what access is granted.
|
||||
*/
|
||||
boolean_t
|
||||
zfs_has_access(znode_t *zp, cred_t *cr)
|
||||
{
|
||||
uint32_t have = ACE_ALL_PERMS;
|
||||
|
||||
if (zfs_zaccess_aces_check(zp, &have, B_TRUE, cr) != 0) {
|
||||
uid_t owner;
|
||||
|
||||
owner = zfs_fuid_map_id(zp->z_zfsvfs,
|
||||
zp->z_phys->zp_uid, cr, ZFS_OWNER);
|
||||
|
||||
return (
|
||||
secpolicy_vnode_access(cr, ZTOV(zp), owner, VREAD) == 0 ||
|
||||
secpolicy_vnode_access(cr, ZTOV(zp), owner, VWRITE) == 0 ||
|
||||
secpolicy_vnode_access(cr, ZTOV(zp), owner, VEXEC) == 0 ||
|
||||
secpolicy_vnode_chown(cr, B_TRUE) == 0 ||
|
||||
secpolicy_vnode_chown(cr, B_FALSE) == 0 ||
|
||||
secpolicy_vnode_setdac(cr, owner) == 0 ||
|
||||
secpolicy_vnode_remove(cr) == 0);
|
||||
}
|
||||
return (B_TRUE);
|
||||
}
|
||||
|
||||
static int
|
||||
zfs_zaccess_common(znode_t *zp, uint32_t v4_mode, uint32_t *working_mode,
|
||||
boolean_t *check_privs, boolean_t skipaclchk, cred_t *cr)
|
||||
{
|
||||
zfsvfs_t *zfsvfs = zp->z_zfsvfs;
|
||||
int err;
|
||||
|
||||
*working_mode = v4_mode;
|
||||
*check_privs = B_TRUE;
|
||||
|
||||
/*
|
||||
* Short circuit empty requests
|
||||
*/
|
||||
if (v4_mode == 0 || zfsvfs->z_replay) {
|
||||
*working_mode = 0;
|
||||
return (0);
|
||||
}
|
||||
|
||||
if ((err = zfs_zaccess_dataset_check(zp, v4_mode)) != 0) {
|
||||
*check_privs = B_FALSE;
|
||||
return (err);
|
||||
}
|
||||
|
||||
/*
|
||||
* The caller requested that the ACL check be skipped. This
|
||||
* would only happen if the caller checked VOP_ACCESS() with a
|
||||
* 32 bit ACE mask and already had the appropriate permissions.
|
||||
*/
|
||||
if (skipaclchk) {
|
||||
*working_mode = 0;
|
||||
return (0);
|
||||
}
|
||||
|
||||
return (zfs_zaccess_aces_check(zp, working_mode, B_FALSE, cr));
|
||||
}
|
||||
|
||||
static int
|
||||
zfs_zaccess_append(znode_t *zp, uint32_t *working_mode, boolean_t *check_privs,
|
||||
cred_t *cr)
|
||||
|
|
|
@ -114,12 +114,16 @@ snapentry_compare(const void *a, const void *b)
|
|||
vnodeops_t *zfsctl_ops_root;
|
||||
vnodeops_t *zfsctl_ops_snapdir;
|
||||
vnodeops_t *zfsctl_ops_snapshot;
|
||||
vnodeops_t *zfsctl_ops_shares;
|
||||
vnodeops_t *zfsctl_ops_shares_dir;
|
||||
|
||||
static const fs_operation_def_t zfsctl_tops_root[];
|
||||
static const fs_operation_def_t zfsctl_tops_snapdir[];
|
||||
static const fs_operation_def_t zfsctl_tops_snapshot[];
|
||||
static const fs_operation_def_t zfsctl_tops_shares[];
|
||||
|
||||
static vnode_t *zfsctl_mknode_snapdir(vnode_t *);
|
||||
static vnode_t *zfsctl_mknode_shares(vnode_t *);
|
||||
static vnode_t *zfsctl_snapshot_mknode(vnode_t *, uint64_t objset);
|
||||
static int zfsctl_unmount_snap(zfs_snapentry_t *, int, cred_t *);
|
||||
|
||||
|
@ -127,14 +131,18 @@ static gfs_opsvec_t zfsctl_opsvec[] = {
|
|||
{ ".zfs", zfsctl_tops_root, &zfsctl_ops_root },
|
||||
{ ".zfs/snapshot", zfsctl_tops_snapdir, &zfsctl_ops_snapdir },
|
||||
{ ".zfs/snapshot/vnode", zfsctl_tops_snapshot, &zfsctl_ops_snapshot },
|
||||
{ ".zfs/shares", zfsctl_tops_shares, &zfsctl_ops_shares_dir },
|
||||
{ ".zfs/shares/vnode", zfsctl_tops_shares, &zfsctl_ops_shares },
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
/*
|
||||
* Root directory elements. We have only a single static entry, 'snapshot'.
|
||||
* Root directory elements. We only have two entries
|
||||
* snapshot and shares.
|
||||
*/
|
||||
static gfs_dirent_t zfsctl_root_entries[] = {
|
||||
{ "snapshot", zfsctl_mknode_snapdir, GFS_CACHE_VNODE },
|
||||
{ "shares", zfsctl_mknode_shares, GFS_CACHE_VNODE },
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
|
@ -166,21 +174,34 @@ zfsctl_fini(void)
|
|||
vn_freevnodeops(zfsctl_ops_snapdir);
|
||||
if (zfsctl_ops_snapshot)
|
||||
vn_freevnodeops(zfsctl_ops_snapshot);
|
||||
if (zfsctl_ops_shares)
|
||||
vn_freevnodeops(zfsctl_ops_shares);
|
||||
if (zfsctl_ops_shares_dir)
|
||||
vn_freevnodeops(zfsctl_ops_shares_dir);
|
||||
|
||||
zfsctl_ops_root = NULL;
|
||||
zfsctl_ops_snapdir = NULL;
|
||||
zfsctl_ops_snapshot = NULL;
|
||||
zfsctl_ops_shares = NULL;
|
||||
zfsctl_ops_shares_dir = NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Return the inode number associated with the 'snapshot' directory.
|
||||
* Return the inode number associated with the 'snapshot' or
|
||||
* 'shares' directory.
|
||||
*/
|
||||
/* ARGSUSED */
|
||||
static ino64_t
|
||||
zfsctl_root_inode_cb(vnode_t *vp, int index)
|
||||
{
|
||||
ASSERT(index == 0);
|
||||
return (ZFSCTL_INO_SNAPDIR);
|
||||
zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
|
||||
|
||||
ASSERT(index <= 2);
|
||||
|
||||
if (index == 0)
|
||||
return (ZFSCTL_INO_SNAPDIR);
|
||||
|
||||
return (zfsvfs->z_shares_dir);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -348,6 +369,30 @@ zfsctl_common_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct)
|
|||
return (0);
|
||||
}
|
||||
|
||||
|
||||
/*ARGSUSED*/
|
||||
static int
|
||||
zfsctl_shares_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct)
|
||||
{
|
||||
zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
|
||||
znode_t *dzp;
|
||||
int error;
|
||||
|
||||
ZFS_ENTER(zfsvfs);
|
||||
|
||||
if (zfsvfs->z_shares_dir == 0) {
|
||||
ZFS_EXIT(zfsvfs);
|
||||
return (ENOTSUP);
|
||||
}
|
||||
|
||||
if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0) {
|
||||
error = VOP_FID(ZTOV(dzp), fidp, ct);
|
||||
VN_RELE(ZTOV(dzp));
|
||||
}
|
||||
|
||||
ZFS_EXIT(zfsvfs);
|
||||
return (error);
|
||||
}
|
||||
/*
|
||||
* .zfs inode namespace
|
||||
*
|
||||
|
@ -478,7 +523,7 @@ zfsctl_unmount_snap(zfs_snapentry_t *sep, int fflags, cred_t *cr)
|
|||
VN_RELE(svp);
|
||||
return (error);
|
||||
}
|
||||
VFS_RELE(svp->v_vfsp);
|
||||
|
||||
/*
|
||||
* We can't use VN_RELE(), as that will try to invoke
|
||||
* zfsctl_snapdir_inactive(), which would cause us to destroy
|
||||
|
@ -691,7 +736,7 @@ zfsctl_snapdir_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t **vpp,
|
|||
return (err);
|
||||
|
||||
if (err == 0) {
|
||||
err = dmu_objset_snapshot(name, dirname, B_FALSE);
|
||||
err = dmu_objset_snapshot(name, dirname, NULL, B_FALSE);
|
||||
if (err)
|
||||
return (err);
|
||||
err = lookupnameat(dirname, seg, follow, NULL, vpp, dvp);
|
||||
|
@ -732,9 +777,6 @@ zfsctl_snapdir_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp,
|
|||
|
||||
ASSERT(dvp->v_type == VDIR);
|
||||
|
||||
if (gfs_lookup_dot(vpp, dvp, zfsvfs->z_ctldir, nm) == 0)
|
||||
return (0);
|
||||
|
||||
/*
|
||||
* If we get a recursive call, that means we got called
|
||||
* from the domount() code while it was trying to look up the
|
||||
|
@ -746,6 +788,11 @@ zfsctl_snapdir_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp,
|
|||
|
||||
ZFS_ENTER(zfsvfs);
|
||||
|
||||
if (gfs_lookup_dot(vpp, dvp, zfsvfs->z_ctldir, nm) == 0) {
|
||||
ZFS_EXIT(zfsvfs);
|
||||
return (0);
|
||||
}
|
||||
|
||||
if (flags & FIGNORECASE) {
|
||||
boolean_t conflict = B_FALSE;
|
||||
|
||||
|
@ -844,7 +891,7 @@ domount:
|
|||
* Return the mounted root rather than the covered mount point.
|
||||
* Takes the GFS vnode at .zfs/snapshot/<snapname> and returns
|
||||
* the ZFS vnode mounted on top of the GFS node. This ZFS
|
||||
* vnode is the root the newly created vfsp.
|
||||
* vnode is the root of the newly created vfsp.
|
||||
*/
|
||||
VFS_RELE(vfsp);
|
||||
err = traverse(vpp);
|
||||
|
@ -877,6 +924,37 @@ domount:
|
|||
return (err);
|
||||
}
|
||||
|
||||
/* ARGSUSED */
|
||||
static int
|
||||
zfsctl_shares_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp,
|
||||
int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
|
||||
int *direntflags, pathname_t *realpnp)
|
||||
{
|
||||
zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data;
|
||||
znode_t *dzp;
|
||||
int error;
|
||||
|
||||
ZFS_ENTER(zfsvfs);
|
||||
|
||||
if (gfs_lookup_dot(vpp, dvp, zfsvfs->z_ctldir, nm) == 0) {
|
||||
ZFS_EXIT(zfsvfs);
|
||||
return (0);
|
||||
}
|
||||
|
||||
if (zfsvfs->z_shares_dir == 0) {
|
||||
ZFS_EXIT(zfsvfs);
|
||||
return (ENOTSUP);
|
||||
}
|
||||
if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0)
|
||||
error = VOP_LOOKUP(ZTOV(dzp), nm, vpp, pnp,
|
||||
flags, rdir, cr, ct, direntflags, realpnp);
|
||||
|
||||
VN_RELE(ZTOV(dzp));
|
||||
ZFS_EXIT(zfsvfs);
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
/* ARGSUSED */
|
||||
static int
|
||||
zfsctl_snapdir_readdir_cb(vnode_t *vp, void *dp, int *eofp,
|
||||
|
@ -921,6 +999,33 @@ zfsctl_snapdir_readdir_cb(vnode_t *vp, void *dp, int *eofp,
|
|||
return (0);
|
||||
}
|
||||
|
||||
/* ARGSUSED */
|
||||
static int
|
||||
zfsctl_shares_readdir(vnode_t *vp, uio_t *uiop, cred_t *cr, int *eofp,
|
||||
caller_context_t *ct, int flags)
|
||||
{
|
||||
zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
|
||||
znode_t *dzp;
|
||||
int error;
|
||||
|
||||
ZFS_ENTER(zfsvfs);
|
||||
|
||||
if (zfsvfs->z_shares_dir == 0) {
|
||||
ZFS_EXIT(zfsvfs);
|
||||
return (ENOTSUP);
|
||||
}
|
||||
if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0) {
|
||||
error = VOP_READDIR(ZTOV(dzp), uiop, cr, eofp, ct, flags);
|
||||
VN_RELE(ZTOV(dzp));
|
||||
} else {
|
||||
*eofp = 1;
|
||||
error = ENOENT;
|
||||
}
|
||||
|
||||
ZFS_EXIT(zfsvfs);
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* pvp is the '.zfs' directory (zfsctl_node_t).
|
||||
* Creates vp, which is '.zfs/snapshot' (zfsctl_snapdir_t).
|
||||
|
@ -946,6 +1051,45 @@ zfsctl_mknode_snapdir(vnode_t *pvp)
|
|||
return (vp);
|
||||
}
|
||||
|
||||
/*
 * Create the GFS directory vnode for the 'shares' entry.
 *
 * pvp is the parent vnode (presumably '.zfs', since this routine is the
 * constructor registered for "shares" in zfsctl_root_entries).  The new
 * node uses zfsctl_ops_shares and inherits zc_cmtime from the parent.
 */
vnode_t *
zfsctl_mknode_shares(vnode_t *pvp)
{
	zfsctl_node_t *parent = pvp->v_data;
	zfsctl_node_t *node;
	vnode_t *newvp;

	newvp = gfs_dir_create(sizeof (zfsctl_node_t), pvp, zfsctl_ops_shares,
	    NULL, NULL, MAXNAMELEN, NULL, NULL);

	node = newvp->v_data;
	node->zc_cmtime = parent->zc_cmtime;

	return (newvp);
}
|
||||
|
||||
/* ARGSUSED */
|
||||
static int
|
||||
zfsctl_shares_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
|
||||
caller_context_t *ct)
|
||||
{
|
||||
zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
|
||||
znode_t *dzp;
|
||||
int error;
|
||||
|
||||
ZFS_ENTER(zfsvfs);
|
||||
if (zfsvfs->z_shares_dir == 0) {
|
||||
ZFS_EXIT(zfsvfs);
|
||||
return (ENOTSUP);
|
||||
}
|
||||
if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0) {
|
||||
error = VOP_GETATTR(ZTOV(dzp), vap, flags, cr, ct);
|
||||
VN_RELE(ZTOV(dzp));
|
||||
}
|
||||
ZFS_EXIT(zfsvfs);
|
||||
return (error);
|
||||
|
||||
|
||||
}
|
||||
|
||||
/* ARGSUSED */
|
||||
static int
|
||||
zfsctl_snapdir_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
|
||||
|
@ -996,6 +1140,20 @@ static const fs_operation_def_t zfsctl_tops_snapdir[] = {
|
|||
{ NULL }
|
||||
};
|
||||
|
||||
static const fs_operation_def_t zfsctl_tops_shares[] = {
|
||||
{ VOPNAME_OPEN, { .vop_open = zfsctl_common_open } },
|
||||
{ VOPNAME_CLOSE, { .vop_close = zfsctl_common_close } },
|
||||
{ VOPNAME_IOCTL, { .error = fs_inval } },
|
||||
{ VOPNAME_GETATTR, { .vop_getattr = zfsctl_shares_getattr } },
|
||||
{ VOPNAME_ACCESS, { .vop_access = zfsctl_common_access } },
|
||||
{ VOPNAME_READDIR, { .vop_readdir = zfsctl_shares_readdir } },
|
||||
{ VOPNAME_LOOKUP, { .vop_lookup = zfsctl_shares_lookup } },
|
||||
{ VOPNAME_SEEK, { .vop_seek = fs_seek } },
|
||||
{ VOPNAME_INACTIVE, { .vop_inactive = gfs_vop_inactive } },
|
||||
{ VOPNAME_FID, { .vop_fid = zfsctl_shares_fid } },
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
/*
|
||||
* pvp is the GFS vnode '.zfs/snapshot'.
|
||||
*
|
||||
|
@ -1013,7 +1171,6 @@ zfsctl_snapshot_mknode(vnode_t *pvp, uint64_t objset)
|
|||
zfsctl_ops_snapshot, NULL, NULL, MAXNAMELEN, NULL, NULL);
|
||||
zcp = vp->v_data;
|
||||
zcp->zc_id = objset;
|
||||
VFS_HOLD(vp->v_vfsp);
|
||||
|
||||
return (vp);
|
||||
}
|
||||
|
@ -1052,7 +1209,6 @@ zfsctl_snapshot_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
|
|||
|
||||
mutex_exit(&sdp->sd_lock);
|
||||
VN_RELE(dvp);
|
||||
VFS_RELE(vp->v_vfsp);
|
||||
|
||||
/*
|
||||
* Dispose of the vnode for the snapshot mount point.
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
|
@ -805,44 +805,49 @@ zfs_make_xattrdir(znode_t *zp, vattr_t *vap, vnode_t **xvpp, cred_t *cr)
|
|||
znode_t *xzp;
|
||||
dmu_tx_t *tx;
|
||||
int error;
|
||||
zfs_fuid_info_t *fuidp = NULL;
|
||||
zfs_acl_ids_t acl_ids;
|
||||
boolean_t fuid_dirtied;
|
||||
|
||||
*xvpp = NULL;
|
||||
|
||||
if (error = zfs_zaccess(zp, ACE_WRITE_NAMED_ATTRS, 0, B_FALSE, cr))
|
||||
return (error);
|
||||
|
||||
if ((error = zfs_acl_ids_create(zp, IS_XATTR, vap, cr, NULL,
|
||||
&acl_ids)) != 0)
|
||||
return (error);
|
||||
if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) {
|
||||
zfs_acl_ids_free(&acl_ids);
|
||||
return (EDQUOT);
|
||||
}
|
||||
|
||||
tx = dmu_tx_create(zfsvfs->z_os);
|
||||
dmu_tx_hold_bonus(tx, zp->z_id);
|
||||
dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
|
||||
if (IS_EPHEMERAL(crgetuid(cr)) || IS_EPHEMERAL(crgetgid(cr))) {
|
||||
if (zfsvfs->z_fuid_obj == 0) {
|
||||
dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
|
||||
dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
|
||||
FUID_SIZE_ESTIMATE(zfsvfs));
|
||||
dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, FALSE, NULL);
|
||||
} else {
|
||||
dmu_tx_hold_bonus(tx, zfsvfs->z_fuid_obj);
|
||||
dmu_tx_hold_write(tx, zfsvfs->z_fuid_obj, 0,
|
||||
FUID_SIZE_ESTIMATE(zfsvfs));
|
||||
}
|
||||
}
|
||||
fuid_dirtied = zfsvfs->z_fuid_dirty;
|
||||
if (fuid_dirtied)
|
||||
zfs_fuid_txhold(zfsvfs, tx);
|
||||
error = dmu_tx_assign(tx, TXG_NOWAIT);
|
||||
if (error) {
|
||||
zfs_acl_ids_free(&acl_ids);
|
||||
if (error == ERESTART)
|
||||
dmu_tx_wait(tx);
|
||||
dmu_tx_abort(tx);
|
||||
return (error);
|
||||
}
|
||||
zfs_mknode(zp, vap, tx, cr, IS_XATTR, &xzp, 0, NULL, &fuidp);
|
||||
zfs_mknode(zp, vap, tx, cr, IS_XATTR, &xzp, 0, &acl_ids);
|
||||
|
||||
if (fuid_dirtied)
|
||||
zfs_fuid_sync(zfsvfs, tx);
|
||||
|
||||
ASSERT(xzp->z_phys->zp_parent == zp->z_id);
|
||||
dmu_buf_will_dirty(zp->z_dbuf, tx);
|
||||
zp->z_phys->zp_xattr = xzp->z_id;
|
||||
|
||||
(void) zfs_log_create(zfsvfs->z_log, tx, TX_MKXATTR, zp,
|
||||
xzp, "", NULL, fuidp, vap);
|
||||
if (fuidp)
|
||||
zfs_fuid_info_free(fuidp);
|
||||
xzp, "", NULL, acl_ids.z_fuidp, vap);
|
||||
|
||||
zfs_acl_ids_free(&acl_ids);
|
||||
dmu_tx_commit(tx);
|
||||
|
||||
*xvpp = ZTOV(xzp);
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
|
@ -96,7 +96,6 @@ zfs_ereport_post(const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio,
|
|||
nvlist_t *ereport, *detector;
|
||||
uint64_t ena;
|
||||
char class[64];
|
||||
int state;
|
||||
|
||||
/*
|
||||
* If we are doing a spa_tryimport(), ignore errors.
|
||||
|
@ -130,15 +129,39 @@ zfs_ereport_post(const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio,
|
|||
return;
|
||||
|
||||
/*
|
||||
* If the vdev has already been marked as failing due to a
|
||||
* failed probe, then ignore any subsequent I/O errors, as the
|
||||
* DE will automatically fault the vdev on the first such
|
||||
* failure.
|
||||
* If this I/O is not a retry I/O, don't post an ereport.
|
||||
* Otherwise, we risk making bad diagnoses based on B_FAILFAST
|
||||
* I/Os.
|
||||
*/
|
||||
if (vd != NULL &&
|
||||
(!vdev_readable(vd) || !vdev_writeable(vd)) &&
|
||||
strcmp(subclass, FM_EREPORT_ZFS_PROBE_FAILURE) != 0)
|
||||
if (zio->io_error == EIO &&
|
||||
!(zio->io_flags & ZIO_FLAG_IO_RETRY))
|
||||
return;
|
||||
|
||||
if (vd != NULL) {
|
||||
/*
|
||||
* If the vdev has already been marked as failing due
|
||||
* to a failed probe, then ignore any subsequent I/O
|
||||
* errors, as the DE will automatically fault the vdev
|
||||
* on the first such failure. This also catches cases
|
||||
* where vdev_remove_wanted is set and the device has
|
||||
* not yet been asynchronously placed into the REMOVED
|
||||
* state.
|
||||
*/
|
||||
if (zio->io_vd == vd &&
|
||||
!vdev_accessible(vd, zio) &&
|
||||
strcmp(subclass, FM_EREPORT_ZFS_PROBE_FAILURE) != 0)
|
||||
return;
|
||||
|
||||
/*
|
||||
* Ignore checksum errors for reads from DTL regions of
|
||||
* leaf vdevs.
|
||||
*/
|
||||
if (zio->io_type == ZIO_TYPE_READ &&
|
||||
zio->io_error == ECKSUM &&
|
||||
vd->vdev_ops->vdev_op_leaf &&
|
||||
vdev_dtl_contains(vd, DTL_MISSING, zio->io_txg, 1))
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if ((ereport = fm_nvlist_create(NULL)) == NULL)
|
||||
|
@ -188,14 +211,6 @@ zfs_ereport_post(const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio,
|
|||
* passed in.
|
||||
*/
|
||||
|
||||
/*
|
||||
* If we are importing a faulted pool, then we treat it like an open,
|
||||
* not an import. Otherwise, the DE will ignore all faults during
|
||||
* import, since the default behavior is to mark the devices as
|
||||
* persistently unavailable, not leave them in the faulted state.
|
||||
*/
|
||||
state = spa->spa_import_faulted ? SPA_LOAD_OPEN : spa->spa_load_state;
|
||||
|
||||
/*
|
||||
* Generic payload members common to all ereports.
|
||||
*/
|
||||
|
@ -203,7 +218,7 @@ zfs_ereport_post(const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio,
|
|||
DATA_TYPE_STRING, spa_name(spa), FM_EREPORT_PAYLOAD_ZFS_POOL_GUID,
|
||||
DATA_TYPE_UINT64, spa_guid(spa),
|
||||
FM_EREPORT_PAYLOAD_ZFS_POOL_CONTEXT, DATA_TYPE_INT32,
|
||||
state, NULL);
|
||||
spa->spa_load_state, NULL);
|
||||
|
||||
if (spa != NULL) {
|
||||
fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_POOL_FAILMODE,
|
||||
|
@ -222,14 +237,18 @@ zfs_ereport_post(const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio,
|
|||
DATA_TYPE_UINT64, vd->vdev_guid,
|
||||
FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE,
|
||||
DATA_TYPE_STRING, vd->vdev_ops->vdev_op_type, NULL);
|
||||
if (vd->vdev_path)
|
||||
if (vd->vdev_path != NULL)
|
||||
fm_payload_set(ereport,
|
||||
FM_EREPORT_PAYLOAD_ZFS_VDEV_PATH,
|
||||
DATA_TYPE_STRING, vd->vdev_path, NULL);
|
||||
if (vd->vdev_devid)
|
||||
if (vd->vdev_devid != NULL)
|
||||
fm_payload_set(ereport,
|
||||
FM_EREPORT_PAYLOAD_ZFS_VDEV_DEVID,
|
||||
DATA_TYPE_STRING, vd->vdev_devid, NULL);
|
||||
if (vd->vdev_fru != NULL)
|
||||
fm_payload_set(ereport,
|
||||
FM_EREPORT_PAYLOAD_ZFS_VDEV_FRU,
|
||||
DATA_TYPE_STRING, vd->vdev_fru, NULL);
|
||||
|
||||
if (pvd != NULL) {
|
||||
fm_payload_set(ereport,
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
|
@ -47,8 +47,10 @@
|
|||
* During file system initialization the nvlist(s) are read and
|
||||
* two AVL trees are created. One tree is keyed by the index number
|
||||
* and the other by the domain string. Nodes are never removed from
|
||||
* trees, but new entries may be added. If a new entry is added then the
|
||||
* on-disk packed nvlist will also be updated.
|
||||
* trees, but new entries may be added. If a new entry is added then
|
||||
* the zfsvfs->z_fuid_dirty flag is set to true and the caller will then
|
||||
* be responsible for calling zfs_fuid_sync() to sync the changes to disk.
|
||||
*
|
||||
*/
|
||||
|
||||
#define FUID_IDX "fuid_idx"
|
||||
|
@ -97,6 +99,15 @@ domain_compare(const void *arg1, const void *arg2)
|
|||
return (val > 0 ? 1 : -1);
|
||||
}
|
||||
|
||||
void
|
||||
zfs_fuid_avl_tree_create(avl_tree_t *idx_tree, avl_tree_t *domain_tree)
|
||||
{
|
||||
avl_create(idx_tree, idx_compare,
|
||||
sizeof (fuid_domain_t), offsetof(fuid_domain_t, f_idxnode));
|
||||
avl_create(domain_tree, domain_compare,
|
||||
sizeof (fuid_domain_t), offsetof(fuid_domain_t, f_domnode));
|
||||
}
|
||||
|
||||
/*
|
||||
* load initial fuid domain and idx trees. This function is used by
|
||||
* both the kernel and zdb.
|
||||
|
@ -108,12 +119,9 @@ zfs_fuid_table_load(objset_t *os, uint64_t fuid_obj, avl_tree_t *idx_tree,
|
|||
dmu_buf_t *db;
|
||||
uint64_t fuid_size;
|
||||
|
||||
avl_create(idx_tree, idx_compare,
|
||||
sizeof (fuid_domain_t), offsetof(fuid_domain_t, f_idxnode));
|
||||
avl_create(domain_tree, domain_compare,
|
||||
sizeof (fuid_domain_t), offsetof(fuid_domain_t, f_domnode));
|
||||
|
||||
VERIFY(0 == dmu_bonus_hold(os, fuid_obj, FTAG, &db));
|
||||
ASSERT(fuid_obj != 0);
|
||||
VERIFY(0 == dmu_bonus_hold(os, fuid_obj,
|
||||
FTAG, &db));
|
||||
fuid_size = *(uint64_t *)db->db_data;
|
||||
dmu_buf_rele(db, FTAG);
|
||||
|
||||
|
@ -125,7 +133,8 @@ zfs_fuid_table_load(objset_t *os, uint64_t fuid_obj, avl_tree_t *idx_tree,
|
|||
int i;
|
||||
|
||||
packed = kmem_alloc(fuid_size, KM_SLEEP);
|
||||
VERIFY(dmu_read(os, fuid_obj, 0, fuid_size, packed) == 0);
|
||||
VERIFY(dmu_read(os, fuid_obj, 0,
|
||||
fuid_size, packed, DMU_READ_PREFETCH) == 0);
|
||||
VERIFY(nvlist_unpack(packed, fuid_size,
|
||||
&nvp, 0) == 0);
|
||||
VERIFY(nvlist_lookup_nvlist_array(nvp, FUID_NVP_ARRAY,
|
||||
|
@ -189,10 +198,8 @@ zfs_fuid_idx_domain(avl_tree_t *idx_tree, uint32_t idx)
|
|||
* Load the fuid table(s) into memory.
|
||||
*/
|
||||
static void
|
||||
zfs_fuid_init(zfsvfs_t *zfsvfs, dmu_tx_t *tx)
|
||||
zfs_fuid_init(zfsvfs_t *zfsvfs)
|
||||
{
|
||||
int error = 0;
|
||||
|
||||
rw_enter(&zfsvfs->z_fuid_lock, RW_WRITER);
|
||||
|
||||
if (zfsvfs->z_fuid_loaded) {
|
||||
|
@ -200,41 +207,101 @@ zfs_fuid_init(zfsvfs_t *zfsvfs, dmu_tx_t *tx)
|
|||
return;
|
||||
}
|
||||
|
||||
if (zfsvfs->z_fuid_obj == 0) {
|
||||
|
||||
/* first make sure we need to allocate object */
|
||||
|
||||
error = zap_lookup(zfsvfs->z_os, MASTER_NODE_OBJ,
|
||||
ZFS_FUID_TABLES, 8, 1, &zfsvfs->z_fuid_obj);
|
||||
if (error == ENOENT && tx != NULL) {
|
||||
zfsvfs->z_fuid_obj = dmu_object_alloc(zfsvfs->z_os,
|
||||
DMU_OT_FUID, 1 << 14, DMU_OT_FUID_SIZE,
|
||||
sizeof (uint64_t), tx);
|
||||
VERIFY(zap_add(zfsvfs->z_os, MASTER_NODE_OBJ,
|
||||
ZFS_FUID_TABLES, sizeof (uint64_t), 1,
|
||||
&zfsvfs->z_fuid_obj, tx) == 0);
|
||||
}
|
||||
}
|
||||
zfs_fuid_avl_tree_create(&zfsvfs->z_fuid_idx, &zfsvfs->z_fuid_domain);
|
||||
|
||||
(void) zap_lookup(zfsvfs->z_os, MASTER_NODE_OBJ,
|
||||
ZFS_FUID_TABLES, 8, 1, &zfsvfs->z_fuid_obj);
|
||||
if (zfsvfs->z_fuid_obj != 0) {
|
||||
zfsvfs->z_fuid_size = zfs_fuid_table_load(zfsvfs->z_os,
|
||||
zfsvfs->z_fuid_obj, &zfsvfs->z_fuid_idx,
|
||||
&zfsvfs->z_fuid_domain);
|
||||
zfsvfs->z_fuid_loaded = B_TRUE;
|
||||
}
|
||||
|
||||
zfsvfs->z_fuid_loaded = B_TRUE;
|
||||
rw_exit(&zfsvfs->z_fuid_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* sync out AVL trees to persistent storage.
|
||||
*/
|
||||
void
|
||||
zfs_fuid_sync(zfsvfs_t *zfsvfs, dmu_tx_t *tx)
|
||||
{
|
||||
nvlist_t *nvp;
|
||||
nvlist_t **fuids;
|
||||
size_t nvsize = 0;
|
||||
char *packed;
|
||||
dmu_buf_t *db;
|
||||
fuid_domain_t *domnode;
|
||||
int numnodes;
|
||||
int i;
|
||||
|
||||
if (!zfsvfs->z_fuid_dirty) {
|
||||
return;
|
||||
}
|
||||
|
||||
rw_enter(&zfsvfs->z_fuid_lock, RW_WRITER);
|
||||
|
||||
/*
|
||||
* First see if table needs to be created?
|
||||
*/
|
||||
if (zfsvfs->z_fuid_obj == 0) {
|
||||
zfsvfs->z_fuid_obj = dmu_object_alloc(zfsvfs->z_os,
|
||||
DMU_OT_FUID, 1 << 14, DMU_OT_FUID_SIZE,
|
||||
sizeof (uint64_t), tx);
|
||||
VERIFY(zap_add(zfsvfs->z_os, MASTER_NODE_OBJ,
|
||||
ZFS_FUID_TABLES, sizeof (uint64_t), 1,
|
||||
&zfsvfs->z_fuid_obj, tx) == 0);
|
||||
}
|
||||
|
||||
VERIFY(nvlist_alloc(&nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0);
|
||||
|
||||
numnodes = avl_numnodes(&zfsvfs->z_fuid_idx);
|
||||
fuids = kmem_alloc(numnodes * sizeof (void *), KM_SLEEP);
|
||||
for (i = 0, domnode = avl_first(&zfsvfs->z_fuid_domain); domnode; i++,
|
||||
domnode = AVL_NEXT(&zfsvfs->z_fuid_domain, domnode)) {
|
||||
VERIFY(nvlist_alloc(&fuids[i], NV_UNIQUE_NAME, KM_SLEEP) == 0);
|
||||
VERIFY(nvlist_add_uint64(fuids[i], FUID_IDX,
|
||||
domnode->f_idx) == 0);
|
||||
VERIFY(nvlist_add_uint64(fuids[i], FUID_OFFSET, 0) == 0);
|
||||
VERIFY(nvlist_add_string(fuids[i], FUID_DOMAIN,
|
||||
domnode->f_ksid->kd_name) == 0);
|
||||
}
|
||||
VERIFY(nvlist_add_nvlist_array(nvp, FUID_NVP_ARRAY,
|
||||
fuids, numnodes) == 0);
|
||||
for (i = 0; i != numnodes; i++)
|
||||
nvlist_free(fuids[i]);
|
||||
kmem_free(fuids, numnodes * sizeof (void *));
|
||||
VERIFY(nvlist_size(nvp, &nvsize, NV_ENCODE_XDR) == 0);
|
||||
packed = kmem_alloc(nvsize, KM_SLEEP);
|
||||
VERIFY(nvlist_pack(nvp, &packed, &nvsize,
|
||||
NV_ENCODE_XDR, KM_SLEEP) == 0);
|
||||
nvlist_free(nvp);
|
||||
zfsvfs->z_fuid_size = nvsize;
|
||||
dmu_write(zfsvfs->z_os, zfsvfs->z_fuid_obj, 0,
|
||||
zfsvfs->z_fuid_size, packed, tx);
|
||||
kmem_free(packed, zfsvfs->z_fuid_size);
|
||||
VERIFY(0 == dmu_bonus_hold(zfsvfs->z_os, zfsvfs->z_fuid_obj,
|
||||
FTAG, &db));
|
||||
dmu_buf_will_dirty(db, tx);
|
||||
*(uint64_t *)db->db_data = zfsvfs->z_fuid_size;
|
||||
dmu_buf_rele(db, FTAG);
|
||||
|
||||
zfsvfs->z_fuid_dirty = B_FALSE;
|
||||
rw_exit(&zfsvfs->z_fuid_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Query domain table for a given domain.
|
||||
*
|
||||
* If domain isn't found it is added to AVL trees and
|
||||
* the results are pushed out to disk.
|
||||
* If domain isn't found and addok is set, it is added to AVL trees and
|
||||
* the zfsvfs->z_fuid_dirty flag will be set to TRUE. It will then be
|
||||
* necessary for the caller or another thread to detect the dirty table
|
||||
* and sync out the changes.
|
||||
*/
|
||||
int
|
||||
zfs_fuid_find_by_domain(zfsvfs_t *zfsvfs, const char *domain, char **retdomain,
|
||||
dmu_tx_t *tx)
|
||||
zfs_fuid_find_by_domain(zfsvfs_t *zfsvfs, const char *domain,
|
||||
char **retdomain, boolean_t addok)
|
||||
{
|
||||
fuid_domain_t searchnode, *findnode;
|
||||
avl_index_t loc;
|
||||
|
@ -246,16 +313,16 @@ zfs_fuid_find_by_domain(zfsvfs_t *zfsvfs, const char *domain, char **retdomain,
|
|||
* for the user nobody.
|
||||
*/
|
||||
if (domain[0] == '\0') {
|
||||
*retdomain = nulldomain;
|
||||
if (retdomain)
|
||||
*retdomain = nulldomain;
|
||||
return (0);
|
||||
}
|
||||
|
||||
searchnode.f_ksid = ksid_lookupdomain(domain);
|
||||
if (retdomain) {
|
||||
if (retdomain)
|
||||
*retdomain = searchnode.f_ksid->kd_name;
|
||||
}
|
||||
if (!zfsvfs->z_fuid_loaded)
|
||||
zfs_fuid_init(zfsvfs, tx);
|
||||
zfs_fuid_init(zfsvfs);
|
||||
|
||||
retry:
|
||||
rw_enter(&zfsvfs->z_fuid_lock, rw);
|
||||
|
@ -265,15 +332,9 @@ retry:
|
|||
rw_exit(&zfsvfs->z_fuid_lock);
|
||||
ksiddomain_rele(searchnode.f_ksid);
|
||||
return (findnode->f_idx);
|
||||
} else {
|
||||
} else if (addok) {
|
||||
fuid_domain_t *domnode;
|
||||
nvlist_t *nvp;
|
||||
nvlist_t **fuids;
|
||||
uint64_t retidx;
|
||||
size_t nvsize = 0;
|
||||
char *packed;
|
||||
dmu_buf_t *db;
|
||||
int i = 0;
|
||||
|
||||
if (rw == RW_READER && !rw_tryupgrade(&zfsvfs->z_fuid_lock)) {
|
||||
rw_exit(&zfsvfs->z_fuid_lock);
|
||||
|
@ -288,46 +349,11 @@ retry:
|
|||
|
||||
avl_add(&zfsvfs->z_fuid_domain, domnode);
|
||||
avl_add(&zfsvfs->z_fuid_idx, domnode);
|
||||
/*
|
||||
* Now resync the on-disk nvlist.
|
||||
*/
|
||||
VERIFY(nvlist_alloc(&nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0);
|
||||
|
||||
domnode = avl_first(&zfsvfs->z_fuid_domain);
|
||||
fuids = kmem_alloc(retidx * sizeof (void *), KM_SLEEP);
|
||||
while (domnode) {
|
||||
VERIFY(nvlist_alloc(&fuids[i],
|
||||
NV_UNIQUE_NAME, KM_SLEEP) == 0);
|
||||
VERIFY(nvlist_add_uint64(fuids[i], FUID_IDX,
|
||||
domnode->f_idx) == 0);
|
||||
VERIFY(nvlist_add_uint64(fuids[i],
|
||||
FUID_OFFSET, 0) == 0);
|
||||
VERIFY(nvlist_add_string(fuids[i++], FUID_DOMAIN,
|
||||
domnode->f_ksid->kd_name) == 0);
|
||||
domnode = AVL_NEXT(&zfsvfs->z_fuid_domain, domnode);
|
||||
}
|
||||
VERIFY(nvlist_add_nvlist_array(nvp, FUID_NVP_ARRAY,
|
||||
fuids, retidx) == 0);
|
||||
for (i = 0; i != retidx; i++)
|
||||
nvlist_free(fuids[i]);
|
||||
kmem_free(fuids, retidx * sizeof (void *));
|
||||
VERIFY(nvlist_size(nvp, &nvsize, NV_ENCODE_XDR) == 0);
|
||||
packed = kmem_alloc(nvsize, KM_SLEEP);
|
||||
VERIFY(nvlist_pack(nvp, &packed, &nvsize,
|
||||
NV_ENCODE_XDR, KM_SLEEP) == 0);
|
||||
nvlist_free(nvp);
|
||||
zfsvfs->z_fuid_size = nvsize;
|
||||
dmu_write(zfsvfs->z_os, zfsvfs->z_fuid_obj, 0,
|
||||
zfsvfs->z_fuid_size, packed, tx);
|
||||
kmem_free(packed, zfsvfs->z_fuid_size);
|
||||
VERIFY(0 == dmu_bonus_hold(zfsvfs->z_os, zfsvfs->z_fuid_obj,
|
||||
FTAG, &db));
|
||||
dmu_buf_will_dirty(db, tx);
|
||||
*(uint64_t *)db->db_data = zfsvfs->z_fuid_size;
|
||||
dmu_buf_rele(db, FTAG);
|
||||
|
||||
zfsvfs->z_fuid_dirty = B_TRUE;
|
||||
rw_exit(&zfsvfs->z_fuid_lock);
|
||||
return (retidx);
|
||||
} else {
|
||||
return (-1);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -337,7 +363,7 @@ retry:
|
|||
* Returns a pointer from an avl node of the domain string.
|
||||
*
|
||||
*/
|
||||
static char *
|
||||
const char *
|
||||
zfs_fuid_find_by_idx(zfsvfs_t *zfsvfs, uint32_t idx)
|
||||
{
|
||||
char *domain;
|
||||
|
@ -346,7 +372,7 @@ zfs_fuid_find_by_idx(zfsvfs_t *zfsvfs, uint32_t idx)
|
|||
return (NULL);
|
||||
|
||||
if (!zfsvfs->z_fuid_loaded)
|
||||
zfs_fuid_init(zfsvfs, NULL);
|
||||
zfs_fuid_init(zfsvfs);
|
||||
|
||||
rw_enter(&zfsvfs->z_fuid_lock, RW_READER);
|
||||
|
||||
|
@ -374,7 +400,7 @@ zfs_fuid_map_id(zfsvfs_t *zfsvfs, uint64_t fuid,
|
|||
cred_t *cr, zfs_fuid_type_t type)
|
||||
{
|
||||
uint32_t index = FUID_INDEX(fuid);
|
||||
char *domain;
|
||||
const char *domain;
|
||||
uid_t id;
|
||||
|
||||
if (index == 0)
|
||||
|
@ -439,6 +465,7 @@ zfs_fuid_node_add(zfs_fuid_info_t **fuidpp, const char *domain, uint32_t rid,
|
|||
}
|
||||
|
||||
if (type == ZFS_ACE_USER || type == ZFS_ACE_GROUP) {
|
||||
|
||||
/*
|
||||
* Now allocate fuid entry and add it on the end of the list
|
||||
*/
|
||||
|
@ -463,7 +490,7 @@ zfs_fuid_node_add(zfs_fuid_info_t **fuidpp, const char *domain, uint32_t rid,
|
|||
*/
|
||||
uint64_t
|
||||
zfs_fuid_create_cred(zfsvfs_t *zfsvfs, zfs_fuid_type_t type,
|
||||
dmu_tx_t *tx, cred_t *cr, zfs_fuid_info_t **fuidp)
|
||||
cred_t *cr, zfs_fuid_info_t **fuidp)
|
||||
{
|
||||
uint64_t idx;
|
||||
ksid_t *ksid;
|
||||
|
@ -490,7 +517,7 @@ zfs_fuid_create_cred(zfsvfs_t *zfsvfs, zfs_fuid_type_t type,
|
|||
rid = ksid_getrid(ksid);
|
||||
domain = ksid_getdomain(ksid);
|
||||
|
||||
idx = zfs_fuid_find_by_domain(zfsvfs, domain, &kdomain, tx);
|
||||
idx = zfs_fuid_find_by_domain(zfsvfs, domain, &kdomain, B_TRUE);
|
||||
|
||||
zfs_fuid_node_add(fuidp, kdomain, rid, idx, id, type);
|
||||
|
||||
|
@ -511,7 +538,7 @@ zfs_fuid_create_cred(zfsvfs_t *zfsvfs, zfs_fuid_type_t type,
|
|||
*/
|
||||
uint64_t
|
||||
zfs_fuid_create(zfsvfs_t *zfsvfs, uint64_t id, cred_t *cr,
|
||||
zfs_fuid_type_t type, dmu_tx_t *tx, zfs_fuid_info_t **fuidpp)
|
||||
zfs_fuid_type_t type, zfs_fuid_info_t **fuidpp)
|
||||
{
|
||||
const char *domain;
|
||||
char *kdomain;
|
||||
|
@ -581,10 +608,11 @@ zfs_fuid_create(zfsvfs_t *zfsvfs, uint64_t id, cred_t *cr,
|
|||
}
|
||||
}
|
||||
|
||||
idx = zfs_fuid_find_by_domain(zfsvfs, domain, &kdomain, tx);
|
||||
idx = zfs_fuid_find_by_domain(zfsvfs, domain, &kdomain, B_TRUE);
|
||||
|
||||
if (!zfsvfs->z_replay)
|
||||
zfs_fuid_node_add(fuidpp, kdomain, rid, idx, id, type);
|
||||
zfs_fuid_node_add(fuidpp, kdomain,
|
||||
rid, idx, id, type);
|
||||
else if (zfuid != NULL) {
|
||||
list_remove(&fuidp->z_fuids, zfuid);
|
||||
kmem_free(zfuid, sizeof (zfs_fuid_t));
|
||||
|
@ -658,16 +686,15 @@ boolean_t
|
|||
zfs_groupmember(zfsvfs_t *zfsvfs, uint64_t id, cred_t *cr)
|
||||
{
|
||||
ksid_t *ksid = crgetsid(cr, KSID_GROUP);
|
||||
ksidlist_t *ksidlist = crgetsidlist(cr);
|
||||
uid_t gid;
|
||||
|
||||
if (ksid) {
|
||||
if (ksid && ksidlist) {
|
||||
int i;
|
||||
ksid_t *ksid_groups;
|
||||
ksidlist_t *ksidlist = crgetsidlist(cr);
|
||||
uint32_t idx = FUID_INDEX(id);
|
||||
uint32_t rid = FUID_RID(id);
|
||||
|
||||
ASSERT(ksidlist);
|
||||
ksid_groups = ksidlist->ksl_sids;
|
||||
|
||||
for (i = 0; i != ksidlist->ksl_nsid; i++) {
|
||||
|
@ -677,7 +704,7 @@ zfs_groupmember(zfsvfs_t *zfsvfs, uint64_t id, cred_t *cr)
|
|||
return (B_TRUE);
|
||||
}
|
||||
} else {
|
||||
char *domain;
|
||||
const char *domain;
|
||||
|
||||
domain = zfs_fuid_find_by_idx(zfsvfs, idx);
|
||||
ASSERT(domain != NULL);
|
||||
|
@ -700,4 +727,19 @@ zfs_groupmember(zfsvfs_t *zfsvfs, uint64_t id, cred_t *cr)
|
|||
gid = zfs_fuid_map_id(zfsvfs, id, cr, ZFS_GROUP);
|
||||
return (groupmember(gid, cr));
|
||||
}
|
||||
|
||||
void
|
||||
zfs_fuid_txhold(zfsvfs_t *zfsvfs, dmu_tx_t *tx)
|
||||
{
|
||||
if (zfsvfs->z_fuid_obj == 0) {
|
||||
dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
|
||||
dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
|
||||
FUID_SIZE_ESTIMATE(zfsvfs));
|
||||
dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, FALSE, NULL);
|
||||
} else {
|
||||
dmu_tx_hold_bonus(tx, zfsvfs->z_fuid_obj);
|
||||
dmu_tx_hold_write(tx, zfsvfs->z_fuid_obj, 0,
|
||||
FUID_SIZE_ESTIMATE(zfsvfs));
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -19,7 +19,7 @@
|
|||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
|
@ -467,9 +467,6 @@ zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
|
|||
*/
|
||||
ssize_t zfs_immediate_write_sz = 32768;
|
||||
|
||||
#define ZIL_MAX_LOG_DATA (SPA_MAXBLOCKSIZE - sizeof (zil_trailer_t) - \
|
||||
sizeof (lr_write_t))
|
||||
|
||||
void
|
||||
zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
|
||||
znode_t *zp, offset_t off, ssize_t resid, int ioflag)
|
||||
|
@ -483,29 +480,6 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
|
|||
|
||||
ZFS_HANDLE_REPLAY(zilog, tx); /* exits if replay */
|
||||
|
||||
/*
|
||||
* Writes are handled in three different ways:
|
||||
*
|
||||
* WR_INDIRECT:
|
||||
* In this mode, if we need to commit the write later, then the block
|
||||
* is immediately written into the file system (using dmu_sync),
|
||||
* and a pointer to the block is put into the log record.
|
||||
* When the txg commits the block is linked in.
|
||||
* This saves additionally writing the data into the log record.
|
||||
* There are a few requirements for this to occur:
|
||||
* - write is greater than zfs_immediate_write_sz
|
||||
* - not using slogs (as slogs are assumed to always be faster
|
||||
* than writing into the main pool)
|
||||
* - the write occupies only one block
|
||||
* WR_COPIED:
|
||||
* If we know we'll immediately be committing the
|
||||
* transaction (FSYNC or FDSYNC), then we allocate a larger
|
||||
* log record here for the data and copy the data in.
|
||||
* WR_NEED_COPY:
|
||||
* Otherwise we don't allocate a buffer, and *if* we need to
|
||||
* flush the write later then a buffer is allocated and
|
||||
* we retrieve the data using the dmu.
|
||||
*/
|
||||
slogging = spa_has_slogs(zilog->zl_spa);
|
||||
if (resid > zfs_immediate_write_sz && !slogging && resid <= zp->z_blksz)
|
||||
write_state = WR_INDIRECT;
|
||||
|
@ -535,7 +509,7 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
|
|||
(write_state == WR_COPIED ? len : 0));
|
||||
lr = (lr_write_t *)&itx->itx_lr;
|
||||
if (write_state == WR_COPIED && dmu_read(zp->z_zfsvfs->z_os,
|
||||
zp->z_id, off, len, lr + 1) != 0) {
|
||||
zp->z_id, off, len, lr + 1, DMU_READ_NO_PREFETCH) != 0) {
|
||||
kmem_free(itx, offsetof(itx_t, itx_lr) +
|
||||
itx->itx_lr.lrc_reclen);
|
||||
itx = zil_itx_create(txtype, sizeof (*lr));
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
|
@ -67,6 +67,8 @@ static major_t zfs_major;
|
|||
static minor_t zfs_minor;
|
||||
static kmutex_t zfs_dev_mtx;
|
||||
|
||||
extern int sys_shutdown;
|
||||
|
||||
static int zfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr);
|
||||
static int zfs_umount(vfs_t *vfsp, int fflag, cred_t *cr);
|
||||
static int zfs_mountroot(vfs_t *vfsp, enum whymountroot);
|
||||
|
@ -145,12 +147,24 @@ zfs_sync(vfs_t *vfsp, short flag, cred_t *cr)
|
|||
* Sync a specific filesystem.
|
||||
*/
|
||||
zfsvfs_t *zfsvfs = vfsp->vfs_data;
|
||||
dsl_pool_t *dp;
|
||||
|
||||
ZFS_ENTER(zfsvfs);
|
||||
dp = dmu_objset_pool(zfsvfs->z_os);
|
||||
|
||||
/*
|
||||
* If the system is shutting down, then skip any
|
||||
* filesystems which may exist on a suspended pool.
|
||||
*/
|
||||
if (sys_shutdown && spa_suspended(dp->dp_spa)) {
|
||||
ZFS_EXIT(zfsvfs);
|
||||
return (0);
|
||||
}
|
||||
|
||||
if (zfsvfs->z_log != NULL)
|
||||
zil_commit(zfsvfs->z_log, UINT64_MAX, 0);
|
||||
else
|
||||
txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
|
||||
txg_wait_synced(dp, 0);
|
||||
ZFS_EXIT(zfsvfs);
|
||||
} else {
|
||||
/*
|
||||
|
@ -554,6 +568,393 @@ unregister:
|
|||
|
||||
}
|
||||
|
||||
static void
|
||||
uidacct(objset_t *os, boolean_t isgroup, uint64_t fuid,
|
||||
int64_t delta, dmu_tx_t *tx)
|
||||
{
|
||||
uint64_t used = 0;
|
||||
char buf[32];
|
||||
int err;
|
||||
uint64_t obj = isgroup ? DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT;
|
||||
|
||||
if (delta == 0)
|
||||
return;
|
||||
|
||||
(void) snprintf(buf, sizeof (buf), "%llx", (longlong_t)fuid);
|
||||
err = zap_lookup(os, obj, buf, 8, 1, &used);
|
||||
ASSERT(err == 0 || err == ENOENT);
|
||||
/* no underflow/overflow */
|
||||
ASSERT(delta > 0 || used >= -delta);
|
||||
ASSERT(delta < 0 || used + delta > used);
|
||||
used += delta;
|
||||
if (used == 0)
|
||||
err = zap_remove(os, obj, buf, tx);
|
||||
else
|
||||
err = zap_update(os, obj, buf, 8, 1, &used, tx);
|
||||
ASSERT(err == 0);
|
||||
}
|
||||
|
||||
static void
|
||||
zfs_space_delta_cb(objset_t *os, dmu_object_type_t bonustype,
|
||||
void *oldbonus, void *newbonus,
|
||||
uint64_t oldused, uint64_t newused, dmu_tx_t *tx)
|
||||
{
|
||||
znode_phys_t *oldznp = oldbonus;
|
||||
znode_phys_t *newznp = newbonus;
|
||||
|
||||
if (bonustype != DMU_OT_ZNODE)
|
||||
return;
|
||||
|
||||
/* We charge 512 for the dnode (if it's allocated). */
|
||||
if (oldznp->zp_gen != 0)
|
||||
oldused += DNODE_SIZE;
|
||||
if (newznp->zp_gen != 0)
|
||||
newused += DNODE_SIZE;
|
||||
|
||||
if (oldznp->zp_uid == newznp->zp_uid) {
|
||||
uidacct(os, B_FALSE, oldznp->zp_uid, newused-oldused, tx);
|
||||
} else {
|
||||
uidacct(os, B_FALSE, oldznp->zp_uid, -oldused, tx);
|
||||
uidacct(os, B_FALSE, newznp->zp_uid, newused, tx);
|
||||
}
|
||||
|
||||
if (oldznp->zp_gid == newznp->zp_gid) {
|
||||
uidacct(os, B_TRUE, oldznp->zp_gid, newused-oldused, tx);
|
||||
} else {
|
||||
uidacct(os, B_TRUE, oldznp->zp_gid, -oldused, tx);
|
||||
uidacct(os, B_TRUE, newznp->zp_gid, newused, tx);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
fuidstr_to_sid(zfsvfs_t *zfsvfs, const char *fuidstr,
|
||||
char *domainbuf, int buflen, uid_t *ridp)
|
||||
{
|
||||
extern uint64_t strtonum(const char *str, char **nptr);
|
||||
uint64_t fuid;
|
||||
const char *domain;
|
||||
|
||||
fuid = strtonum(fuidstr, NULL);
|
||||
|
||||
domain = zfs_fuid_find_by_idx(zfsvfs, FUID_INDEX(fuid));
|
||||
if (domain)
|
||||
(void) strlcpy(domainbuf, domain, buflen);
|
||||
else
|
||||
domainbuf[0] = '\0';
|
||||
*ridp = FUID_RID(fuid);
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
zfs_userquota_prop_to_obj(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type)
|
||||
{
|
||||
switch (type) {
|
||||
case ZFS_PROP_USERUSED:
|
||||
return (DMU_USERUSED_OBJECT);
|
||||
case ZFS_PROP_GROUPUSED:
|
||||
return (DMU_GROUPUSED_OBJECT);
|
||||
case ZFS_PROP_USERQUOTA:
|
||||
return (zfsvfs->z_userquota_obj);
|
||||
case ZFS_PROP_GROUPQUOTA:
|
||||
return (zfsvfs->z_groupquota_obj);
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
|
||||
int
|
||||
zfs_userspace_many(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
|
||||
uint64_t *cookiep, void *vbuf, uint64_t *bufsizep)
|
||||
{
|
||||
int error;
|
||||
zap_cursor_t zc;
|
||||
zap_attribute_t za;
|
||||
zfs_useracct_t *buf = vbuf;
|
||||
uint64_t obj;
|
||||
|
||||
if (!dmu_objset_userspace_present(zfsvfs->z_os))
|
||||
return (ENOTSUP);
|
||||
|
||||
obj = zfs_userquota_prop_to_obj(zfsvfs, type);
|
||||
if (obj == 0) {
|
||||
*bufsizep = 0;
|
||||
return (0);
|
||||
}
|
||||
|
||||
for (zap_cursor_init_serialized(&zc, zfsvfs->z_os, obj, *cookiep);
|
||||
(error = zap_cursor_retrieve(&zc, &za)) == 0;
|
||||
zap_cursor_advance(&zc)) {
|
||||
if ((uintptr_t)buf - (uintptr_t)vbuf + sizeof (zfs_useracct_t) >
|
||||
*bufsizep)
|
||||
break;
|
||||
|
||||
fuidstr_to_sid(zfsvfs, za.za_name,
|
||||
buf->zu_domain, sizeof (buf->zu_domain), &buf->zu_rid);
|
||||
|
||||
buf->zu_space = za.za_first_integer;
|
||||
buf++;
|
||||
}
|
||||
if (error == ENOENT)
|
||||
error = 0;
|
||||
|
||||
ASSERT3U((uintptr_t)buf - (uintptr_t)vbuf, <=, *bufsizep);
|
||||
*bufsizep = (uintptr_t)buf - (uintptr_t)vbuf;
|
||||
*cookiep = zap_cursor_serialize(&zc);
|
||||
zap_cursor_fini(&zc);
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* buf must be big enough (eg, 32 bytes)
|
||||
*/
|
||||
static int
|
||||
id_to_fuidstr(zfsvfs_t *zfsvfs, const char *domain, uid_t rid,
|
||||
char *buf, boolean_t addok)
|
||||
{
|
||||
uint64_t fuid;
|
||||
int domainid = 0;
|
||||
|
||||
if (domain && domain[0]) {
|
||||
domainid = zfs_fuid_find_by_domain(zfsvfs, domain, NULL, addok);
|
||||
if (domainid == -1)
|
||||
return (ENOENT);
|
||||
}
|
||||
fuid = FUID_ENCODE(domainid, rid);
|
||||
(void) sprintf(buf, "%llx", (longlong_t)fuid);
|
||||
return (0);
|
||||
}
|
||||
|
||||
int
|
||||
zfs_userspace_one(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
|
||||
const char *domain, uint64_t rid, uint64_t *valp)
|
||||
{
|
||||
char buf[32];
|
||||
int err;
|
||||
uint64_t obj;
|
||||
|
||||
*valp = 0;
|
||||
|
||||
if (!dmu_objset_userspace_present(zfsvfs->z_os))
|
||||
return (ENOTSUP);
|
||||
|
||||
obj = zfs_userquota_prop_to_obj(zfsvfs, type);
|
||||
if (obj == 0)
|
||||
return (0);
|
||||
|
||||
err = id_to_fuidstr(zfsvfs, domain, rid, buf, B_FALSE);
|
||||
if (err)
|
||||
return (err);
|
||||
|
||||
err = zap_lookup(zfsvfs->z_os, obj, buf, 8, 1, valp);
|
||||
if (err == ENOENT)
|
||||
err = 0;
|
||||
return (err);
|
||||
}
|
||||
|
||||
int
|
||||
zfs_set_userquota(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
|
||||
const char *domain, uint64_t rid, uint64_t quota)
|
||||
{
|
||||
char buf[32];
|
||||
int err;
|
||||
dmu_tx_t *tx;
|
||||
uint64_t *objp;
|
||||
boolean_t fuid_dirtied;
|
||||
|
||||
if (type != ZFS_PROP_USERQUOTA && type != ZFS_PROP_GROUPQUOTA)
|
||||
return (EINVAL);
|
||||
|
||||
if (zfsvfs->z_version < ZPL_VERSION_USERSPACE)
|
||||
return (ENOTSUP);
|
||||
|
||||
objp = (type == ZFS_PROP_USERQUOTA) ? &zfsvfs->z_userquota_obj :
|
||||
&zfsvfs->z_groupquota_obj;
|
||||
|
||||
err = id_to_fuidstr(zfsvfs, domain, rid, buf, B_TRUE);
|
||||
if (err)
|
||||
return (err);
|
||||
fuid_dirtied = zfsvfs->z_fuid_dirty;
|
||||
|
||||
tx = dmu_tx_create(zfsvfs->z_os);
|
||||
dmu_tx_hold_zap(tx, *objp ? *objp : DMU_NEW_OBJECT, B_TRUE, NULL);
|
||||
if (*objp == 0) {
|
||||
dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE,
|
||||
zfs_userquota_prop_prefixes[type]);
|
||||
}
|
||||
if (fuid_dirtied)
|
||||
zfs_fuid_txhold(zfsvfs, tx);
|
||||
err = dmu_tx_assign(tx, TXG_WAIT);
|
||||
if (err) {
|
||||
dmu_tx_abort(tx);
|
||||
return (err);
|
||||
}
|
||||
|
||||
mutex_enter(&zfsvfs->z_lock);
|
||||
if (*objp == 0) {
|
||||
*objp = zap_create(zfsvfs->z_os, DMU_OT_USERGROUP_QUOTA,
|
||||
DMU_OT_NONE, 0, tx);
|
||||
VERIFY(0 == zap_add(zfsvfs->z_os, MASTER_NODE_OBJ,
|
||||
zfs_userquota_prop_prefixes[type], 8, 1, objp, tx));
|
||||
}
|
||||
mutex_exit(&zfsvfs->z_lock);
|
||||
|
||||
if (quota == 0) {
|
||||
err = zap_remove(zfsvfs->z_os, *objp, buf, tx);
|
||||
if (err == ENOENT)
|
||||
err = 0;
|
||||
} else {
|
||||
err = zap_update(zfsvfs->z_os, *objp, buf, 8, 1, "a, tx);
|
||||
}
|
||||
ASSERT(err == 0);
|
||||
if (fuid_dirtied)
|
||||
zfs_fuid_sync(zfsvfs, tx);
|
||||
dmu_tx_commit(tx);
|
||||
return (err);
|
||||
}
|
||||
|
||||
boolean_t
|
||||
zfs_usergroup_overquota(zfsvfs_t *zfsvfs, boolean_t isgroup, uint64_t fuid)
|
||||
{
|
||||
char buf[32];
|
||||
uint64_t used, quota, usedobj, quotaobj;
|
||||
int err;
|
||||
|
||||
usedobj = isgroup ? DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT;
|
||||
quotaobj = isgroup ? zfsvfs->z_groupquota_obj : zfsvfs->z_userquota_obj;
|
||||
|
||||
if (quotaobj == 0 || zfsvfs->z_replay)
|
||||
return (B_FALSE);
|
||||
|
||||
(void) sprintf(buf, "%llx", (longlong_t)fuid);
|
||||
err = zap_lookup(zfsvfs->z_os, quotaobj, buf, 8, 1, "a);
|
||||
if (err != 0)
|
||||
return (B_FALSE);
|
||||
|
||||
err = zap_lookup(zfsvfs->z_os, usedobj, buf, 8, 1, &used);
|
||||
if (err != 0)
|
||||
return (B_FALSE);
|
||||
return (used >= quota);
|
||||
}
|
||||
|
||||
int
|
||||
zfsvfs_create(const char *osname, int mode, zfsvfs_t **zvp)
|
||||
{
|
||||
objset_t *os;
|
||||
zfsvfs_t *zfsvfs;
|
||||
uint64_t zval;
|
||||
int i, error;
|
||||
|
||||
if (error = dsl_prop_get_integer(osname, "readonly", &zval, NULL))
|
||||
return (error);
|
||||
if (zval)
|
||||
mode |= DS_MODE_READONLY;
|
||||
|
||||
error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &os);
|
||||
if (error == EROFS) {
|
||||
mode |= DS_MODE_READONLY;
|
||||
error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &os);
|
||||
}
|
||||
if (error)
|
||||
return (error);
|
||||
|
||||
/*
|
||||
* Initialize the zfs-specific filesystem structure.
|
||||
* Should probably make this a kmem cache, shuffle fields,
|
||||
* and just bzero up to z_hold_mtx[].
|
||||
*/
|
||||
zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP);
|
||||
zfsvfs->z_vfs = NULL;
|
||||
zfsvfs->z_parent = zfsvfs;
|
||||
zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE;
|
||||
zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE;
|
||||
zfsvfs->z_os = os;
|
||||
|
||||
error = zfs_get_zplprop(os, ZFS_PROP_VERSION, &zfsvfs->z_version);
|
||||
if (error) {
|
||||
goto out;
|
||||
} else if (zfsvfs->z_version > ZPL_VERSION) {
|
||||
(void) printf("Mismatched versions: File system "
|
||||
"is version %llu on-disk format, which is "
|
||||
"incompatible with this software version %lld!",
|
||||
(u_longlong_t)zfsvfs->z_version, ZPL_VERSION);
|
||||
error = ENOTSUP;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if ((error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &zval)) != 0)
|
||||
goto out;
|
||||
zfsvfs->z_norm = (int)zval;
|
||||
|
||||
if ((error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &zval)) != 0)
|
||||
goto out;
|
||||
zfsvfs->z_utf8 = (zval != 0);
|
||||
|
||||
if ((error = zfs_get_zplprop(os, ZFS_PROP_CASE, &zval)) != 0)
|
||||
goto out;
|
||||
zfsvfs->z_case = (uint_t)zval;
|
||||
|
||||
/*
|
||||
* Fold case on file systems that are always or sometimes case
|
||||
* insensitive.
|
||||
*/
|
||||
if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE ||
|
||||
zfsvfs->z_case == ZFS_CASE_MIXED)
|
||||
zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER;
|
||||
|
||||
zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os);
|
||||
|
||||
error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1,
|
||||
&zfsvfs->z_root);
|
||||
if (error)
|
||||
goto out;
|
||||
ASSERT(zfsvfs->z_root != 0);
|
||||
|
||||
error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1,
|
||||
&zfsvfs->z_unlinkedobj);
|
||||
if (error)
|
||||
goto out;
|
||||
|
||||
error = zap_lookup(os, MASTER_NODE_OBJ,
|
||||
zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA],
|
||||
8, 1, &zfsvfs->z_userquota_obj);
|
||||
if (error && error != ENOENT)
|
||||
goto out;
|
||||
|
||||
error = zap_lookup(os, MASTER_NODE_OBJ,
|
||||
zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA],
|
||||
8, 1, &zfsvfs->z_groupquota_obj);
|
||||
if (error && error != ENOENT)
|
||||
goto out;
|
||||
|
||||
error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES, 8, 1,
|
||||
&zfsvfs->z_fuid_obj);
|
||||
if (error && error != ENOENT)
|
||||
goto out;
|
||||
|
||||
error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SHARES_DIR, 8, 1,
|
||||
&zfsvfs->z_shares_dir);
|
||||
if (error && error != ENOENT)
|
||||
goto out;
|
||||
|
||||
mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
mutex_init(&zfsvfs->z_online_recv_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
mutex_init(&zfsvfs->z_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
list_create(&zfsvfs->z_all_znodes, sizeof (znode_t),
|
||||
offsetof(znode_t, z_link_node));
|
||||
rrw_init(&zfsvfs->z_teardown_lock);
|
||||
rw_init(&zfsvfs->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL);
|
||||
rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL);
|
||||
for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
|
||||
mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL);
|
||||
|
||||
*zvp = zfsvfs;
|
||||
return (0);
|
||||
|
||||
out:
|
||||
dmu_objset_close(os);
|
||||
*zvp = NULL;
|
||||
kmem_free(zfsvfs, sizeof (zfsvfs_t));
|
||||
return (error);
|
||||
}
|
||||
|
||||
static int
|
||||
zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting)
|
||||
{
|
||||
|
@ -570,6 +971,12 @@ zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting)
|
|||
dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
|
||||
mutex_exit(&zfsvfs->z_os->os->os_user_ptr_lock);
|
||||
|
||||
zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data);
|
||||
if (zil_disable) {
|
||||
zil_destroy(zfsvfs->z_log, 0);
|
||||
zfsvfs->z_log = NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we are not mounting (ie: online recv), then we don't
|
||||
* have to worry about replaying the log as we blocked all
|
||||
|
@ -588,11 +995,7 @@ zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting)
|
|||
else
|
||||
zfs_unlinked_drain(zfsvfs);
|
||||
|
||||
zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data);
|
||||
if (zil_disable) {
|
||||
zil_destroy(zfsvfs->z_log, 0);
|
||||
zfsvfs->z_log = NULL;
|
||||
} else {
|
||||
if (zfsvfs->z_log) {
|
||||
/*
|
||||
* Parse and replay the intent log.
|
||||
*
|
||||
|
@ -630,49 +1033,63 @@ zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting)
|
|||
return (0);
|
||||
}
|
||||
|
||||
static void
|
||||
zfs_freezfsvfs(zfsvfs_t *zfsvfs)
|
||||
void
|
||||
zfsvfs_free(zfsvfs_t *zfsvfs)
|
||||
{
|
||||
int i;
|
||||
extern krwlock_t zfsvfs_lock; /* in zfs_znode.c */
|
||||
|
||||
/*
|
||||
* This is a barrier to prevent the filesystem from going away in
|
||||
* zfs_znode_move() until we can safely ensure that the filesystem is
|
||||
* not unmounted. We consider the filesystem valid before the barrier
|
||||
* and invalid after the barrier.
|
||||
*/
|
||||
rw_enter(&zfsvfs_lock, RW_READER);
|
||||
rw_exit(&zfsvfs_lock);
|
||||
|
||||
zfs_fuid_destroy(zfsvfs);
|
||||
|
||||
mutex_destroy(&zfsvfs->z_znodes_lock);
|
||||
mutex_destroy(&zfsvfs->z_online_recv_lock);
|
||||
mutex_destroy(&zfsvfs->z_lock);
|
||||
list_destroy(&zfsvfs->z_all_znodes);
|
||||
rrw_destroy(&zfsvfs->z_teardown_lock);
|
||||
rw_destroy(&zfsvfs->z_teardown_inactive_lock);
|
||||
rw_destroy(&zfsvfs->z_fuid_lock);
|
||||
for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
|
||||
mutex_destroy(&zfsvfs->z_hold_mtx[i]);
|
||||
kmem_free(zfsvfs, sizeof (zfsvfs_t));
|
||||
}
|
||||
|
||||
static void
|
||||
zfs_set_fuid_feature(zfsvfs_t *zfsvfs)
|
||||
{
|
||||
zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os);
|
||||
if (zfsvfs->z_use_fuids && zfsvfs->z_vfs) {
|
||||
vfs_set_feature(zfsvfs->z_vfs, VFSFT_XVATTR);
|
||||
vfs_set_feature(zfsvfs->z_vfs, VFSFT_SYSATTR_VIEWS);
|
||||
vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACEMASKONACCESS);
|
||||
vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACLONCREATE);
|
||||
vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACCESS_FILTER);
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
zfs_domount(vfs_t *vfsp, char *osname)
|
||||
{
|
||||
dev_t mount_dev;
|
||||
uint64_t recordsize, readonly;
|
||||
uint64_t recordsize, fsid_guid;
|
||||
int error = 0;
|
||||
int mode;
|
||||
zfsvfs_t *zfsvfs;
|
||||
znode_t *zp = NULL;
|
||||
|
||||
ASSERT(vfsp);
|
||||
ASSERT(osname);
|
||||
|
||||
/*
|
||||
* Initialize the zfs-specific filesystem structure.
|
||||
* Should probably make this a kmem cache, shuffle fields,
|
||||
* and just bzero up to z_hold_mtx[].
|
||||
*/
|
||||
zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP);
|
||||
error = zfsvfs_create(osname, DS_MODE_OWNER, &zfsvfs);
|
||||
if (error)
|
||||
return (error);
|
||||
zfsvfs->z_vfs = vfsp;
|
||||
zfsvfs->z_parent = zfsvfs;
|
||||
zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE;
|
||||
zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE;
|
||||
|
||||
mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
mutex_init(&zfsvfs->z_online_recv_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
list_create(&zfsvfs->z_all_znodes, sizeof (znode_t),
|
||||
offsetof(znode_t, z_link_node));
|
||||
rrw_init(&zfsvfs->z_teardown_lock);
|
||||
rw_init(&zfsvfs->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL);
|
||||
rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL);
|
||||
|
||||
/* Initialize the generic filesystem structure. */
|
||||
vfsp->vfs_bcount = 0;
|
||||
|
@ -694,39 +1111,24 @@ zfs_domount(vfs_t *vfsp, char *osname)
|
|||
vfsp->vfs_flag |= VFS_NOTRUNC;
|
||||
vfsp->vfs_data = zfsvfs;
|
||||
|
||||
if (error = dsl_prop_get_integer(osname, "readonly", &readonly, NULL))
|
||||
goto out;
|
||||
|
||||
mode = DS_MODE_OWNER;
|
||||
if (readonly)
|
||||
mode |= DS_MODE_READONLY;
|
||||
|
||||
error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os);
|
||||
if (error == EROFS) {
|
||||
mode = DS_MODE_OWNER | DS_MODE_READONLY;
|
||||
error = dmu_objset_open(osname, DMU_OST_ZFS, mode,
|
||||
&zfsvfs->z_os);
|
||||
}
|
||||
|
||||
if (error)
|
||||
goto out;
|
||||
|
||||
if (error = zfs_init_fs(zfsvfs, &zp))
|
||||
goto out;
|
||||
|
||||
/* The call to zfs_init_fs leaves the vnode held, release it here. */
|
||||
VN_RELE(ZTOV(zp));
|
||||
/*
|
||||
* The fsid is 64 bits, composed of an 8-bit fs type, which
|
||||
* separates our fsid from any other filesystem types, and a
|
||||
* 56-bit objset unique ID. The objset unique ID is unique to
|
||||
* all objsets open on this system, provided by unique_create().
|
||||
* The 8-bit fs type must be put in the low bits of fsid[1]
|
||||
* because that's where other Solaris filesystems put it.
|
||||
*/
|
||||
fsid_guid = dmu_objset_fsid_guid(zfsvfs->z_os);
|
||||
ASSERT((fsid_guid & ~((1ULL<<56)-1)) == 0);
|
||||
vfsp->vfs_fsid.val[0] = fsid_guid;
|
||||
vfsp->vfs_fsid.val[1] = ((fsid_guid>>32) << 8) |
|
||||
zfsfstype & 0xFF;
|
||||
|
||||
/*
|
||||
* Set features for file system.
|
||||
*/
|
||||
zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os);
|
||||
if (zfsvfs->z_use_fuids) {
|
||||
vfs_set_feature(vfsp, VFSFT_XVATTR);
|
||||
vfs_set_feature(vfsp, VFSFT_SYSATTR_VIEWS);
|
||||
vfs_set_feature(vfsp, VFSFT_ACEMASKONACCESS);
|
||||
vfs_set_feature(vfsp, VFSFT_ACLONCREATE);
|
||||
}
|
||||
zfs_set_fuid_feature(zfsvfs);
|
||||
if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) {
|
||||
vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS);
|
||||
vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE);
|
||||
|
@ -739,13 +1141,16 @@ zfs_domount(vfs_t *vfsp, char *osname)
|
|||
if (dmu_objset_is_snapshot(zfsvfs->z_os)) {
|
||||
uint64_t pval;
|
||||
|
||||
ASSERT(mode & DS_MODE_READONLY);
|
||||
atime_changed_cb(zfsvfs, B_FALSE);
|
||||
readonly_changed_cb(zfsvfs, B_TRUE);
|
||||
if (error = dsl_prop_get_integer(osname, "xattr", &pval, NULL))
|
||||
goto out;
|
||||
xattr_changed_cb(zfsvfs, pval);
|
||||
zfsvfs->z_issnap = B_TRUE;
|
||||
|
||||
mutex_enter(&zfsvfs->z_os->os->os_user_ptr_lock);
|
||||
dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
|
||||
mutex_exit(&zfsvfs->z_os->os->os_user_ptr_lock);
|
||||
} else {
|
||||
error = zfsvfs_setup(zfsvfs, B_TRUE);
|
||||
}
|
||||
|
@ -754,9 +1159,8 @@ zfs_domount(vfs_t *vfsp, char *osname)
|
|||
zfsctl_create(zfsvfs);
|
||||
out:
|
||||
if (error) {
|
||||
if (zfsvfs->z_os)
|
||||
dmu_objset_close(zfsvfs->z_os);
|
||||
zfs_freezfsvfs(zfsvfs);
|
||||
dmu_objset_close(zfsvfs->z_os);
|
||||
zfsvfs_free(zfsvfs);
|
||||
} else {
|
||||
atomic_add_32(&zfs_active_fs_count, 1);
|
||||
}
|
||||
|
@ -1067,6 +1471,13 @@ zfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
|
|||
|
||||
error = zfs_domount(vfsp, osname);
|
||||
|
||||
/*
|
||||
* Add an extra VFS_HOLD on our parent vfs so that it can't
|
||||
* disappear due to a forced unmount.
|
||||
*/
|
||||
if (error == 0 && ((zfsvfs_t *)vfsp->vfs_data)->z_issnap)
|
||||
VFS_HOLD(mvp->v_vfsp);
|
||||
|
||||
out:
|
||||
pn_free(&spn);
|
||||
return (error);
|
||||
|
@ -1426,15 +1837,16 @@ zfs_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp)
|
|||
* 'z_teardown_inactive_lock' write held.
|
||||
*/
|
||||
int
|
||||
zfs_suspend_fs(zfsvfs_t *zfsvfs, char *name, int *mode)
|
||||
zfs_suspend_fs(zfsvfs_t *zfsvfs, char *name, int *modep)
|
||||
{
|
||||
int error;
|
||||
|
||||
if ((error = zfsvfs_teardown(zfsvfs, B_FALSE)) != 0)
|
||||
return (error);
|
||||
|
||||
*mode = zfsvfs->z_os->os_mode;
|
||||
dmu_objset_name(zfsvfs->z_os, name);
|
||||
*modep = zfsvfs->z_os->os_mode;
|
||||
if (name)
|
||||
dmu_objset_name(zfsvfs->z_os, name);
|
||||
dmu_objset_close(zfsvfs->z_os);
|
||||
|
||||
return (0);
|
||||
|
@ -1493,13 +1905,15 @@ static void
|
|||
zfs_freevfs(vfs_t *vfsp)
|
||||
{
|
||||
zfsvfs_t *zfsvfs = vfsp->vfs_data;
|
||||
int i;
|
||||
|
||||
for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
|
||||
mutex_destroy(&zfsvfs->z_hold_mtx[i]);
|
||||
/*
|
||||
* If this is a snapshot, we have an extra VFS_HOLD on our parent
|
||||
* from zfs_mount(). Release it here.
|
||||
*/
|
||||
if (zfsvfs->z_issnap)
|
||||
VFS_RELE(zfsvfs->z_parent->z_vfs);
|
||||
|
||||
zfs_fuid_destroy(zfsvfs);
|
||||
zfs_freezfsvfs(zfsvfs);
|
||||
zfsvfs_free(zfsvfs);
|
||||
|
||||
atomic_add_32(&zfs_active_fs_count, -1);
|
||||
}
|
||||
|
@ -1558,6 +1972,8 @@ zfs_init(void)
|
|||
* Initialize znode cache, vnode ops, etc...
|
||||
*/
|
||||
zfs_znode_init();
|
||||
|
||||
dmu_objset_register_type(DMU_OST_ZFS, zfs_space_delta_cb);
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -1574,54 +1990,46 @@ zfs_busy(void)
|
|||
}
|
||||
|
||||
int
|
||||
zfs_set_version(const char *name, uint64_t newvers)
|
||||
zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers)
|
||||
{
|
||||
int error;
|
||||
objset_t *os;
|
||||
objset_t *os = zfsvfs->z_os;
|
||||
dmu_tx_t *tx;
|
||||
uint64_t curvers;
|
||||
|
||||
/*
|
||||
* XXX for now, require that the filesystem be unmounted. Would
|
||||
* be nice to find the zfsvfs_t and just update that if
|
||||
* possible.
|
||||
*/
|
||||
|
||||
if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION)
|
||||
return (EINVAL);
|
||||
|
||||
error = dmu_objset_open(name, DMU_OST_ZFS, DS_MODE_OWNER, &os);
|
||||
if (error)
|
||||
return (error);
|
||||
|
||||
error = zap_lookup(os, MASTER_NODE_OBJ, ZPL_VERSION_STR,
|
||||
8, 1, &curvers);
|
||||
if (error)
|
||||
goto out;
|
||||
if (newvers < curvers) {
|
||||
error = EINVAL;
|
||||
goto out;
|
||||
}
|
||||
if (newvers < zfsvfs->z_version)
|
||||
return (EINVAL);
|
||||
|
||||
tx = dmu_tx_create(os);
|
||||
dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, 0, ZPL_VERSION_STR);
|
||||
dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_FALSE, ZPL_VERSION_STR);
|
||||
error = dmu_tx_assign(tx, TXG_WAIT);
|
||||
if (error) {
|
||||
dmu_tx_abort(tx);
|
||||
goto out;
|
||||
return (error);
|
||||
}
|
||||
error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR,
|
||||
8, 1, &newvers, tx);
|
||||
|
||||
if (error) {
|
||||
dmu_tx_commit(tx);
|
||||
return (error);
|
||||
}
|
||||
error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR, 8, 1,
|
||||
&newvers, tx);
|
||||
|
||||
spa_history_internal_log(LOG_DS_UPGRADE,
|
||||
dmu_objset_spa(os), tx, CRED(),
|
||||
"oldver=%llu newver=%llu dataset = %llu", curvers, newvers,
|
||||
dmu_objset_id(os));
|
||||
"oldver=%llu newver=%llu dataset = %llu",
|
||||
zfsvfs->z_version, newvers, dmu_objset_id(os));
|
||||
|
||||
dmu_tx_commit(tx);
|
||||
|
||||
out:
|
||||
dmu_objset_close(os);
|
||||
return (error);
|
||||
zfsvfs->z_version = newvers;
|
||||
|
||||
if (zfsvfs->z_version >= ZPL_VERSION_FUID)
|
||||
zfs_set_fuid_feature(zfsvfs);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
@ -101,6 +101,7 @@
|
|||
* pushing cached pages (which acquires range locks) and syncing out
|
||||
* cached atime changes. Third, zfs_zinactive() may require a new tx,
|
||||
* which could deadlock the system if you were already holding one.
|
||||
* If you must call VN_RELE() within a tx then use VN_RELE_ASYNC().
|
||||
*
|
||||
* (3) All range locks must be grabbed before calling dmu_tx_assign(),
|
||||
* as they can span dmu_tx_assign() calls.
|
||||
|
@ -363,7 +364,8 @@ update_pages(vnode_t *vp, int64_t start, int len, objset_t *os, uint64_t oid)
|
|||
caddr_t va;
|
||||
|
||||
va = zfs_map_page(pp, S_WRITE);
|
||||
(void) dmu_read(os, oid, start+off, nbytes, va+off);
|
||||
(void) dmu_read(os, oid, start+off, nbytes, va+off,
|
||||
DMU_READ_PREFETCH);
|
||||
zfs_unmap_page(pp, va);
|
||||
page_unlock(pp);
|
||||
}
|
||||
|
@ -567,6 +569,7 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
|
|||
int max_blksz = zfsvfs->z_max_blksz;
|
||||
uint64_t pflags;
|
||||
int error;
|
||||
arc_buf_t *abuf;
|
||||
|
||||
/*
|
||||
* Fasttrack empty write
|
||||
|
@ -663,10 +666,46 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
|
|||
* and allows us to do more fine-grained space accounting.
|
||||
*/
|
||||
while (n > 0) {
|
||||
abuf = NULL;
|
||||
woff = uio->uio_loffset;
|
||||
|
||||
again:
|
||||
if (zfs_usergroup_overquota(zfsvfs,
|
||||
B_FALSE, zp->z_phys->zp_uid) ||
|
||||
zfs_usergroup_overquota(zfsvfs,
|
||||
B_TRUE, zp->z_phys->zp_gid)) {
|
||||
if (abuf != NULL)
|
||||
dmu_return_arcbuf(abuf);
|
||||
error = EDQUOT;
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* If dmu_assign_arcbuf() is expected to execute with minimum
|
||||
* overhead loan an arc buffer and copy user data to it before
|
||||
* we enter a txg. This avoids holding a txg forever while we
|
||||
* pagefault on a hanging NFS server mapping.
|
||||
*/
|
||||
if (abuf == NULL && n >= max_blksz &&
|
||||
woff >= zp->z_phys->zp_size &&
|
||||
P2PHASE(woff, max_blksz) == 0 &&
|
||||
zp->z_blksz == max_blksz) {
|
||||
size_t cbytes;
|
||||
|
||||
abuf = dmu_request_arcbuf(zp->z_dbuf, max_blksz);
|
||||
ASSERT(abuf != NULL);
|
||||
ASSERT(arc_buf_size(abuf) == max_blksz);
|
||||
if (error = uiocopy(abuf->b_data, max_blksz,
|
||||
UIO_WRITE, uio, &cbytes)) {
|
||||
dmu_return_arcbuf(abuf);
|
||||
break;
|
||||
}
|
||||
ASSERT(cbytes == max_blksz);
|
||||
}
|
||||
|
||||
/*
|
||||
* Start a transaction.
|
||||
*/
|
||||
woff = uio->uio_loffset;
|
||||
tx = dmu_tx_create(zfsvfs->z_os);
|
||||
dmu_tx_hold_bonus(tx, zp->z_id);
|
||||
dmu_tx_hold_write(tx, zp->z_id, woff, MIN(n, max_blksz));
|
||||
|
@ -675,9 +714,11 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
|
|||
if (error == ERESTART) {
|
||||
dmu_tx_wait(tx);
|
||||
dmu_tx_abort(tx);
|
||||
continue;
|
||||
goto again;
|
||||
}
|
||||
dmu_tx_abort(tx);
|
||||
if (abuf != NULL)
|
||||
dmu_return_arcbuf(abuf);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -706,12 +747,22 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
|
|||
*/
|
||||
nbytes = MIN(n, max_blksz - P2PHASE(woff, max_blksz));
|
||||
|
||||
tx_bytes = uio->uio_resid;
|
||||
error = dmu_write_uio(zfsvfs->z_os, zp->z_id, uio, nbytes, tx);
|
||||
tx_bytes -= uio->uio_resid;
|
||||
if (tx_bytes && vn_has_cached_data(vp))
|
||||
if (abuf == NULL) {
|
||||
tx_bytes = uio->uio_resid;
|
||||
error = dmu_write_uio(zfsvfs->z_os, zp->z_id, uio,
|
||||
nbytes, tx);
|
||||
tx_bytes -= uio->uio_resid;
|
||||
} else {
|
||||
tx_bytes = nbytes;
|
||||
ASSERT(tx_bytes == max_blksz);
|
||||
dmu_assign_arcbuf(zp->z_dbuf, woff, abuf, tx);
|
||||
ASSERT(tx_bytes <= uio->uio_resid);
|
||||
uioskip(uio, tx_bytes);
|
||||
}
|
||||
if (tx_bytes && vn_has_cached_data(vp)) {
|
||||
update_pages(vp, woff,
|
||||
tx_bytes, zfsvfs->z_os, zp->z_id);
|
||||
}
|
||||
|
||||
/*
|
||||
* If we made no progress, we're done. If we made even
|
||||
|
@ -791,10 +842,15 @@ zfs_get_done(dmu_buf_t *db, void *vzgd)
|
|||
zgd_t *zgd = (zgd_t *)vzgd;
|
||||
rl_t *rl = zgd->zgd_rl;
|
||||
vnode_t *vp = ZTOV(rl->r_zp);
|
||||
objset_t *os = rl->r_zp->z_zfsvfs->z_os;
|
||||
|
||||
dmu_buf_rele(db, vzgd);
|
||||
zfs_range_unlock(rl);
|
||||
VN_RELE(vp);
|
||||
/*
|
||||
* Release the vnode asynchronously as we currently have the
|
||||
* txg stopped from syncing.
|
||||
*/
|
||||
VN_RELE_ASYNC(vp, dsl_pool_vnrele_taskq(dmu_objset_pool(os)));
|
||||
zil_add_block(zgd->zgd_zilog, zgd->zgd_bp);
|
||||
kmem_free(zgd, sizeof (zgd_t));
|
||||
}
|
||||
|
@ -824,7 +880,12 @@ zfs_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
|
|||
if (zfs_zget(zfsvfs, lr->lr_foid, &zp) != 0)
|
||||
return (ENOENT);
|
||||
if (zp->z_unlinked) {
|
||||
VN_RELE(ZTOV(zp));
|
||||
/*
|
||||
* Release the vnode asynchronously as we currently have the
|
||||
* txg stopped from syncing.
|
||||
*/
|
||||
VN_RELE_ASYNC(ZTOV(zp),
|
||||
dsl_pool_vnrele_taskq(dmu_objset_pool(os)));
|
||||
return (ENOENT);
|
||||
}
|
||||
|
||||
|
@ -842,7 +903,8 @@ zfs_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
|
|||
error = ENOENT;
|
||||
goto out;
|
||||
}
|
||||
VERIFY(0 == dmu_read(os, lr->lr_foid, off, dlen, buf));
|
||||
VERIFY(0 == dmu_read(os, lr->lr_foid, off, dlen, buf,
|
||||
DMU_READ_NO_PREFETCH));
|
||||
} else { /* indirect write */
|
||||
uint64_t boff; /* block starting offset */
|
||||
|
||||
|
@ -896,7 +958,11 @@ zfs_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
|
|||
}
|
||||
out:
|
||||
zfs_range_unlock(rl);
|
||||
VN_RELE(ZTOV(zp));
|
||||
/*
|
||||
* Release the vnode asynchronously as we currently have the
|
||||
* txg stopped from syncing.
|
||||
*/
|
||||
VN_RELE_ASYNC(ZTOV(zp), dsl_pool_vnrele_taskq(dmu_objset_pool(os)));
|
||||
return (error);
|
||||
}
|
||||
|
||||
|
@ -1074,11 +1140,11 @@ zfs_create(vnode_t *dvp, char *name, vattr_t *vap, vcexcl_t excl,
|
|||
zfs_dirlock_t *dl;
|
||||
dmu_tx_t *tx;
|
||||
int error;
|
||||
zfs_acl_t *aclp = NULL;
|
||||
zfs_fuid_info_t *fuidp = NULL;
|
||||
ksid_t *ksid;
|
||||
uid_t uid;
|
||||
gid_t gid = crgetgid(cr);
|
||||
zfs_acl_ids_t acl_ids;
|
||||
boolean_t fuid_dirtied;
|
||||
|
||||
/*
|
||||
* If we have an ephemeral id, ACL, or XVATTR then
|
||||
|
@ -1141,21 +1207,9 @@ top:
|
|||
if (strcmp(name, "..") == 0)
|
||||
error = EISDIR;
|
||||
ZFS_EXIT(zfsvfs);
|
||||
if (aclp)
|
||||
zfs_acl_free(aclp);
|
||||
return (error);
|
||||
}
|
||||
}
|
||||
if (vsecp && aclp == NULL) {
|
||||
error = zfs_vsec_2_aclp(zfsvfs, vap->va_type, vsecp, &aclp);
|
||||
if (error) {
|
||||
ZFS_EXIT(zfsvfs);
|
||||
if (dl)
|
||||
zfs_dirent_unlock(dl);
|
||||
return (error);
|
||||
}
|
||||
}
|
||||
|
||||
if (zp == NULL) {
|
||||
uint64_t txtype;
|
||||
|
||||
|
@ -1177,30 +1231,28 @@ top:
|
|||
goto out;
|
||||
}
|
||||
|
||||
if ((error = zfs_acl_ids_create(dzp, 0, vap, cr, vsecp,
|
||||
&acl_ids)) != 0)
|
||||
goto out;
|
||||
if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) {
|
||||
error = EDQUOT;
|
||||
goto out;
|
||||
}
|
||||
|
||||
tx = dmu_tx_create(os);
|
||||
dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
|
||||
if ((aclp && aclp->z_has_fuids) || IS_EPHEMERAL(uid) ||
|
||||
IS_EPHEMERAL(gid)) {
|
||||
if (zfsvfs->z_fuid_obj == 0) {
|
||||
dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
|
||||
dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
|
||||
FUID_SIZE_ESTIMATE(zfsvfs));
|
||||
dmu_tx_hold_zap(tx, MASTER_NODE_OBJ,
|
||||
FALSE, NULL);
|
||||
} else {
|
||||
dmu_tx_hold_bonus(tx, zfsvfs->z_fuid_obj);
|
||||
dmu_tx_hold_write(tx, zfsvfs->z_fuid_obj, 0,
|
||||
FUID_SIZE_ESTIMATE(zfsvfs));
|
||||
}
|
||||
}
|
||||
fuid_dirtied = zfsvfs->z_fuid_dirty;
|
||||
if (fuid_dirtied)
|
||||
zfs_fuid_txhold(zfsvfs, tx);
|
||||
dmu_tx_hold_bonus(tx, dzp->z_id);
|
||||
dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
|
||||
if ((dzp->z_phys->zp_flags & ZFS_INHERIT_ACE) || aclp) {
|
||||
if (acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
|
||||
dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
|
||||
0, SPA_MAXBLOCKSIZE);
|
||||
}
|
||||
error = dmu_tx_assign(tx, TXG_NOWAIT);
|
||||
if (error) {
|
||||
zfs_acl_ids_free(&acl_ids);
|
||||
zfs_dirent_unlock(dl);
|
||||
if (error == ERESTART) {
|
||||
dmu_tx_wait(tx);
|
||||
|
@ -1209,19 +1261,21 @@ top:
|
|||
}
|
||||
dmu_tx_abort(tx);
|
||||
ZFS_EXIT(zfsvfs);
|
||||
if (aclp)
|
||||
zfs_acl_free(aclp);
|
||||
return (error);
|
||||
}
|
||||
zfs_mknode(dzp, vap, tx, cr, 0, &zp, 0, aclp, &fuidp);
|
||||
zfs_mknode(dzp, vap, tx, cr, 0, &zp, 0, &acl_ids);
|
||||
|
||||
if (fuid_dirtied)
|
||||
zfs_fuid_sync(zfsvfs, tx);
|
||||
|
||||
(void) zfs_link_create(dl, zp, tx, ZNEW);
|
||||
|
||||
txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap);
|
||||
if (flag & FIGNORECASE)
|
||||
txtype |= TX_CI;
|
||||
zfs_log_create(zilog, tx, txtype, dzp, zp, name,
|
||||
vsecp, fuidp, vap);
|
||||
if (fuidp)
|
||||
zfs_fuid_info_free(fuidp);
|
||||
vsecp, acl_ids.z_fuidp, vap);
|
||||
zfs_acl_ids_free(&acl_ids);
|
||||
dmu_tx_commit(tx);
|
||||
} else {
|
||||
int aflags = (flag & FAPPEND) ? V_APPEND : 0;
|
||||
|
@ -1292,8 +1346,6 @@ out:
|
|||
*vpp = svp;
|
||||
}
|
||||
}
|
||||
if (aclp)
|
||||
zfs_acl_free(aclp);
|
||||
|
||||
ZFS_EXIT(zfsvfs);
|
||||
return (error);
|
||||
|
@ -1528,12 +1580,12 @@ zfs_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t **vpp, cred_t *cr,
|
|||
uint64_t txtype;
|
||||
dmu_tx_t *tx;
|
||||
int error;
|
||||
zfs_acl_t *aclp = NULL;
|
||||
zfs_fuid_info_t *fuidp = NULL;
|
||||
int zf = ZNEW;
|
||||
ksid_t *ksid;
|
||||
uid_t uid;
|
||||
gid_t gid = crgetgid(cr);
|
||||
zfs_acl_ids_t acl_ids;
|
||||
boolean_t fuid_dirtied;
|
||||
|
||||
ASSERT(vap->va_type == VDIR);
|
||||
|
||||
|
@ -1594,38 +1646,33 @@ top:
|
|||
return (error);
|
||||
}
|
||||
|
||||
if (vsecp && aclp == NULL) {
|
||||
error = zfs_vsec_2_aclp(zfsvfs, vap->va_type, vsecp, &aclp);
|
||||
if (error) {
|
||||
zfs_dirent_unlock(dl);
|
||||
ZFS_EXIT(zfsvfs);
|
||||
return (error);
|
||||
}
|
||||
if ((error = zfs_acl_ids_create(dzp, 0, vap, cr, vsecp,
|
||||
&acl_ids)) != 0) {
|
||||
zfs_dirent_unlock(dl);
|
||||
ZFS_EXIT(zfsvfs);
|
||||
return (error);
|
||||
}
|
||||
if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) {
|
||||
zfs_dirent_unlock(dl);
|
||||
ZFS_EXIT(zfsvfs);
|
||||
return (EDQUOT);
|
||||
}
|
||||
|
||||
/*
|
||||
* Add a new entry to the directory.
|
||||
*/
|
||||
tx = dmu_tx_create(zfsvfs->z_os);
|
||||
dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname);
|
||||
dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
|
||||
if ((aclp && aclp->z_has_fuids) || IS_EPHEMERAL(uid) ||
|
||||
IS_EPHEMERAL(gid)) {
|
||||
if (zfsvfs->z_fuid_obj == 0) {
|
||||
dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
|
||||
dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
|
||||
FUID_SIZE_ESTIMATE(zfsvfs));
|
||||
dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, FALSE, NULL);
|
||||
} else {
|
||||
dmu_tx_hold_bonus(tx, zfsvfs->z_fuid_obj);
|
||||
dmu_tx_hold_write(tx, zfsvfs->z_fuid_obj, 0,
|
||||
FUID_SIZE_ESTIMATE(zfsvfs));
|
||||
}
|
||||
}
|
||||
if ((dzp->z_phys->zp_flags & ZFS_INHERIT_ACE) || aclp)
|
||||
fuid_dirtied = zfsvfs->z_fuid_dirty;
|
||||
if (fuid_dirtied)
|
||||
zfs_fuid_txhold(zfsvfs, tx);
|
||||
if (acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE)
|
||||
dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
|
||||
0, SPA_MAXBLOCKSIZE);
|
||||
error = dmu_tx_assign(tx, TXG_NOWAIT);
|
||||
if (error) {
|
||||
zfs_acl_ids_free(&acl_ids);
|
||||
zfs_dirent_unlock(dl);
|
||||
if (error == ERESTART) {
|
||||
dmu_tx_wait(tx);
|
||||
|
@ -1634,19 +1681,16 @@ top:
|
|||
}
|
||||
dmu_tx_abort(tx);
|
||||
ZFS_EXIT(zfsvfs);
|
||||
if (aclp)
|
||||
zfs_acl_free(aclp);
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* Create new node.
|
||||
*/
|
||||
zfs_mknode(dzp, vap, tx, cr, 0, &zp, 0, aclp, &fuidp);
|
||||
|
||||
if (aclp)
|
||||
zfs_acl_free(aclp);
|
||||
zfs_mknode(dzp, vap, tx, cr, 0, &zp, 0, &acl_ids);
|
||||
|
||||
if (fuid_dirtied)
|
||||
zfs_fuid_sync(zfsvfs, tx);
|
||||
/*
|
||||
* Now put new name in parent dir.
|
||||
*/
|
||||
|
@ -1657,10 +1701,10 @@ top:
|
|||
txtype = zfs_log_create_txtype(Z_DIR, vsecp, vap);
|
||||
if (flags & FIGNORECASE)
|
||||
txtype |= TX_CI;
|
||||
zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, vsecp, fuidp, vap);
|
||||
zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, vsecp,
|
||||
acl_ids.z_fuidp, vap);
|
||||
|
||||
if (fuidp)
|
||||
zfs_fuid_info_free(fuidp);
|
||||
zfs_acl_ids_free(&acl_ids);
|
||||
dmu_tx_commit(tx);
|
||||
|
||||
zfs_dirent_unlock(dl);
|
||||
|
@ -1969,6 +2013,21 @@ zfs_readdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp,
|
|||
}
|
||||
}
|
||||
|
||||
if (flags & V_RDDIR_ACCFILTER) {
|
||||
/*
|
||||
* If we have no access at all, don't include
|
||||
* this entry in the returned information
|
||||
*/
|
||||
znode_t *ezp;
|
||||
if (zfs_zget(zp->z_zfsvfs, objnum, &ezp) != 0)
|
||||
goto skip_entry;
|
||||
if (!zfs_has_access(ezp, cr)) {
|
||||
VN_RELE(ZTOV(ezp));
|
||||
goto skip_entry;
|
||||
}
|
||||
VN_RELE(ZTOV(ezp));
|
||||
}
|
||||
|
||||
if (flags & V_RDDIR_ENTFLAGS)
|
||||
reclen = EDIRENT_RECLEN(strlen(zap.za_name));
|
||||
else
|
||||
|
@ -2020,6 +2079,7 @@ zfs_readdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp,
|
|||
if (prefetch)
|
||||
dmu_prefetch(os, objnum, 0, 0);
|
||||
|
||||
skip_entry:
|
||||
/*
|
||||
* Move to the next entry, fill in the previous offset.
|
||||
*/
|
||||
|
@ -2120,8 +2180,6 @@ zfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
|
|||
ZFS_VERIFY_ZP(zp);
|
||||
pzp = zp->z_phys;
|
||||
|
||||
mutex_enter(&zp->z_lock);
|
||||
|
||||
/*
|
||||
* If ACL is trivial don't bother looking for ACE_READ_ATTRIBUTES.
|
||||
* Also, if we are the owner don't bother, since owner should
|
||||
|
@ -2131,7 +2189,6 @@ zfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
|
|||
(pzp->zp_uid != crgetuid(cr))) {
|
||||
if (error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0,
|
||||
skipaclchk, cr)) {
|
||||
mutex_exit(&zp->z_lock);
|
||||
ZFS_EXIT(zfsvfs);
|
||||
return (error);
|
||||
}
|
||||
|
@ -2142,6 +2199,7 @@ zfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
|
|||
* than to determine whether we were asked the question.
|
||||
*/
|
||||
|
||||
mutex_enter(&zp->z_lock);
|
||||
vap->va_type = vp->v_type;
|
||||
vap->va_mode = pzp->zp_mode & MODEMASK;
|
||||
zfs_fuid_map_ids(zp, cr, &vap->va_uid, &vap->va_gid);
|
||||
|
@ -2312,6 +2370,7 @@ zfs_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
|
|||
uint_t saved_mask;
|
||||
int trim_mask = 0;
|
||||
uint64_t new_mode;
|
||||
uint64_t new_uid, new_gid;
|
||||
znode_t *attrzp;
|
||||
int need_policy = FALSE;
|
||||
int err;
|
||||
|
@ -2320,6 +2379,7 @@ zfs_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
|
|||
xoptattr_t *xoap;
|
||||
zfs_acl_t *aclp = NULL;
|
||||
boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
|
||||
boolean_t fuid_dirtied = B_FALSE;
|
||||
|
||||
if (mask == 0)
|
||||
return (0);
|
||||
|
@ -2610,30 +2670,14 @@ top:
|
|||
|
||||
tx = dmu_tx_create(zfsvfs->z_os);
|
||||
dmu_tx_hold_bonus(tx, zp->z_id);
|
||||
if (((mask & AT_UID) && IS_EPHEMERAL(vap->va_uid)) ||
|
||||
((mask & AT_GID) && IS_EPHEMERAL(vap->va_gid))) {
|
||||
if (zfsvfs->z_fuid_obj == 0) {
|
||||
dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
|
||||
dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
|
||||
FUID_SIZE_ESTIMATE(zfsvfs));
|
||||
dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, FALSE, NULL);
|
||||
} else {
|
||||
dmu_tx_hold_bonus(tx, zfsvfs->z_fuid_obj);
|
||||
dmu_tx_hold_write(tx, zfsvfs->z_fuid_obj, 0,
|
||||
FUID_SIZE_ESTIMATE(zfsvfs));
|
||||
}
|
||||
}
|
||||
|
||||
if (mask & AT_MODE) {
|
||||
uint64_t pmode = pzp->zp_mode;
|
||||
|
||||
new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT);
|
||||
|
||||
if (err = zfs_acl_chmod_setattr(zp, &aclp, new_mode)) {
|
||||
dmu_tx_abort(tx);
|
||||
ZFS_EXIT(zfsvfs);
|
||||
return (err);
|
||||
}
|
||||
if (err = zfs_acl_chmod_setattr(zp, &aclp, new_mode))
|
||||
goto out;
|
||||
if (pzp->zp_acl.z_acl_extern_obj) {
|
||||
/* Are we upgrading ACL from old V0 format to new V1 */
|
||||
if (zfsvfs->z_version <= ZPL_VERSION_FUID &&
|
||||
|
@ -2655,36 +2699,53 @@ top:
|
|||
}
|
||||
}
|
||||
|
||||
if ((mask & (AT_UID | AT_GID)) && pzp->zp_xattr != 0) {
|
||||
err = zfs_zget(zp->z_zfsvfs, pzp->zp_xattr, &attrzp);
|
||||
if (err) {
|
||||
dmu_tx_abort(tx);
|
||||
ZFS_EXIT(zfsvfs);
|
||||
if (aclp)
|
||||
zfs_acl_free(aclp);
|
||||
return (err);
|
||||
if (mask & (AT_UID | AT_GID)) {
|
||||
if (pzp->zp_xattr) {
|
||||
err = zfs_zget(zp->z_zfsvfs, pzp->zp_xattr, &attrzp);
|
||||
if (err)
|
||||
goto out;
|
||||
dmu_tx_hold_bonus(tx, attrzp->z_id);
|
||||
}
|
||||
if (mask & AT_UID) {
|
||||
new_uid = zfs_fuid_create(zfsvfs,
|
||||
(uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp);
|
||||
if (new_uid != pzp->zp_uid &&
|
||||
zfs_usergroup_overquota(zfsvfs, B_FALSE, new_uid)) {
|
||||
err = EDQUOT;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
if (mask & AT_GID) {
|
||||
new_gid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_gid,
|
||||
cr, ZFS_GROUP, &fuidp);
|
||||
if (new_gid != pzp->zp_gid &&
|
||||
zfs_usergroup_overquota(zfsvfs, B_TRUE, new_gid)) {
|
||||
err = EDQUOT;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
fuid_dirtied = zfsvfs->z_fuid_dirty;
|
||||
if (fuid_dirtied) {
|
||||
if (zfsvfs->z_fuid_obj == 0) {
|
||||
dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
|
||||
dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
|
||||
FUID_SIZE_ESTIMATE(zfsvfs));
|
||||
dmu_tx_hold_zap(tx, MASTER_NODE_OBJ,
|
||||
FALSE, NULL);
|
||||
} else {
|
||||
dmu_tx_hold_bonus(tx, zfsvfs->z_fuid_obj);
|
||||
dmu_tx_hold_write(tx, zfsvfs->z_fuid_obj, 0,
|
||||
FUID_SIZE_ESTIMATE(zfsvfs));
|
||||
}
|
||||
}
|
||||
dmu_tx_hold_bonus(tx, attrzp->z_id);
|
||||
}
|
||||
|
||||
err = dmu_tx_assign(tx, TXG_NOWAIT);
|
||||
if (err) {
|
||||
if (attrzp)
|
||||
VN_RELE(ZTOV(attrzp));
|
||||
|
||||
if (aclp) {
|
||||
zfs_acl_free(aclp);
|
||||
aclp = NULL;
|
||||
}
|
||||
|
||||
if (err == ERESTART) {
|
||||
if (err == ERESTART)
|
||||
dmu_tx_wait(tx);
|
||||
dmu_tx_abort(tx);
|
||||
goto top;
|
||||
}
|
||||
dmu_tx_abort(tx);
|
||||
ZFS_EXIT(zfsvfs);
|
||||
return (err);
|
||||
goto out;
|
||||
}
|
||||
|
||||
dmu_buf_will_dirty(zp->z_dbuf, tx);
|
||||
|
@ -2702,7 +2763,7 @@ top:
|
|||
if (mask & AT_MODE) {
|
||||
mutex_enter(&zp->z_acl_lock);
|
||||
zp->z_phys->zp_mode = new_mode;
|
||||
err = zfs_aclset_common(zp, aclp, cr, &fuidp, tx);
|
||||
err = zfs_aclset_common(zp, aclp, cr, tx);
|
||||
ASSERT3U(err, ==, 0);
|
||||
mutex_exit(&zp->z_acl_lock);
|
||||
}
|
||||
|
@ -2711,25 +2772,17 @@ top:
|
|||
mutex_enter(&attrzp->z_lock);
|
||||
|
||||
if (mask & AT_UID) {
|
||||
pzp->zp_uid = zfs_fuid_create(zfsvfs,
|
||||
vap->va_uid, cr, ZFS_OWNER, tx, &fuidp);
|
||||
if (attrzp) {
|
||||
attrzp->z_phys->zp_uid = zfs_fuid_create(zfsvfs,
|
||||
vap->va_uid, cr, ZFS_OWNER, tx, &fuidp);
|
||||
}
|
||||
pzp->zp_uid = new_uid;
|
||||
if (attrzp)
|
||||
attrzp->z_phys->zp_uid = new_uid;
|
||||
}
|
||||
|
||||
if (mask & AT_GID) {
|
||||
pzp->zp_gid = zfs_fuid_create(zfsvfs, vap->va_gid,
|
||||
cr, ZFS_GROUP, tx, &fuidp);
|
||||
pzp->zp_gid = new_gid;
|
||||
if (attrzp)
|
||||
attrzp->z_phys->zp_gid = zfs_fuid_create(zfsvfs,
|
||||
vap->va_gid, cr, ZFS_GROUP, tx, &fuidp);
|
||||
attrzp->z_phys->zp_gid = new_gid;
|
||||
}
|
||||
|
||||
if (aclp)
|
||||
zfs_acl_free(aclp);
|
||||
|
||||
if (attrzp)
|
||||
mutex_exit(&attrzp->z_lock);
|
||||
|
||||
|
@ -2791,17 +2844,35 @@ top:
|
|||
zfs_xvattr_set(zp, xvap);
|
||||
}
|
||||
|
||||
if (fuid_dirtied)
|
||||
zfs_fuid_sync(zfsvfs, tx);
|
||||
|
||||
if (mask != 0)
|
||||
zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp);
|
||||
|
||||
if (fuidp)
|
||||
zfs_fuid_info_free(fuidp);
|
||||
mutex_exit(&zp->z_lock);
|
||||
|
||||
out:
|
||||
if (attrzp)
|
||||
VN_RELE(ZTOV(attrzp));
|
||||
|
||||
dmu_tx_commit(tx);
|
||||
if (aclp) {
|
||||
zfs_acl_free(aclp);
|
||||
aclp = NULL;
|
||||
}
|
||||
|
||||
if (fuidp) {
|
||||
zfs_fuid_info_free(fuidp);
|
||||
fuidp = NULL;
|
||||
}
|
||||
|
||||
if (err)
|
||||
dmu_tx_abort(tx);
|
||||
else
|
||||
dmu_tx_commit(tx);
|
||||
|
||||
if (err == ERESTART)
|
||||
goto top;
|
||||
|
||||
ZFS_EXIT(zfsvfs);
|
||||
return (err);
|
||||
|
@ -3232,7 +3303,8 @@ zfs_symlink(vnode_t *dvp, char *name, vattr_t *vap, char *link, cred_t *cr,
|
|||
int len = strlen(link);
|
||||
int error;
|
||||
int zflg = ZNEW;
|
||||
zfs_fuid_info_t *fuidp = NULL;
|
||||
zfs_acl_ids_t acl_ids;
|
||||
boolean_t fuid_dirtied;
|
||||
|
||||
ASSERT(vap->va_type == VLNK);
|
||||
|
||||
|
@ -3267,26 +3339,25 @@ top:
|
|||
return (error);
|
||||
}
|
||||
|
||||
VERIFY(0 == zfs_acl_ids_create(dzp, 0, vap, cr, NULL, &acl_ids));
|
||||
if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) {
|
||||
zfs_acl_ids_free(&acl_ids);
|
||||
zfs_dirent_unlock(dl);
|
||||
ZFS_EXIT(zfsvfs);
|
||||
return (EDQUOT);
|
||||
}
|
||||
tx = dmu_tx_create(zfsvfs->z_os);
|
||||
fuid_dirtied = zfsvfs->z_fuid_dirty;
|
||||
dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len));
|
||||
dmu_tx_hold_bonus(tx, dzp->z_id);
|
||||
dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
|
||||
if (dzp->z_phys->zp_flags & ZFS_INHERIT_ACE)
|
||||
if (acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE)
|
||||
dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, SPA_MAXBLOCKSIZE);
|
||||
if (IS_EPHEMERAL(crgetuid(cr)) || IS_EPHEMERAL(crgetgid(cr))) {
|
||||
if (zfsvfs->z_fuid_obj == 0) {
|
||||
dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
|
||||
dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
|
||||
FUID_SIZE_ESTIMATE(zfsvfs));
|
||||
dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, FALSE, NULL);
|
||||
} else {
|
||||
dmu_tx_hold_bonus(tx, zfsvfs->z_fuid_obj);
|
||||
dmu_tx_hold_write(tx, zfsvfs->z_fuid_obj, 0,
|
||||
FUID_SIZE_ESTIMATE(zfsvfs));
|
||||
}
|
||||
}
|
||||
if (fuid_dirtied)
|
||||
zfs_fuid_txhold(zfsvfs, tx);
|
||||
error = dmu_tx_assign(tx, TXG_NOWAIT);
|
||||
if (error) {
|
||||
zfs_acl_ids_free(&acl_ids);
|
||||
zfs_dirent_unlock(dl);
|
||||
if (error == ERESTART) {
|
||||
dmu_tx_wait(tx);
|
||||
|
@ -3306,13 +3377,16 @@ top:
|
|||
* otherwise, store it just like any other file data.
|
||||
*/
|
||||
if (sizeof (znode_phys_t) + len <= dmu_bonus_max()) {
|
||||
zfs_mknode(dzp, vap, tx, cr, 0, &zp, len, NULL, &fuidp);
|
||||
zfs_mknode(dzp, vap, tx, cr, 0, &zp, len, &acl_ids);
|
||||
if (len != 0)
|
||||
bcopy(link, zp->z_phys + 1, len);
|
||||
} else {
|
||||
dmu_buf_t *dbp;
|
||||
|
||||
zfs_mknode(dzp, vap, tx, cr, 0, &zp, 0, NULL, &fuidp);
|
||||
zfs_mknode(dzp, vap, tx, cr, 0, &zp, 0, &acl_ids);
|
||||
|
||||
if (fuid_dirtied)
|
||||
zfs_fuid_sync(zfsvfs, tx);
|
||||
/*
|
||||
* Nothing can access the znode yet so no locking needed
|
||||
* for growing the znode's blocksize.
|
||||
|
@ -3333,15 +3407,14 @@ top:
|
|||
* Insert the new object into the directory.
|
||||
*/
|
||||
(void) zfs_link_create(dl, zp, tx, ZNEW);
|
||||
out:
|
||||
if (error == 0) {
|
||||
uint64_t txtype = TX_SYMLINK;
|
||||
if (flags & FIGNORECASE)
|
||||
txtype |= TX_CI;
|
||||
zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link);
|
||||
}
|
||||
if (fuidp)
|
||||
zfs_fuid_info_free(fuidp);
|
||||
|
||||
zfs_acl_ids_free(&acl_ids);
|
||||
|
||||
dmu_tx_commit(tx);
|
||||
|
||||
|
@ -3618,6 +3691,12 @@ zfs_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp,
|
|||
pvn_write_done(trunc, flags);
|
||||
len = filesz - off;
|
||||
}
|
||||
|
||||
if (zfs_usergroup_overquota(zfsvfs, B_FALSE, zp->z_phys->zp_uid) ||
|
||||
zfs_usergroup_overquota(zfsvfs, B_TRUE, zp->z_phys->zp_gid)) {
|
||||
err = EDQUOT;
|
||||
goto out;
|
||||
}
|
||||
top:
|
||||
tx = dmu_tx_create(zfsvfs->z_os);
|
||||
dmu_tx_hold_write(tx, zp->z_id, off, len);
|
||||
|
@ -3705,7 +3784,7 @@ zfs_putpage(vnode_t *vp, offset_t off, size_t len, int flags, cred_t *cr,
|
|||
else
|
||||
io_off = 0;
|
||||
if (len > 0 && ISP2(blksz))
|
||||
io_len = P2ROUNDUP_TYPED(len + (io_off - off), blksz, size_t);
|
||||
io_len = P2ROUNDUP_TYPED(len + (off - io_off), blksz, size_t);
|
||||
else
|
||||
io_len = 0;
|
||||
|
||||
|
@ -3869,7 +3948,8 @@ zfs_frlock(vnode_t *vp, int cmd, flock64_t *bfp, int flag, offset_t offset,
|
|||
* If we can't find a page in the cache, we will create a new page
|
||||
* and fill it with file data. For efficiency, we may try to fill
|
||||
* multiple pages at once (klustering) to fill up the supplied page
|
||||
* list.
|
||||
* list. Note that the pages to be filled are held with an exclusive
|
||||
* lock to prevent access by other threads while they are being filled.
|
||||
*/
|
||||
static int
|
||||
zfs_fillpage(vnode_t *vp, u_offset_t off, struct seg *seg,
|
||||
|
@ -3888,7 +3968,8 @@ zfs_fillpage(vnode_t *vp, u_offset_t off, struct seg *seg,
|
|||
*/
|
||||
io_off = off;
|
||||
io_len = PAGESIZE;
|
||||
pp = page_create_va(vp, io_off, io_len, PG_WAIT, seg, addr);
|
||||
pp = page_create_va(vp, io_off, io_len,
|
||||
PG_EXCL | PG_WAIT, seg, addr);
|
||||
} else {
|
||||
/*
|
||||
* Try to find enough pages to fill the page list
|
||||
|
@ -3913,7 +3994,8 @@ zfs_fillpage(vnode_t *vp, u_offset_t off, struct seg *seg,
|
|||
|
||||
ASSERT3U(io_off, ==, cur_pp->p_offset);
|
||||
va = zfs_map_page(cur_pp, S_WRITE);
|
||||
err = dmu_read(os, zp->z_id, io_off, PAGESIZE, va);
|
||||
err = dmu_read(os, zp->z_id, io_off, PAGESIZE, va,
|
||||
DMU_READ_PREFETCH);
|
||||
zfs_unmap_page(cur_pp, va);
|
||||
if (err) {
|
||||
/* On error, toss the entire kluster */
|
||||
|
@ -3991,7 +4073,7 @@ zfs_getpage(vnode_t *vp, offset_t off, size_t len, uint_t *protp,
|
|||
*protp = PROT_ALL;
|
||||
|
||||
/*
|
||||
* Loop through the requested range [off, off + len] looking
|
||||
* Loop through the requested range [off, off + len) looking
|
||||
* for pages. If we don't find a page, we will need to create
|
||||
* a new page and fill it with data from the file.
|
||||
*/
|
||||
|
@ -4337,6 +4419,11 @@ zfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
|
|||
(vp->v_type == VREG || vp->v_type == VDIR);
|
||||
return (0);
|
||||
|
||||
case _PC_ACCESS_FILTERING:
|
||||
*valp = vfs_has_feature(vp->v_vfsp, VFSFT_ACCESS_FILTER) &&
|
||||
vp->v_type == VDIR;
|
||||
return (0);
|
||||
|
||||
case _PC_ACL_ENABLED:
|
||||
*valp = _ACL_ACE_ENABLED;
|
||||
return (0);
|
||||
|
@ -4488,6 +4575,22 @@ const fs_operation_def_t zfs_symvnodeops_template[] = {
|
|||
NULL, NULL
|
||||
};
|
||||
|
||||
/*
|
||||
* special share hidden files vnode operations template
*
* zfs_sharevnodeops holds the vnode operations vector built from this
* template; zfs_create_op_tables() passes both to vn_make_ops().
* zfs_znode_alloc() installs that vector on regular-file (VREG) znodes
* whose zp_parent equals zfsvfs->z_shares_dir.
*
* Each entry pairs a VOPNAME_* operation name with the handler used for
* it. Only getattr, access, inactive, fid, pathconf, getsecattr,
* setsecattr and vnevent are listed; all but vnevent (which uses
* fs_vnevent_support) map to the regular zfs_* handlers. The table ends
* with a NULL, NULL entry, as the other templates in this file do.
|
||||
*/
|
||||
vnodeops_t *zfs_sharevnodeops;
|
||||
const fs_operation_def_t zfs_sharevnodeops_template[] = {
|
||||
VOPNAME_GETATTR, { .vop_getattr = zfs_getattr },
|
||||
VOPNAME_ACCESS, { .vop_access = zfs_access },
|
||||
VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive },
|
||||
VOPNAME_FID, { .vop_fid = zfs_fid },
|
||||
VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf },
|
||||
VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr },
|
||||
VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr },
|
||||
VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support },
|
||||
NULL, NULL
|
||||
|
|
||||
};
|
||||
|
||||
/*
|
||||
* Extended attribute directory vnode operations template
|
||||
* This template is identical to the directory vnodes
|
||||
|
|
|
@ -87,6 +87,12 @@
|
|||
* (such as VFS logic) that will not compile easily in userland.
|
||||
*/
|
||||
#ifdef _KERNEL
|
||||
/*
|
||||
* Needed to close a small window in zfs_znode_move() that allows the zfsvfs to
|
||||
* be freed before it can be safely accessed.
|
||||
*/
|
||||
krwlock_t zfsvfs_lock;
|
||||
|
||||
static kmem_cache_t *znode_cache = NULL;
|
||||
|
||||
/*ARGSUSED*/
|
||||
|
@ -154,8 +160,9 @@ zfs_znode_cache_destructor(void *buf, void *arg)
|
|||
#ifdef ZNODE_STATS
|
||||
static struct {
|
||||
uint64_t zms_zfsvfs_invalid;
|
||||
uint64_t zms_zfsvfs_recheck1;
|
||||
uint64_t zms_zfsvfs_unmounted;
|
||||
uint64_t zms_zfsvfs_recheck_invalid;
|
||||
uint64_t zms_zfsvfs_recheck2;
|
||||
uint64_t zms_obj_held;
|
||||
uint64_t zms_vnode_locked;
|
||||
uint64_t zms_not_only_dnlc;
|
||||
|
@ -206,17 +213,6 @@ zfs_znode_move_impl(znode_t *ozp, znode_t *nzp)
|
|||
POINTER_INVALIDATE(&ozp->z_zfsvfs);
|
||||
}
|
||||
|
||||
/*
|
||||
* Wrapper function for ZFS_ENTER that returns 0 if successful and otherwise
|
||||
* returns a non-zero error code.
*
* zfsvfs: the file system to enter.
*
* NOTE(review): the only explicit return below is 0, so any non-zero
* result must come from ZFS_ENTER itself returning out of this function.
* The open-coded equivalent in zfs_znode_move() takes z_teardown_lock as
* reader and bails out when z_unmounted is set -- confirm against the
* macro definition.
|
||||
*/
|
||||
static int
|
||||
zfs_enter(zfsvfs_t *zfsvfs)
|
||||
{
|
||||
/* May return early from this function; see the note above. */
ZFS_ENTER(zfsvfs);
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*ARGSUSED*/
|
||||
static kmem_cbrc_t
|
||||
zfs_znode_move(void *buf, void *newbuf, size_t size, void *arg)
|
||||
|
@ -240,12 +236,32 @@ zfs_znode_move(void *buf, void *newbuf, size_t size, void *arg)
|
|||
}
|
||||
|
||||
/*
|
||||
* Ensure that the filesystem is not unmounted during the move.
|
||||
* Close a small window in which it's possible that the filesystem could
|
||||
* be unmounted and freed, and zfsvfs, though valid in the previous
|
||||
* statement, could point to unrelated memory by the time we try to
|
||||
* prevent the filesystem from being unmounted.
|
||||
*/
|
||||
if (zfs_enter(zfsvfs) != 0) { /* ZFS_ENTER */
|
||||
rw_enter(&zfsvfs_lock, RW_WRITER);
|
||||
if (zfsvfs != ozp->z_zfsvfs) {
|
||||
rw_exit(&zfsvfs_lock);
|
||||
ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_recheck1);
|
||||
return (KMEM_CBRC_DONT_KNOW);
|
||||
}
|
||||
|
||||
/*
|
||||
* If the znode is still valid, then so is the file system. We know that
|
||||
* no valid file system can be freed while we hold zfsvfs_lock, so we
|
||||
* can safely ensure that the filesystem is not and will not be
|
||||
* unmounted. The next statement is equivalent to ZFS_ENTER().
|
||||
*/
|
||||
rrw_enter(&zfsvfs->z_teardown_lock, RW_READER, FTAG);
|
||||
if (zfsvfs->z_unmounted) {
|
||||
ZFS_EXIT(zfsvfs);
|
||||
rw_exit(&zfsvfs_lock);
|
||||
ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_unmounted);
|
||||
return (KMEM_CBRC_DONT_KNOW);
|
||||
}
|
||||
rw_exit(&zfsvfs_lock);
|
||||
|
||||
mutex_enter(&zfsvfs->z_znodes_lock);
|
||||
/*
|
||||
|
@ -255,7 +271,7 @@ zfs_znode_move(void *buf, void *newbuf, size_t size, void *arg)
|
|||
if (zfsvfs != ozp->z_zfsvfs) {
|
||||
mutex_exit(&zfsvfs->z_znodes_lock);
|
||||
ZFS_EXIT(zfsvfs);
|
||||
ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_recheck_invalid);
|
||||
ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_recheck2);
|
||||
return (KMEM_CBRC_DONT_KNOW);
|
||||
}
|
||||
|
||||
|
@ -311,6 +327,7 @@ zfs_znode_init(void)
|
|||
/*
|
||||
* Initialize zcache
|
||||
*/
|
||||
rw_init(&zfsvfs_lock, NULL, RW_DEFAULT, NULL);
|
||||
ASSERT(znode_cache == NULL);
|
||||
znode_cache = kmem_cache_create("zfs_znode_cache",
|
||||
sizeof (znode_t), 0, zfs_znode_cache_constructor,
|
||||
|
@ -332,6 +349,7 @@ zfs_znode_fini(void)
|
|||
if (znode_cache)
|
||||
kmem_cache_destroy(znode_cache);
|
||||
znode_cache = NULL;
|
||||
rw_destroy(&zfsvfs_lock);
|
||||
}
|
||||
|
||||
struct vnodeops *zfs_dvnodeops;
|
||||
|
@ -339,6 +357,7 @@ struct vnodeops *zfs_fvnodeops;
|
|||
struct vnodeops *zfs_symvnodeops;
|
||||
struct vnodeops *zfs_xdvnodeops;
|
||||
struct vnodeops *zfs_evnodeops;
|
||||
struct vnodeops *zfs_sharevnodeops;
|
||||
|
||||
void
|
||||
zfs_remove_op_tables()
|
||||
|
@ -363,12 +382,15 @@ zfs_remove_op_tables()
|
|||
vn_freevnodeops(zfs_xdvnodeops);
|
||||
if (zfs_evnodeops)
|
||||
vn_freevnodeops(zfs_evnodeops);
|
||||
if (zfs_sharevnodeops)
|
||||
vn_freevnodeops(zfs_sharevnodeops);
|
||||
|
||||
zfs_dvnodeops = NULL;
|
||||
zfs_fvnodeops = NULL;
|
||||
zfs_symvnodeops = NULL;
|
||||
zfs_xdvnodeops = NULL;
|
||||
zfs_evnodeops = NULL;
|
||||
zfs_sharevnodeops = NULL;
|
||||
}
|
||||
|
||||
extern const fs_operation_def_t zfs_dvnodeops_template[];
|
||||
|
@ -376,6 +398,7 @@ extern const fs_operation_def_t zfs_fvnodeops_template[];
|
|||
extern const fs_operation_def_t zfs_xdvnodeops_template[];
|
||||
extern const fs_operation_def_t zfs_symvnodeops_template[];
|
||||
extern const fs_operation_def_t zfs_evnodeops_template[];
|
||||
extern const fs_operation_def_t zfs_sharevnodeops_template[];
|
||||
|
||||
int
|
||||
zfs_create_op_tables()
|
||||
|
@ -412,103 +435,58 @@ zfs_create_op_tables()
|
|||
|
||||
error = vn_make_ops(MNTTYPE_ZFS, zfs_evnodeops_template,
|
||||
&zfs_evnodeops);
|
||||
if (error)
|
||||
return (error);
|
||||
|
||||
error = vn_make_ops(MNTTYPE_ZFS, zfs_sharevnodeops_template,
|
||||
&zfs_sharevnodeops);
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* zfs_init_fs - Initialize the zfsvfs struct and the file system
|
||||
* incore "master" object. Verify version compatibility.
|
||||
*/
|
||||
int
|
||||
zfs_init_fs(zfsvfs_t *zfsvfs, znode_t **zpp)
|
||||
zfs_create_share_dir(zfsvfs_t *zfsvfs, dmu_tx_t *tx)
|
||||
{
|
||||
extern int zfsfstype;
|
||||
zfs_acl_ids_t acl_ids;
|
||||
vattr_t vattr;
|
||||
znode_t *sharezp;
|
||||
vnode_t *vp;
|
||||
znode_t *zp;
|
||||
int error;
|
||||
|
||||
objset_t *os = zfsvfs->z_os;
|
||||
int i, error;
|
||||
uint64_t fsid_guid;
|
||||
uint64_t zval;
|
||||
vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE;
|
||||
vattr.va_type = VDIR;
|
||||
vattr.va_mode = S_IFDIR|0555;
|
||||
vattr.va_uid = crgetuid(kcred);
|
||||
vattr.va_gid = crgetgid(kcred);
|
||||
|
||||
*zpp = NULL;
|
||||
sharezp = kmem_cache_alloc(znode_cache, KM_SLEEP);
|
||||
sharezp->z_unlinked = 0;
|
||||
sharezp->z_atime_dirty = 0;
|
||||
sharezp->z_zfsvfs = zfsvfs;
|
||||
|
||||
error = zfs_get_zplprop(os, ZFS_PROP_VERSION, &zfsvfs->z_version);
|
||||
if (error) {
|
||||
return (error);
|
||||
} else if (zfsvfs->z_version > ZPL_VERSION) {
|
||||
(void) printf("Mismatched versions: File system "
|
||||
"is version %llu on-disk format, which is "
|
||||
"incompatible with this software version %lld!",
|
||||
(u_longlong_t)zfsvfs->z_version, ZPL_VERSION);
|
||||
return (ENOTSUP);
|
||||
}
|
||||
vp = ZTOV(sharezp);
|
||||
vn_reinit(vp);
|
||||
vp->v_type = VDIR;
|
||||
|
||||
if ((error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &zval)) != 0)
|
||||
return (error);
|
||||
zfsvfs->z_norm = (int)zval;
|
||||
if ((error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &zval)) != 0)
|
||||
return (error);
|
||||
zfsvfs->z_utf8 = (zval != 0);
|
||||
if ((error = zfs_get_zplprop(os, ZFS_PROP_CASE, &zval)) != 0)
|
||||
return (error);
|
||||
zfsvfs->z_case = (uint_t)zval;
|
||||
/*
|
||||
* Fold case on file systems that are always or sometimes case
|
||||
* insensitive.
|
||||
*/
|
||||
if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE ||
|
||||
zfsvfs->z_case == ZFS_CASE_MIXED)
|
||||
zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER;
|
||||
VERIFY(0 == zfs_acl_ids_create(sharezp, IS_ROOT_NODE, &vattr,
|
||||
kcred, NULL, &acl_ids));
|
||||
zfs_mknode(sharezp, &vattr, tx, kcred, IS_ROOT_NODE,
|
||||
&zp, 0, &acl_ids);
|
||||
ASSERT3P(zp, ==, sharezp);
|
||||
ASSERT(!vn_in_dnlc(ZTOV(sharezp))); /* not valid to move */
|
||||
POINTER_INVALIDATE(&sharezp->z_zfsvfs);
|
||||
error = zap_add(zfsvfs->z_os, MASTER_NODE_OBJ,
|
||||
ZFS_SHARES_DIR, 8, 1, &sharezp->z_id, tx);
|
||||
zfsvfs->z_shares_dir = sharezp->z_id;
|
||||
|
||||
/*
|
||||
* The fsid is 64 bits, composed of an 8-bit fs type, which
|
||||
* separates our fsid from any other filesystem types, and a
|
||||
* 56-bit objset unique ID. The objset unique ID is unique to
|
||||
* all objsets open on this system, provided by unique_create().
|
||||
* The 8-bit fs type must be put in the low bits of fsid[1]
|
||||
* because that's where other Solaris filesystems put it.
|
||||
*/
|
||||
fsid_guid = dmu_objset_fsid_guid(os);
|
||||
ASSERT((fsid_guid & ~((1ULL<<56)-1)) == 0);
|
||||
zfsvfs->z_vfs->vfs_fsid.val[0] = fsid_guid;
|
||||
zfsvfs->z_vfs->vfs_fsid.val[1] = ((fsid_guid>>32) << 8) |
|
||||
zfsfstype & 0xFF;
|
||||
zfs_acl_ids_free(&acl_ids);
|
||||
ZTOV(sharezp)->v_count = 0;
|
||||
dmu_buf_rele(sharezp->z_dbuf, NULL);
|
||||
sharezp->z_dbuf = NULL;
|
||||
kmem_cache_free(znode_cache, sharezp);
|
||||
|
||||
error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1,
|
||||
&zfsvfs->z_root);
|
||||
if (error)
|
||||
return (error);
|
||||
ASSERT(zfsvfs->z_root != 0);
|
||||
|
||||
error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1,
|
||||
&zfsvfs->z_unlinkedobj);
|
||||
if (error)
|
||||
return (error);
|
||||
|
||||
/*
|
||||
* Initialize zget mutex's
|
||||
*/
|
||||
for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
|
||||
mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL);
|
||||
|
||||
error = zfs_zget(zfsvfs, zfsvfs->z_root, zpp);
|
||||
if (error) {
|
||||
/*
|
||||
* On error, we destroy the mutexes here since it's not
|
||||
* possible for the caller to determine if the mutexes were
|
||||
* initialized properly.
|
||||
*/
|
||||
for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
|
||||
mutex_destroy(&zfsvfs->z_hold_mtx[i]);
|
||||
return (error);
|
||||
}
|
||||
ASSERT3U((*zpp)->z_id, ==, zfsvfs->z_root);
|
||||
error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES, 8, 1,
|
||||
&zfsvfs->z_fuid_obj);
|
||||
if (error == ENOENT)
|
||||
error = 0;
|
||||
|
||||
return (0);
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -676,7 +654,10 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz)
|
|||
break;
|
||||
case VREG:
|
||||
vp->v_flag |= VMODSORT;
|
||||
vn_setops(vp, zfs_fvnodeops);
|
||||
if (zp->z_phys->zp_parent == zfsvfs->z_shares_dir)
|
||||
vn_setops(vp, zfs_sharevnodeops);
|
||||
else
|
||||
vn_setops(vp, zfs_fvnodeops);
|
||||
break;
|
||||
case VLNK:
|
||||
vn_setops(vp, zfs_symvnodeops);
|
||||
|
@ -720,8 +701,7 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz)
|
|||
*/
|
||||
void
|
||||
zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
|
||||
uint_t flag, znode_t **zpp, int bonuslen, zfs_acl_t *setaclp,
|
||||
zfs_fuid_info_t **fuidp)
|
||||
uint_t flag, znode_t **zpp, int bonuslen, zfs_acl_ids_t *acl_ids)
|
||||
{
|
||||
dmu_buf_t *db;
|
||||
znode_phys_t *pzp;
|
||||
|
@ -846,7 +826,12 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
|
|||
*/
|
||||
*zpp = dzp;
|
||||
}
|
||||
zfs_perm_init(*zpp, dzp, flag, vap, tx, cr, setaclp, fuidp);
|
||||
pzp->zp_uid = acl_ids->z_fuid;
|
||||
pzp->zp_gid = acl_ids->z_fgid;
|
||||
pzp->zp_mode = acl_ids->z_mode;
|
||||
VERIFY(0 == zfs_aclset_common(*zpp, acl_ids->z_aclp, cr, tx));
|
||||
if (vap->va_mask & AT_XVATTR)
|
||||
zfs_xvattr_set(*zpp, (xvattr_t *)vap);
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -1474,7 +1459,7 @@ void
|
|||
zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx)
|
||||
{
|
||||
zfsvfs_t zfsvfs;
|
||||
uint64_t moid, doid, version;
|
||||
uint64_t moid, obj, version;
|
||||
uint64_t sense = ZFS_CASE_SENSITIVE;
|
||||
uint64_t norm = 0;
|
||||
nvpair_t *elem;
|
||||
|
@ -1483,6 +1468,7 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx)
|
|||
vnode_t *vp;
|
||||
vattr_t vattr;
|
||||
znode_t *zp;
|
||||
zfs_acl_ids_t acl_ids;
|
||||
|
||||
/*
|
||||
* First attempt to create master node.
|
||||
|
@ -1499,12 +1485,12 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx)
|
|||
/*
|
||||
* Set starting attributes.
|
||||
*/
|
||||
if (spa_version(dmu_objset_spa(os)) >= SPA_VERSION_FUID)
|
||||
if (spa_version(dmu_objset_spa(os)) >= SPA_VERSION_USERSPACE)
|
||||
version = ZPL_VERSION;
|
||||
else if (spa_version(dmu_objset_spa(os)) >= SPA_VERSION_FUID)
|
||||
version = ZPL_VERSION_USERSPACE - 1;
|
||||
else
|
||||
version = ZPL_VERSION_FUID - 1;
|
||||
error = zap_update(os, moid, ZPL_VERSION_STR,
|
||||
8, 1, &version, tx);
|
||||
elem = NULL;
|
||||
while ((elem = nvlist_next_nvpair(zplprops, elem)) != NULL) {
|
||||
/* For the moment we expect all zpl props to be uint64_ts */
|
||||
|
@ -1515,9 +1501,8 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx)
|
|||
VERIFY(nvpair_value_uint64(elem, &val) == 0);
|
||||
name = nvpair_name(elem);
|
||||
if (strcmp(name, zfs_prop_to_name(ZFS_PROP_VERSION)) == 0) {
|
||||
version = val;
|
||||
error = zap_update(os, moid, ZPL_VERSION_STR,
|
||||
8, 1, &version, tx);
|
||||
if (val < version)
|
||||
version = val;
|
||||
} else {
|
||||
error = zap_update(os, moid, name, 8, 1, &val, tx);
|
||||
}
|
||||
|
@ -1528,13 +1513,14 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx)
|
|||
sense = val;
|
||||
}
|
||||
ASSERT(version != 0);
|
||||
error = zap_update(os, moid, ZPL_VERSION_STR, 8, 1, &version, tx);
|
||||
|
||||
/*
|
||||
* Create a delete queue.
|
||||
*/
|
||||
doid = zap_create(os, DMU_OT_UNLINKED_SET, DMU_OT_NONE, 0, tx);
|
||||
obj = zap_create(os, DMU_OT_UNLINKED_SET, DMU_OT_NONE, 0, tx);
|
||||
|
||||
error = zap_add(os, moid, ZFS_UNLINKED_SET, 8, 1, &doid, tx);
|
||||
error = zap_add(os, moid, ZFS_UNLINKED_SET, 8, 1, &obj, tx);
|
||||
ASSERT(error == 0);
|
||||
|
||||
/*
|
||||
|
@ -1575,17 +1561,28 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx)
|
|||
|
||||
ASSERT(!POINTER_IS_VALID(rootzp->z_zfsvfs));
|
||||
rootzp->z_zfsvfs = &zfsvfs;
|
||||
zfs_mknode(rootzp, &vattr, tx, cr, IS_ROOT_NODE, &zp, 0, NULL, NULL);
|
||||
VERIFY(0 == zfs_acl_ids_create(rootzp, IS_ROOT_NODE, &vattr,
|
||||
cr, NULL, &acl_ids));
|
||||
zfs_mknode(rootzp, &vattr, tx, cr, IS_ROOT_NODE, &zp, 0, &acl_ids);
|
||||
ASSERT3P(zp, ==, rootzp);
|
||||
ASSERT(!vn_in_dnlc(ZTOV(rootzp))); /* not valid to move */
|
||||
error = zap_add(os, moid, ZFS_ROOT_OBJ, 8, 1, &rootzp->z_id, tx);
|
||||
ASSERT(error == 0);
|
||||
zfs_acl_ids_free(&acl_ids);
|
||||
POINTER_INVALIDATE(&rootzp->z_zfsvfs);
|
||||
|
||||
ZTOV(rootzp)->v_count = 0;
|
||||
dmu_buf_rele(rootzp->z_dbuf, NULL);
|
||||
rootzp->z_dbuf = NULL;
|
||||
kmem_cache_free(znode_cache, rootzp);
|
||||
|
||||
/*
|
||||
* Create shares directory
|
||||
*/
|
||||
|
||||
error = zfs_create_share_dir(&zfsvfs, tx);
|
||||
|
||||
ASSERT(error == 0);
|
||||
}
|
||||
|
||||
#endif /* _KERNEL */
|
||||
|
|
172
module/zfs/zil.c
172
module/zfs/zil.c
|
@ -19,12 +19,13 @@
|
|||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/spa.h>
|
||||
#include <sys/spa_impl.h>
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/zap.h>
|
||||
#include <sys/arc.h>
|
||||
|
@ -471,34 +472,22 @@ zil_destroy(zilog_t *zilog, boolean_t keep_first)
|
|||
}
|
||||
|
||||
/*
|
||||
* zil_rollback_destroy() is only called by the rollback code.
|
||||
* We already have a syncing tx. Rollback has exclusive access to the
|
||||
* dataset, so we don't have to worry about concurrent zil access.
|
||||
* The actual freeing of any log blocks occurs in zil_sync() later in
|
||||
* this txg syncing phase.
|
||||
* return true if the initial log block is not valid
|
||||
*/
|
||||
void
|
||||
zil_rollback_destroy(zilog_t *zilog, dmu_tx_t *tx)
|
||||
static boolean_t
|
||||
zil_empty(zilog_t *zilog)
|
||||
{
|
||||
const zil_header_t *zh = zilog->zl_header;
|
||||
uint64_t txg;
|
||||
arc_buf_t *abuf = NULL;
|
||||
|
||||
if (BP_IS_HOLE(&zh->zh_log))
|
||||
return;
|
||||
return (B_TRUE);
|
||||
|
||||
txg = dmu_tx_get_txg(tx);
|
||||
ASSERT3U(zilog->zl_destroy_txg, <, txg);
|
||||
zilog->zl_destroy_txg = txg;
|
||||
zilog->zl_keep_first = B_FALSE;
|
||||
if (zil_read_log_block(zilog, &zh->zh_log, &abuf) != 0)
|
||||
return (B_TRUE);
|
||||
|
||||
/*
|
||||
* Ensure there's no outstanding ZIL IO. No lwbs or just the
|
||||
* unused one that allocated in advance is ok.
|
||||
*/
|
||||
ASSERT(zilog->zl_lwb_list.list_head.list_next ==
|
||||
zilog->zl_lwb_list.list_head.list_prev);
|
||||
(void) zil_parse(zilog, zil_free_log_block, zil_free_log_record,
|
||||
tx, zh->zh_claim_txg);
|
||||
VERIFY(arc_buf_remove_ref(abuf, &abuf) == 1);
|
||||
return (B_FALSE);
|
||||
}
|
||||
|
||||
int
|
||||
|
@ -520,6 +509,30 @@ zil_claim(char *osname, void *txarg)
|
|||
zilog = dmu_objset_zil(os);
|
||||
zh = zil_header_in_syncing_context(zilog);
|
||||
|
||||
if (zilog->zl_spa->spa_log_state == SPA_LOG_CLEAR) {
|
||||
if (!BP_IS_HOLE(&zh->zh_log))
|
||||
zio_free_blk(zilog->zl_spa, &zh->zh_log, first_txg);
|
||||
BP_ZERO(&zh->zh_log);
|
||||
dsl_dataset_dirty(dmu_objset_ds(os), tx);
|
||||
}
|
||||
|
||||
/*
|
||||
* Record here whether the zil has any records to replay.
|
||||
* If the header block pointer is null or the block points
|
||||
* to the stubby then we know there are no valid log records.
|
||||
* We use the header to store this state as the the zilog gets
|
||||
* freed later in dmu_objset_close().
|
||||
* The flags (and the rest of the header fields) are cleared in
|
||||
* zil_sync() as a result of a zil_destroy(), after replaying the log.
|
||||
*
|
||||
* Note, the intent log can be empty but still need the
|
||||
* stubby to be claimed.
|
||||
*/
|
||||
if (!zil_empty(zilog)) {
|
||||
zh->zh_flags |= ZIL_REPLAY_NEEDED;
|
||||
dsl_dataset_dirty(dmu_objset_ds(os), tx);
|
||||
}
|
||||
|
||||
/*
|
||||
* Claim all log blocks if we haven't already done so, and remember
|
||||
* the highest claimed sequence number. This ensures that if we can
|
||||
|
@ -587,36 +600,6 @@ zil_check_log_chain(char *osname, void *txarg)
|
|||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* Clear a log chain
|
||||
*/
|
||||
/* ARGSUSED */
|
||||
int
|
||||
zil_clear_log_chain(char *osname, void *txarg)
|
||||
{
|
||||
zilog_t *zilog;
|
||||
zil_header_t *zh;
|
||||
objset_t *os;
|
||||
dmu_tx_t *tx;
|
||||
int error;
|
||||
|
||||
error = dmu_objset_open(osname, DMU_OST_ANY, DS_MODE_USER, &os);
|
||||
if (error) {
|
||||
cmn_err(CE_WARN, "can't open objset for %s", osname);
|
||||
return (0);
|
||||
}
|
||||
|
||||
zilog = dmu_objset_zil(os);
|
||||
tx = dmu_tx_create(zilog->zl_os);
|
||||
(void) dmu_tx_assign(tx, TXG_WAIT);
|
||||
zh = zil_header_in_syncing_context(zilog);
|
||||
BP_ZERO(&zh->zh_log);
|
||||
dsl_dataset_dirty(dmu_objset_ds(os), tx);
|
||||
dmu_tx_commit(tx);
|
||||
dmu_objset_close(os);
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
zil_vdev_compare(const void *x1, const void *x2)
|
||||
{
|
||||
|
@ -719,18 +702,26 @@ zil_lwb_write_done(zio_t *zio)
|
|||
ASSERT(zio->io_bp->blk_fill == 0);
|
||||
|
||||
/*
|
||||
* Now that we've written this log block, we have a stable pointer
|
||||
* to the next block in the chain, so it's OK to let the txg in
|
||||
* which we allocated the next block sync.
|
||||
* Ensure the lwb buffer pointer is cleared before releasing
|
||||
* the txg. If we have had an allocation failure and
|
||||
* the txg is waiting to sync then we want want zil_sync()
|
||||
* to remove the lwb so that it's not picked up as the next new
|
||||
* one in zil_commit_writer(). zil_sync() will only remove
|
||||
* the lwb if lwb_buf is null.
|
||||
*/
|
||||
txg_rele_to_sync(&lwb->lwb_txgh);
|
||||
|
||||
zio_buf_free(lwb->lwb_buf, lwb->lwb_sz);
|
||||
mutex_enter(&zilog->zl_lock);
|
||||
lwb->lwb_buf = NULL;
|
||||
if (zio->io_error)
|
||||
zilog->zl_log_error = B_TRUE;
|
||||
mutex_exit(&zilog->zl_lock);
|
||||
|
||||
/*
|
||||
* Now that we've written this log block, we have a stable pointer
|
||||
* to the next block in the chain, so it's OK to let the txg in
|
||||
* which we allocated the next block sync.
|
||||
*/
|
||||
txg_rele_to_sync(&lwb->lwb_txgh);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -752,9 +743,9 @@ zil_lwb_write_init(zilog_t *zilog, lwb_t *lwb)
|
|||
}
|
||||
if (lwb->lwb_zio == NULL) {
|
||||
lwb->lwb_zio = zio_rewrite(zilog->zl_root_zio, zilog->zl_spa,
|
||||
0, &lwb->lwb_blk, lwb->lwb_buf,
|
||||
lwb->lwb_sz, zil_lwb_write_done, lwb,
|
||||
ZIO_PRIORITY_LOG_WRITE, ZIO_FLAG_CANFAIL, &zb);
|
||||
0, &lwb->lwb_blk, lwb->lwb_buf, lwb->lwb_sz,
|
||||
zil_lwb_write_done, lwb, ZIO_PRIORITY_LOG_WRITE,
|
||||
ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE, &zb);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1040,7 +1031,7 @@ zil_clean(zilog_t *zilog)
|
|||
if ((itx != NULL) &&
|
||||
(itx->itx_lr.lrc_txg <= spa_last_synced_txg(zilog->zl_spa))) {
|
||||
(void) taskq_dispatch(zilog->zl_clean_taskq,
|
||||
(void (*)(void *))zil_itx_clean, zilog, TQ_NOSLEEP);
|
||||
(task_func_t *)zil_itx_clean, zilog, TQ_SLEEP);
|
||||
}
|
||||
mutex_exit(&zilog->zl_lock);
|
||||
}
|
||||
|
@ -1216,6 +1207,13 @@ zil_sync(zilog_t *zilog, dmu_tx_t *tx)
|
|||
spa_t *spa = zilog->zl_spa;
|
||||
lwb_t *lwb;
|
||||
|
||||
/*
|
||||
* We don't zero out zl_destroy_txg, so make sure we don't try
|
||||
* to destroy it twice.
|
||||
*/
|
||||
if (spa_sync_pass(spa) != 1)
|
||||
return;
|
||||
|
||||
mutex_enter(&zilog->zl_lock);
|
||||
|
||||
ASSERT(zilog->zl_stop_sync == 0);
|
||||
|
@ -1226,7 +1224,6 @@ zil_sync(zilog_t *zilog, dmu_tx_t *tx)
|
|||
blkptr_t blk = zh->zh_log;
|
||||
|
||||
ASSERT(list_head(&zilog->zl_lwb_list) == NULL);
|
||||
ASSERT(spa_sync_pass(spa) == 1);
|
||||
|
||||
bzero(zh, sizeof (zil_header_t));
|
||||
bzero(zilog->zl_replayed_seq, sizeof (zilog->zl_replayed_seq));
|
||||
|
@ -1245,12 +1242,7 @@ zil_sync(zilog_t *zilog, dmu_tx_t *tx)
|
|||
}
|
||||
}
|
||||
|
||||
for (;;) {
|
||||
lwb = list_head(&zilog->zl_lwb_list);
|
||||
if (lwb == NULL) {
|
||||
mutex_exit(&zilog->zl_lock);
|
||||
return;
|
||||
}
|
||||
while ((lwb = list_head(&zilog->zl_lwb_list)) != NULL) {
|
||||
zh->zh_log = lwb->lwb_blk;
|
||||
if (lwb->lwb_buf != NULL || lwb->lwb_max_txg > txg)
|
||||
break;
|
||||
|
@ -1343,25 +1335,6 @@ zil_free(zilog_t *zilog)
|
|||
kmem_free(zilog, sizeof (zilog_t));
|
||||
}
|
||||
|
||||
/*
|
||||
* return true if the initial log block is not valid
|
||||
*/
|
||||
static boolean_t
|
||||
zil_empty(zilog_t *zilog)
|
||||
{
|
||||
const zil_header_t *zh = zilog->zl_header;
|
||||
arc_buf_t *abuf = NULL;
|
||||
|
||||
if (BP_IS_HOLE(&zh->zh_log))
|
||||
return (B_TRUE);
|
||||
|
||||
if (zil_read_log_block(zilog, &zh->zh_log, &abuf) != 0)
|
||||
return (B_TRUE);
|
||||
|
||||
VERIFY(arc_buf_remove_ref(abuf, &abuf) == 1);
|
||||
return (B_FALSE);
|
||||
}
|
||||
|
||||
/*
|
||||
* Open an intent log.
|
||||
*/
|
||||
|
@ -1417,7 +1390,7 @@ zil_suspend(zilog_t *zilog)
|
|||
const zil_header_t *zh = zilog->zl_header;
|
||||
|
||||
mutex_enter(&zilog->zl_lock);
|
||||
if (zh->zh_claim_txg != 0) { /* unplayed log */
|
||||
if (zh->zh_flags & ZIL_REPLAY_NEEDED) { /* unplayed log */
|
||||
mutex_exit(&zilog->zl_lock);
|
||||
return (EBUSY);
|
||||
}
|
||||
|
@ -1601,7 +1574,7 @@ zil_replay(objset_t *os, void *arg, zil_replay_func_t *replay_func[TX_MAX_TYPE])
|
|||
const zil_header_t *zh = zilog->zl_header;
|
||||
zil_replay_arg_t zr;
|
||||
|
||||
if (zil_empty(zilog)) {
|
||||
if ((zh->zh_flags & ZIL_REPLAY_NEEDED) == 0) {
|
||||
zil_destroy(zilog, B_TRUE);
|
||||
return;
|
||||
}
|
||||
|
@ -1671,3 +1644,24 @@ out:
|
|||
mutex_exit(&zilog->zl_lock);
|
||||
return (ret);
|
||||
}
|
||||
|
||||
/* ARGSUSED */
|
||||
int
|
||||
zil_vdev_offline(char *osname, void *arg)
|
||||
{
|
||||
objset_t *os;
|
||||
zilog_t *zilog;
|
||||
int error;
|
||||
|
||||
error = dmu_objset_open(osname, DMU_OST_ANY, DS_MODE_USER, &os);
|
||||
if (error)
|
||||
return (error);
|
||||
|
||||
zilog = dmu_objset_zil(os);
|
||||
if (zil_suspend(zilog) != 0)
|
||||
error = EEXIST;
|
||||
else
|
||||
zil_resume(zilog);
|
||||
dmu_objset_close(os);
|
||||
return (error);
|
||||
}
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue