Update core ZFS code from build 121 to build 141.

This commit is contained in:
Brian Behlendorf 2010-05-28 13:45:14 -07:00
parent 6119cb885a
commit 428870ff73
174 changed files with 35763 additions and 14592 deletions

View File

@ -1 +1 @@
http://dlc.sun.com/osol/on/downloads/b121/on-src.tar.bz2
ssh://anon@hg.opensolaris.org/hg/onnv/onnv-gate/onnv_141

File diff suppressed because it is too large Load Diff

View File

@ -40,12 +40,14 @@
extern uint8_t dump_opt[256];
static char prefix[4] = "\t\t\t";
static void
print_log_bp(const blkptr_t *bp, const char *prefix)
{
char blkbuf[BP_SPRINTF_LEN];
sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, bp);
sprintf_blkptr(blkbuf, bp);
(void) printf("%s%s\n", prefix, blkbuf);
}
@ -54,19 +56,29 @@ static void
zil_prt_rec_create(zilog_t *zilog, int txtype, lr_create_t *lr)
{
time_t crtime = lr->lr_crtime[0];
char *name = (char *)(lr + 1);
char *link = name + strlen(name) + 1;
char *name, *link;
lr_attr_t *lrattr;
if (txtype == TX_SYMLINK)
(void) printf("\t\t\t%s -> %s\n", name, link);
else
(void) printf("\t\t\t%s\n", name);
name = (char *)(lr + 1);
(void) printf("\t\t\t%s", ctime(&crtime));
(void) printf("\t\t\tdoid %llu, foid %llu, mode %llo\n",
if (lr->lr_common.lrc_txtype == TX_CREATE_ATTR ||
lr->lr_common.lrc_txtype == TX_MKDIR_ATTR) {
lrattr = (lr_attr_t *)(lr + 1);
name += ZIL_XVAT_SIZE(lrattr->lr_attr_masksize);
}
if (txtype == TX_SYMLINK) {
link = name + strlen(name) + 1;
(void) printf("%s%s -> %s\n", prefix, name, link);
} else if (txtype != TX_MKXATTR) {
(void) printf("%s%s\n", prefix, name);
}
(void) printf("%s%s", prefix, ctime(&crtime));
(void) printf("%sdoid %llu, foid %llu, mode %llo\n", prefix,
(u_longlong_t)lr->lr_doid, (u_longlong_t)lr->lr_foid,
(longlong_t)lr->lr_mode);
(void) printf("\t\t\tuid %llu, gid %llu, gen %llu, rdev 0x%llx\n",
(void) printf("%suid %llu, gid %llu, gen %llu, rdev 0x%llx\n", prefix,
(u_longlong_t)lr->lr_uid, (u_longlong_t)lr->lr_gid,
(u_longlong_t)lr->lr_gen, (u_longlong_t)lr->lr_rdev);
}
@ -75,7 +87,7 @@ zil_prt_rec_create(zilog_t *zilog, int txtype, lr_create_t *lr)
static void
zil_prt_rec_remove(zilog_t *zilog, int txtype, lr_remove_t *lr)
{
(void) printf("\t\t\tdoid %llu, name %s\n",
(void) printf("%sdoid %llu, name %s\n", prefix,
(u_longlong_t)lr->lr_doid, (char *)(lr + 1));
}
@ -83,7 +95,7 @@ zil_prt_rec_remove(zilog_t *zilog, int txtype, lr_remove_t *lr)
static void
zil_prt_rec_link(zilog_t *zilog, int txtype, lr_link_t *lr)
{
(void) printf("\t\t\tdoid %llu, link_obj %llu, name %s\n",
(void) printf("%sdoid %llu, link_obj %llu, name %s\n", prefix,
(u_longlong_t)lr->lr_doid, (u_longlong_t)lr->lr_link_obj,
(char *)(lr + 1));
}
@ -95,9 +107,9 @@ zil_prt_rec_rename(zilog_t *zilog, int txtype, lr_rename_t *lr)
char *snm = (char *)(lr + 1);
char *tnm = snm + strlen(snm) + 1;
(void) printf("\t\t\tsdoid %llu, tdoid %llu\n",
(void) printf("%ssdoid %llu, tdoid %llu\n", prefix,
(u_longlong_t)lr->lr_sdoid, (u_longlong_t)lr->lr_tdoid);
(void) printf("\t\t\tsrc %s tgt %s\n", snm, tnm);
(void) printf("%ssrc %s tgt %s\n", prefix, snm, tnm);
}
/* ARGSUSED */
@ -106,43 +118,48 @@ zil_prt_rec_write(zilog_t *zilog, int txtype, lr_write_t *lr)
{
char *data, *dlimit;
blkptr_t *bp = &lr->lr_blkptr;
zbookmark_t zb;
char buf[SPA_MAXBLOCKSIZE];
int verbose = MAX(dump_opt['d'], dump_opt['i']);
int error;
(void) printf("\t\t\tfoid %llu, offset 0x%llx,"
" length 0x%llx, blkoff 0x%llx\n",
(u_longlong_t)lr->lr_foid, (longlong_t)lr->lr_offset,
(u_longlong_t)lr->lr_length, (u_longlong_t)lr->lr_blkoff);
(void) printf("%sfoid %llu, offset %llx, length %llx\n", prefix,
(u_longlong_t)lr->lr_foid, (u_longlong_t)lr->lr_offset,
(u_longlong_t)lr->lr_length);
if (verbose < 5)
if (txtype == TX_WRITE2 || verbose < 5)
return;
if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) {
(void) printf("\t\t\thas blkptr, %s\n",
(void) printf("%shas blkptr, %s\n", prefix,
bp->blk_birth >= spa_first_txg(zilog->zl_spa) ?
"will claim" : "won't claim");
print_log_bp(bp, "\t\t\t");
print_log_bp(bp, prefix);
if (BP_IS_HOLE(bp)) {
(void) printf("\t\t\tLSIZE 0x%llx\n",
(u_longlong_t)BP_GET_LSIZE(bp));
}
if (bp->blk_birth == 0) {
bzero(buf, sizeof (buf));
} else {
zbookmark_t zb;
ASSERT3U(bp->blk_cksum.zc_word[ZIL_ZC_OBJSET], ==,
dmu_objset_id(zilog->zl_os));
zb.zb_objset = bp->blk_cksum.zc_word[ZIL_ZC_OBJSET];
zb.zb_object = 0;
zb.zb_level = -1;
zb.zb_blkid = bp->blk_cksum.zc_word[ZIL_ZC_SEQ];
error = zio_wait(zio_read(NULL, zilog->zl_spa,
bp, buf, BP_GET_LSIZE(bp), NULL, NULL,
ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &zb));
if (error)
return;
(void) printf("%s<hole>\n", prefix);
return;
}
data = buf + lr->lr_blkoff;
if (bp->blk_birth < zilog->zl_header->zh_claim_txg) {
(void) printf("%s<block already committed>\n", prefix);
return;
}
SET_BOOKMARK(&zb, dmu_objset_id(zilog->zl_os),
lr->lr_foid, ZB_ZIL_LEVEL,
lr->lr_offset / BP_GET_LSIZE(bp));
error = zio_wait(zio_read(NULL, zilog->zl_spa,
bp, buf, BP_GET_LSIZE(bp), NULL, NULL,
ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &zb));
if (error)
return;
data = buf;
} else {
data = (char *)(lr + 1);
}
@ -150,7 +167,7 @@ zil_prt_rec_write(zilog_t *zilog, int txtype, lr_write_t *lr)
dlimit = data + MIN(lr->lr_length,
(verbose < 6 ? 20 : SPA_MAXBLOCKSIZE));
(void) printf("\t\t\t");
(void) printf("%s", prefix);
while (data < dlimit) {
if (isprint(*data))
(void) printf("%c ", *data);
@ -165,7 +182,7 @@ zil_prt_rec_write(zilog_t *zilog, int txtype, lr_write_t *lr)
static void
zil_prt_rec_truncate(zilog_t *zilog, int txtype, lr_truncate_t *lr)
{
(void) printf("\t\t\tfoid %llu, offset 0x%llx, length 0x%llx\n",
(void) printf("%sfoid %llu, offset 0x%llx, length 0x%llx\n", prefix,
(u_longlong_t)lr->lr_foid, (longlong_t)lr->lr_offset,
(u_longlong_t)lr->lr_length);
}
@ -177,38 +194,38 @@ zil_prt_rec_setattr(zilog_t *zilog, int txtype, lr_setattr_t *lr)
time_t atime = (time_t)lr->lr_atime[0];
time_t mtime = (time_t)lr->lr_mtime[0];
(void) printf("\t\t\tfoid %llu, mask 0x%llx\n",
(void) printf("%sfoid %llu, mask 0x%llx\n", prefix,
(u_longlong_t)lr->lr_foid, (u_longlong_t)lr->lr_mask);
if (lr->lr_mask & AT_MODE) {
(void) printf("\t\t\tAT_MODE %llo\n",
(void) printf("%sAT_MODE %llo\n", prefix,
(longlong_t)lr->lr_mode);
}
if (lr->lr_mask & AT_UID) {
(void) printf("\t\t\tAT_UID %llu\n",
(void) printf("%sAT_UID %llu\n", prefix,
(u_longlong_t)lr->lr_uid);
}
if (lr->lr_mask & AT_GID) {
(void) printf("\t\t\tAT_GID %llu\n",
(void) printf("%sAT_GID %llu\n", prefix,
(u_longlong_t)lr->lr_gid);
}
if (lr->lr_mask & AT_SIZE) {
(void) printf("\t\t\tAT_SIZE %llu\n",
(void) printf("%sAT_SIZE %llu\n", prefix,
(u_longlong_t)lr->lr_size);
}
if (lr->lr_mask & AT_ATIME) {
(void) printf("\t\t\tAT_ATIME %llu.%09llu %s",
(void) printf("%sAT_ATIME %llu.%09llu %s", prefix,
(u_longlong_t)lr->lr_atime[0],
(u_longlong_t)lr->lr_atime[1],
ctime(&atime));
}
if (lr->lr_mask & AT_MTIME) {
(void) printf("\t\t\tAT_MTIME %llu.%09llu %s",
(void) printf("%sAT_MTIME %llu.%09llu %s", prefix,
(u_longlong_t)lr->lr_mtime[0],
(u_longlong_t)lr->lr_mtime[1],
ctime(&mtime));
@ -219,7 +236,7 @@ zil_prt_rec_setattr(zilog_t *zilog, int txtype, lr_setattr_t *lr)
static void
zil_prt_rec_acl(zilog_t *zilog, int txtype, lr_acl_t *lr)
{
(void) printf("\t\t\tfoid %llu, aclcnt %llu\n",
(void) printf("%sfoid %llu, aclcnt %llu\n", prefix,
(u_longlong_t)lr->lr_foid, (u_longlong_t)lr->lr_aclcnt);
}
@ -251,10 +268,11 @@ static zil_rec_info_t zil_rec_info[TX_MAX_TYPE] = {
{ zil_prt_rec_create, "TX_MKDIR_ACL " },
{ zil_prt_rec_create, "TX_MKDIR_ATTR " },
{ zil_prt_rec_create, "TX_MKDIR_ACL_ATTR " },
{ zil_prt_rec_write, "TX_WRITE2 " },
};
/* ARGSUSED */
static void
static int
print_log_record(zilog_t *zilog, lr_t *lr, void *arg, uint64_t claim_txg)
{
int txtype;
@ -278,23 +296,24 @@ print_log_record(zilog_t *zilog, lr_t *lr, void *arg, uint64_t claim_txg)
zil_rec_info[txtype].zri_count++;
zil_rec_info[0].zri_count++;
return (0);
}
/* ARGSUSED */
static void
static int
print_log_block(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t claim_txg)
{
char blkbuf[BP_SPRINTF_LEN];
char blkbuf[BP_SPRINTF_LEN + 10];
int verbose = MAX(dump_opt['d'], dump_opt['i']);
char *claim;
if (verbose <= 3)
return;
return (0);
if (verbose >= 5) {
(void) strcpy(blkbuf, ", ");
sprintf_blkptr(blkbuf + strlen(blkbuf),
BP_SPRINTF_LEN - strlen(blkbuf), bp);
sprintf_blkptr(blkbuf + strlen(blkbuf), bp);
} else {
blkbuf[0] = '\0';
}
@ -308,6 +327,8 @@ print_log_block(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t claim_txg)
(void) printf("\tBlock seqno %llu, %s%s\n",
(u_longlong_t)bp->blk_cksum.zc_word[ZIL_ZC_SEQ], claim, blkbuf);
return (0);
}
static void
@ -340,17 +361,17 @@ dump_intent_log(zilog_t *zilog)
int verbose = MAX(dump_opt['d'], dump_opt['i']);
int i;
if (zh->zh_log.blk_birth == 0 || verbose < 2)
if (zh->zh_log.blk_birth == 0 || verbose < 1)
return;
(void) printf("\n ZIL header: claim_txg %llu, claim_seq %llu",
(u_longlong_t)zh->zh_claim_txg, (u_longlong_t)zh->zh_claim_seq);
(void) printf("\n ZIL header: claim_txg %llu, "
"claim_blk_seq %llu, claim_lr_seq %llu",
(u_longlong_t)zh->zh_claim_txg,
(u_longlong_t)zh->zh_claim_blk_seq,
(u_longlong_t)zh->zh_claim_lr_seq);
(void) printf(" replay_seq %llu, flags 0x%llx\n",
(u_longlong_t)zh->zh_replay_seq, (u_longlong_t)zh->zh_flags);
if (verbose >= 4)
print_log_bp(&zh->zh_log, "\n\tfirst block: ");
for (i = 0; i < TX_MAX_TYPE; i++)
zil_rec_info[i].zri_count = 0;

View File

@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#include <libintl.h>
@ -107,7 +106,8 @@ zfs_callback(zfs_handle_t *zhp, void *data)
zfs_prune_proplist(zhp,
cb->cb_props_table);
if (zfs_expand_proplist(zhp, cb->cb_proplist)
if (zfs_expand_proplist(zhp, cb->cb_proplist,
(cb->cb_flags & ZFS_ITER_RECVD_PROPS))
!= 0) {
free(node);
return (-1);
@ -350,11 +350,8 @@ zfs_for_each(int argc, char **argv, int flags, zfs_type_t types,
avl_pool = uu_avl_pool_create("zfs_pool", sizeof (zfs_node_t),
offsetof(zfs_node_t, zn_avlnode), zfs_sort, UU_DEFAULT);
if (avl_pool == NULL) {
(void) fprintf(stderr,
gettext("internal error: out of memory\n"));
exit(1);
}
if (avl_pool == NULL)
nomem();
cb.cb_sortcol = sortcol;
cb.cb_flags = flags;
@ -399,11 +396,8 @@ zfs_for_each(int argc, char **argv, int flags, zfs_type_t types,
sizeof (cb.cb_props_table));
}
if ((cb.cb_avl = uu_avl_create(avl_pool, NULL, UU_DEFAULT)) == NULL) {
(void) fprintf(stderr,
gettext("internal error: out of memory\n"));
exit(1);
}
if ((cb.cb_avl = uu_avl_create(avl_pool, NULL, UU_DEFAULT)) == NULL)
nomem();
if (argc == 0) {
/*
@ -453,11 +447,8 @@ zfs_for_each(int argc, char **argv, int flags, zfs_type_t types,
/*
* Finally, clean up the AVL tree.
*/
if ((walk = uu_avl_walk_start(cb.cb_avl, UU_WALK_ROBUST)) == NULL) {
(void) fprintf(stderr,
gettext("internal error: out of memory"));
exit(1);
}
if ((walk = uu_avl_walk_start(cb.cb_avl, UU_WALK_ROBUST)) == NULL)
nomem();
while ((node = uu_avl_walk_next(walk)) != NULL) {
uu_avl_remove(cb.cb_avl, node);

View File

@ -42,6 +42,7 @@ typedef struct zfs_sort_column {
#define ZFS_ITER_ARGS_CAN_BE_PATHS (1 << 1)
#define ZFS_ITER_PROP_LISTSNAPS (1 << 2)
#define ZFS_ITER_DEPTH_LIMIT (1 << 3)
#define ZFS_ITER_RECVD_PROPS (1 << 4)
int zfs_for_each(int, char **, int options, zfs_type_t,
zfs_sort_column_t *, zprop_list_t **, int, zfs_iter_f, void *);

File diff suppressed because it is too large Load Diff

View File

@ -19,15 +19,12 @@
* CDDL HEADER END
*/
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _ZFS_UTIL_H
#define _ZFS_UTIL_H
#pragma ident "%Z%%M% %I% %E% SMI"
#include <libzfs.h>
#ifdef __cplusplus
@ -35,6 +32,7 @@ extern "C" {
#endif
void * safe_malloc(size_t size);
void nomem(void);
libzfs_handle_t *g_zfs;
#ifdef __cplusplus

View File

@ -19,14 +19,11 @@
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#include <libzfs.h>
#undef verify /* both libzfs.h and zfs_context.h want to define this */
#include <sys/zfs_context.h>
#include <errno.h>
@ -69,6 +66,18 @@ ziprintf(const char *fmt, ...)
va_end(ap);
}
static void
compress_slashes(const char *src, char *dest)
{
while (*src != '\0') {
*dest = *src++;
while (*dest == '/' && *src == '/')
++src;
++dest;
}
*dest = '\0';
}
/*
* Given a full path to a file, translate into a dataset name and a relative
* path within the dataset. 'dataset' must be at least MAXNAMELEN characters,
@ -76,13 +85,16 @@ ziprintf(const char *fmt, ...)
* buffer, which we need later to get the object ID.
*/
static int
parse_pathname(const char *fullpath, char *dataset, char *relpath,
parse_pathname(const char *inpath, char *dataset, char *relpath,
struct stat64 *statbuf)
{
struct extmnttab mp;
FILE *fp;
int match;
const char *rel;
char fullpath[MAXPATHLEN];
compress_slashes(inpath, fullpath);
if (fullpath[0] != '/') {
(void) fprintf(stderr, "invalid object '%s': must be full "
@ -162,8 +174,8 @@ object_from_path(const char *dataset, const char *path, struct stat64 *statbuf,
*/
sync();
if ((err = dmu_objset_open(dataset, DMU_OST_ZFS,
DS_MODE_USER | DS_MODE_READONLY, &os)) != 0) {
err = dmu_objset_own(dataset, DMU_OST_ZFS, B_TRUE, FTAG, &os);
if (err != 0) {
(void) fprintf(stderr, "cannot open dataset '%s': %s\n",
dataset, strerror(err));
return (-1);
@ -172,7 +184,7 @@ object_from_path(const char *dataset, const char *path, struct stat64 *statbuf,
record->zi_objset = dmu_objset_id(os);
record->zi_object = statbuf->st_ino;
dmu_objset_close(os);
dmu_objset_disown(os, FTAG);
return (0);
}
@ -247,17 +259,17 @@ calculate_range(const char *dataset, err_type_t type, int level, char *range,
* Get the dnode associated with object, so we can calculate the block
* size.
*/
if ((err = dmu_objset_open(dataset, DMU_OST_ANY,
DS_MODE_USER | DS_MODE_READONLY, &os)) != 0) {
if ((err = dmu_objset_own(dataset, DMU_OST_ANY,
B_TRUE, FTAG, &os)) != 0) {
(void) fprintf(stderr, "cannot open dataset '%s': %s\n",
dataset, strerror(err));
goto out;
}
if (record->zi_object == 0) {
dn = os->os->os_meta_dnode;
dn = os->os_meta_dnode;
} else {
err = dnode_hold(os->os, record->zi_object, FTAG, &dn);
err = dnode_hold(os, record->zi_object, FTAG, &dn);
if (err != 0) {
(void) fprintf(stderr, "failed to hold dnode "
"for object %llu\n",
@ -306,11 +318,11 @@ calculate_range(const char *dataset, err_type_t type, int level, char *range,
ret = 0;
out:
if (dn) {
if (dn != os->os->os_meta_dnode)
if (dn != os->os_meta_dnode)
dnode_rele(dn, FTAG);
}
if (os)
dmu_objset_close(os);
dmu_objset_disown(os, FTAG);
return (ret);
}
@ -347,8 +359,8 @@ translate_record(err_type_t type, const char *object, const char *range,
case TYPE_CONFIG:
record->zi_type = DMU_OT_PACKED_NVLIST;
break;
case TYPE_BPLIST:
record->zi_type = DMU_OT_BPLIST;
case TYPE_BPOBJ:
record->zi_type = DMU_OT_BPOBJ;
break;
case TYPE_SPACEMAP:
record->zi_type = DMU_OT_SPACE_MAP;
@ -469,6 +481,14 @@ translate_device(const char *pool, const char *device, err_type_t label_type,
record->zi_start = offsetof(vdev_label_t, vl_vdev_phys);
record->zi_end = record->zi_start + VDEV_PHYS_SIZE - 1;
break;
case TYPE_LABEL_PAD1:
record->zi_start = offsetof(vdev_label_t, vl_pad1);
record->zi_end = record->zi_start + VDEV_PAD_SIZE - 1;
break;
case TYPE_LABEL_PAD2:
record->zi_start = offsetof(vdev_label_t, vl_pad2);
record->zi_end = record->zi_start + VDEV_PAD_SIZE - 1;
break;
}
return (0);
}

View File

@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/*
@ -42,12 +41,12 @@
* any attempt to read from the device will return EIO, but any attempt to
* reopen the device will also return ENXIO.
* For label faults, the -L option must be specified. This allows faults
* to be injected into either the nvlist or uberblock region of all the labels
* for the specified device.
* to be injected into either the nvlist, uberblock, pad1, or pad2 region
* of all the labels for the specified device.
*
* This form of the command looks like:
*
* zinject -d device [-e errno] [-L <uber | nvlist>] pool
* zinject -d device [-e errno] [-L <uber | nvlist | pad1 | pad2>] pool
*
*
* DATA FAULTS
@ -70,7 +69,7 @@
* mos Any data in the MOS
* mosdir object directory
* config pool configuration
* bplist blkptr list
* bpobj blkptr list
* spacemap spacemap
* metaslab metaslab
* errlog persistent error log
@ -164,11 +163,13 @@ static const char *errtable[TYPE_INVAL] = {
"mosdir",
"metaslab",
"config",
"bplist",
"bpobj",
"spacemap",
"errlog",
"uber",
"nvlist"
"nvlist",
"pad1",
"pad2"
};
static err_type_t
@ -192,8 +193,8 @@ type_to_name(uint64_t type)
return ("metaslab");
case DMU_OT_PACKED_NVLIST:
return ("config");
case DMU_OT_BPLIST:
return ("bplist");
case DMU_OT_BPOBJ:
return ("bpobj");
case DMU_OT_SPACE_MAP:
return ("spacemap");
case DMU_OT_ERROR_LOG:
@ -222,11 +223,28 @@ usage(void)
"\t\tClear the particular record (if given a numeric ID), or\n"
"\t\tall records if 'all' is specificed.\n"
"\n"
"\tzinject -d device [-e errno] [-L <nvlist|uber>] [-F] pool\n"
"\tzinject -p <function name> pool\n"
"\t\tInject a panic fault at the specified function. Only \n"
"\t\tfunctions which call spa_vdev_config_exit(), or \n"
"\t\tspa_vdev_exit() will trigger a panic.\n"
"\n"
"\tzinject -d device [-e errno] [-L <nvlist|uber|pad1|pad2>] [-F]\n"
"\t [-T <read|write|free|claim|all> pool\n"
"\t\tInject a fault into a particular device or the device's\n"
"\t\tlabel. Label injection can either be 'nvlist' or 'uber'.\n"
"\t\tlabel. Label injection can either be 'nvlist', 'uber',\n "
"\t\t'pad1', or 'pad2'.\n"
"\t\t'errno' can either be 'nxio' (the default) or 'io'.\n"
"\n"
"\tzinject -d device -A <degrade|fault> pool\n"
"\t\tPerform a specific action on a particular device\n"
"\n"
"\tzinject -I [-s <seconds> | -g <txgs>] pool\n"
"\t\tCause the pool to stop writing blocks yet not\n"
"\t\treport errors for a duration. Simulates buggy hardware\n"
"\t\tthat fails to honor cache flush requests.\n"
"\t\tDefault duration is 30 seconds. The machine is panicked\n"
"\t\tat the end of the duration.\n"
"\n"
"\tzinject -b objset:object:level:blkid pool\n"
"\n"
"\t\tInject an error into pool 'pool' with the numeric bookmark\n"
@ -267,7 +285,7 @@ usage(void)
"\t\t\ton a ZFS filesystem.\n"
"\n"
"\t-t <mos>\tInject errors into the MOS for objects of the given\n"
"\t\t\ttype. Valid types are: mos, mosdir, config, bplist,\n"
"\t\t\ttype. Valid types are: mos, mosdir, config, bpobj,\n"
"\t\t\tspacemap, metaslab, errlog. The only valid <object> is\n"
"\t\t\tthe poolname.\n");
}
@ -286,6 +304,12 @@ iter_handlers(int (*func)(int, const char *, zinject_record_t *, void *),
&zc.zc_inject_record, data)) != 0)
return (ret);
if (errno != ENOENT) {
(void) fprintf(stderr, "Unable to list handlers: %s\n",
strerror(errno));
return (-1);
}
return (0);
}
@ -295,7 +319,7 @@ print_data_handler(int id, const char *pool, zinject_record_t *record,
{
int *count = data;
if (record->zi_guid != 0)
if (record->zi_guid != 0 || record->zi_func[0] != '\0')
return (0);
if (*count == 0) {
@ -327,7 +351,7 @@ print_device_handler(int id, const char *pool, zinject_record_t *record,
{
int *count = data;
if (record->zi_guid == 0)
if (record->zi_guid == 0 || record->zi_func[0] != '\0')
return (0);
if (*count == 0) {
@ -343,6 +367,27 @@ print_device_handler(int id, const char *pool, zinject_record_t *record,
return (0);
}
static int
print_panic_handler(int id, const char *pool, zinject_record_t *record,
void *data)
{
int *count = data;
if (record->zi_func[0] == '\0')
return (0);
if (*count == 0) {
(void) printf("%3s %-15s %s\n", "ID", "POOL", "FUNCTION");
(void) printf("--- --------------- ----------------\n");
}
*count += 1;
(void) printf("%3d %-15s %s\n", id, pool, record->zi_func);
return (0);
}
/*
* Print all registered error handlers. Returns the number of handlers
* registered.
@ -356,6 +401,9 @@ print_all_handlers(void)
(void) printf("\n");
count = 0;
(void) iter_handlers(print_data_handler, &count);
(void) printf("\n");
count = 0;
(void) iter_handlers(print_panic_handler, &count);
return (count);
}
@ -386,7 +434,8 @@ cancel_all_handlers(void)
{
int ret = iter_handlers(cancel_one_handler, NULL);
(void) printf("removed all registered handlers\n");
if (ret == 0)
(void) printf("removed all registered handlers\n");
return (ret);
}
@ -443,6 +492,15 @@ register_handler(const char *pool, int flags, zinject_record_t *record,
if (record->zi_guid) {
(void) printf(" vdev: %llx\n",
(u_longlong_t)record->zi_guid);
} else if (record->zi_func[0] != '\0') {
(void) printf(" panic function: %s\n",
record->zi_func);
} else if (record->zi_duration > 0) {
(void) printf(" time: %lld seconds\n",
(u_longlong_t)record->zi_duration);
} else if (record->zi_duration < 0) {
(void) printf(" txgs: %lld \n",
(u_longlong_t)-record->zi_duration);
} else {
(void) printf("objset: %llu\n",
(u_longlong_t)record->zi_objset);
@ -464,6 +522,22 @@ register_handler(const char *pool, int flags, zinject_record_t *record,
return (0);
}
int
perform_action(const char *pool, zinject_record_t *record, int cmd)
{
zfs_cmd_t zc;
ASSERT(cmd == VDEV_STATE_DEGRADED || cmd == VDEV_STATE_FAULTED);
(void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name));
zc.zc_guid = record->zi_guid;
zc.zc_cookie = cmd;
if (ioctl(zfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
return (0);
return (1);
}
int
main(int argc, char **argv)
{
@ -477,12 +551,17 @@ main(int argc, char **argv)
int quiet = 0;
int error = 0;
int domount = 0;
int io_type = ZIO_TYPES;
int action = VDEV_STATE_UNKNOWN;
err_type_t type = TYPE_INVAL;
err_type_t label = TYPE_INVAL;
zinject_record_t record = { 0 };
char pool[MAXNAMELEN];
char dataset[MAXNAMELEN];
zfs_handle_t *zhp;
int nowrites = 0;
int dur_txg = 0;
int dur_secs = 0;
int ret;
int flags = 0;
@ -514,11 +593,24 @@ main(int argc, char **argv)
return (0);
}
while ((c = getopt(argc, argv, ":ab:d:f:Fqhc:t:l:mr:e:uL:")) != -1) {
while ((c = getopt(argc, argv,
":aA:b:d:f:Fg:qhIc:t:T:l:mr:s:e:uL:p:")) != -1) {
switch (c) {
case 'a':
flags |= ZINJECT_FLUSH_ARC;
break;
case 'A':
if (strcasecmp(optarg, "degrade") == 0) {
action = VDEV_STATE_DEGRADED;
} else if (strcasecmp(optarg, "fault") == 0) {
action = VDEV_STATE_FAULTED;
} else {
(void) fprintf(stderr, "invalid action '%s': "
"must be 'degrade' or 'fault'\n", optarg);
usage();
return (1);
}
break;
case 'b':
raw = optarg;
break;
@ -554,9 +646,27 @@ main(int argc, char **argv)
case 'F':
record.zi_failfast = B_TRUE;
break;
case 'g':
dur_txg = 1;
record.zi_duration = (int)strtol(optarg, &end, 10);
if (record.zi_duration <= 0 || *end != '\0') {
(void) fprintf(stderr, "invalid duration '%s': "
"must be a positive integer\n", optarg);
usage();
return (1);
}
/* store duration of txgs as its negative */
record.zi_duration *= -1;
break;
case 'h':
usage();
return (0);
case 'I':
/* default duration, if one hasn't yet been defined */
nowrites = 1;
if (dur_secs == 0 && dur_txg == 0)
record.zi_duration = 30;
break;
case 'l':
level = (int)strtol(optarg, &end, 10);
if (*end != '\0') {
@ -569,12 +679,45 @@ main(int argc, char **argv)
case 'm':
domount = 1;
break;
case 'p':
(void) strlcpy(record.zi_func, optarg,
sizeof (record.zi_func));
break;
case 'q':
quiet = 1;
break;
case 'r':
range = optarg;
break;
case 's':
dur_secs = 1;
record.zi_duration = (int)strtol(optarg, &end, 10);
if (record.zi_duration <= 0 || *end != '\0') {
(void) fprintf(stderr, "invalid duration '%s': "
"must be a positive integer\n", optarg);
usage();
return (1);
}
break;
case 'T':
if (strcasecmp(optarg, "read") == 0) {
io_type = ZIO_TYPE_READ;
} else if (strcasecmp(optarg, "write") == 0) {
io_type = ZIO_TYPE_WRITE;
} else if (strcasecmp(optarg, "free") == 0) {
io_type = ZIO_TYPE_FREE;
} else if (strcasecmp(optarg, "claim") == 0) {
io_type = ZIO_TYPE_CLAIM;
} else if (strcasecmp(optarg, "all") == 0) {
io_type = ZIO_TYPES;
} else {
(void) fprintf(stderr, "invalid I/O type "
"'%s': must be 'read', 'write', 'free', "
"'claim' or 'all'\n", optarg);
usage();
return (1);
}
break;
case 't':
if ((type = name_to_type(optarg)) == TYPE_INVAL &&
!MOS_TYPE(type)) {
@ -617,7 +760,8 @@ main(int argc, char **argv)
* '-c' is invalid with any other options.
*/
if (raw != NULL || range != NULL || type != TYPE_INVAL ||
level != 0) {
level != 0 || record.zi_func[0] != '\0' ||
record.zi_duration != 0) {
(void) fprintf(stderr, "cancel (-c) incompatible with "
"any other options\n");
usage();
@ -649,7 +793,8 @@ main(int argc, char **argv)
* for doing injection, so handle it separately here.
*/
if (raw != NULL || range != NULL || type != TYPE_INVAL ||
level != 0) {
level != 0 || record.zi_func[0] != '\0' ||
record.zi_duration != 0) {
(void) fprintf(stderr, "device (-d) incompatible with "
"data error injection\n");
usage();
@ -672,12 +817,18 @@ main(int argc, char **argv)
return (1);
}
record.zi_iotype = io_type;
if (translate_device(pool, device, label, &record) != 0)
return (1);
if (!error)
error = ENXIO;
if (action != VDEV_STATE_UNKNOWN)
return (perform_action(pool, &record, action));
} else if (raw != NULL) {
if (range != NULL || type != TYPE_INVAL || level != 0) {
if (range != NULL || type != TYPE_INVAL || level != 0 ||
record.zi_func[0] != '\0' || record.zi_duration != 0) {
(void) fprintf(stderr, "raw (-b) format with "
"any other options\n");
usage();
@ -704,10 +855,52 @@ main(int argc, char **argv)
return (1);
if (!error)
error = EIO;
} else if (record.zi_func[0] != '\0') {
if (raw != NULL || range != NULL || type != TYPE_INVAL ||
level != 0 || device != NULL || record.zi_duration != 0) {
(void) fprintf(stderr, "panic (-p) incompatible with "
"other options\n");
usage();
return (2);
}
if (argc < 1 || argc > 2) {
(void) fprintf(stderr, "panic (-p) injection requires "
"a single pool name and an optional id\n");
usage();
return (2);
}
(void) strcpy(pool, argv[0]);
if (argv[1] != NULL)
record.zi_type = atoi(argv[1]);
dataset[0] = '\0';
} else if (record.zi_duration != 0) {
if (nowrites == 0) {
(void) fprintf(stderr, "-s or -g meaningless "
"without -I (ignore writes)\n");
usage();
return (2);
} else if (dur_secs && dur_txg) {
(void) fprintf(stderr, "choose a duration either "
"in seconds (-s) or a number of txgs (-g) "
"but not both\n");
usage();
return (2);
} else if (argc != 1) {
(void) fprintf(stderr, "ignore writes (-I) "
"injection requires a single pool name\n");
usage();
return (2);
}
(void) strcpy(pool, argv[0]);
dataset[0] = '\0';
} else if (type == TYPE_INVAL) {
if (flags == 0) {
(void) fprintf(stderr, "at least one of '-b', '-d', "
"'-t', '-a', or '-u' must be specified\n");
"'-t', '-a', '-p', '-I' or '-u' "
"must be specified\n");
usage();
return (2);
}

View File

@ -19,15 +19,12 @@
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _ZINJECT_H
#define _ZINJECT_H
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/zfs_ioctl.h>
#ifdef __cplusplus
@ -41,11 +38,13 @@ typedef enum {
TYPE_MOSDIR, /* MOS object directory */
TYPE_METASLAB, /* metaslab objects */
TYPE_CONFIG, /* MOS config */
TYPE_BPLIST, /* block pointer list */
TYPE_BPOBJ, /* block pointer list */
TYPE_SPACEMAP, /* space map objects */
TYPE_ERRLOG, /* persistent error log */
TYPE_LABEL_UBERBLOCK, /* label specific uberblock */
TYPE_LABEL_NVLIST, /* label specific nvlist */
TYPE_LABEL_PAD1, /* label specific 8K pad1 area */
TYPE_LABEL_PAD2, /* label specific 8K pad2 area */
TYPE_INVAL
} err_type_t;

File diff suppressed because it is too large Load Diff

View File

@ -19,12 +19,10 @@
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <errno.h>
#include <libgen.h>
#include <libintl.h>
@ -50,22 +48,6 @@ safe_malloc(size_t size)
return (data);
}
/*
* Same as above, but for strdup()
*/
char *
safe_strdup(const char *str)
{
char *ret;
if ((ret = strdup(str)) == NULL) {
(void) fprintf(stderr, "internal error: out of memory\n");
exit(1);
}
return (ret);
}
/*
* Display an out of memory error message and abort the current program.
*/

View File

@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef ZPOOL_UTIL_H
@ -37,7 +36,6 @@ extern "C" {
* Basic utility functions
*/
void *safe_malloc(size_t);
char *safe_strdup(const char *);
void zpool_no_memory(void);
uint_t num_logs(nvlist_t *nv);
@ -46,7 +44,9 @@ uint_t num_logs(nvlist_t *nv);
*/
nvlist_t *make_root_vdev(zpool_handle_t *zhp, int force, int check_rep,
boolean_t isreplace, boolean_t dryrun, int argc, char **argv);
boolean_t replacing, boolean_t dryrun, int argc, char **argv);
nvlist_t *split_mirror_vdev(zpool_handle_t *zhp, char *newname,
nvlist_t *props, splitflags_t flags, int argc, char **argv);
/*
* Pool list functions

View File

@ -20,8 +20,7 @@
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/*
@ -1004,8 +1003,8 @@ is_spare(nvlist_t *config, const char *path)
return (B_FALSE);
}
free(name);
(void) close(fd);
verify(nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID, &guid) == 0);
nvlist_free(label);
@ -1029,8 +1028,8 @@ is_spare(nvlist_t *config, const char *path)
* the majority of this task.
*/
static int
check_in_use(nvlist_t *config, nvlist_t *nv, int force, int isreplacing,
int isspare)
check_in_use(nvlist_t *config, nvlist_t *nv, boolean_t force,
boolean_t replacing, boolean_t isspare)
{
nvlist_t **child;
uint_t c, children;
@ -1051,13 +1050,14 @@ check_in_use(nvlist_t *config, nvlist_t *nv, int force, int isreplacing,
* hot spare within the same pool. If so, we allow it
* regardless of what libdiskmgt or zpool_in_use() says.
*/
if (isreplacing) {
if (replacing) {
if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
&wholedisk) == 0 && wholedisk)
(void) snprintf(buf, sizeof (buf), "%ss0",
path);
else
(void) strlcpy(buf, path, sizeof (buf));
if (is_spare(config, buf))
return (0);
}
@ -1073,21 +1073,21 @@ check_in_use(nvlist_t *config, nvlist_t *nv, int force, int isreplacing,
for (c = 0; c < children; c++)
if ((ret = check_in_use(config, child[c], force,
isreplacing, B_FALSE)) != 0)
replacing, B_FALSE)) != 0)
return (ret);
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
&child, &children) == 0)
for (c = 0; c < children; c++)
if ((ret = check_in_use(config, child[c], force,
isreplacing, B_TRUE)) != 0)
replacing, B_TRUE)) != 0)
return (ret);
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
&child, &children) == 0)
for (c = 0; c < children; c++)
if ((ret = check_in_use(config, child[c], force,
isreplacing, B_FALSE)) != 0)
replacing, B_FALSE)) != 0)
return (ret);
return (0);
@ -1360,6 +1360,52 @@ construct_spec(int argc, char **argv)
return (nvroot);
}
nvlist_t *
split_mirror_vdev(zpool_handle_t *zhp, char *newname, nvlist_t *props,
splitflags_t flags, int argc, char **argv)
{
nvlist_t *newroot = NULL, **child;
uint_t c, children;
if (argc > 0) {
if ((newroot = construct_spec(argc, argv)) == NULL) {
(void) fprintf(stderr, gettext("Unable to build a "
"pool from the specified devices\n"));
return (NULL);
}
if (!flags.dryrun && make_disks(zhp, newroot) != 0) {
nvlist_free(newroot);
return (NULL);
}
/* avoid any tricks in the spec */
verify(nvlist_lookup_nvlist_array(newroot,
ZPOOL_CONFIG_CHILDREN, &child, &children) == 0);
for (c = 0; c < children; c++) {
char *path;
const char *type;
int min, max;
verify(nvlist_lookup_string(child[c],
ZPOOL_CONFIG_PATH, &path) == 0);
if ((type = is_grouping(path, &min, &max)) != NULL) {
(void) fprintf(stderr, gettext("Cannot use "
"'%s' as a device for splitting\n"), type);
nvlist_free(newroot);
return (NULL);
}
}
}
if (zpool_vdev_split(zhp, newname, &newroot, props, flags) != 0) {
if (newroot != NULL)
nvlist_free(newroot);
return (NULL);
}
return (newroot);
}
/*
* Get and validate the contents of the given vdev specification. This ensures
@ -1373,7 +1419,7 @@ construct_spec(int argc, char **argv)
*/
nvlist_t *
make_root_vdev(zpool_handle_t *zhp, int force, int check_rep,
boolean_t isreplacing, boolean_t dryrun, int argc, char **argv)
boolean_t replacing, boolean_t dryrun, int argc, char **argv)
{
nvlist_t *newroot;
nvlist_t *poolconfig = NULL;
@ -1396,8 +1442,7 @@ make_root_vdev(zpool_handle_t *zhp, int force, int check_rep,
* uses (such as a dedicated dump device) that even '-f' cannot
* override.
*/
if (check_in_use(poolconfig, newroot, force, isreplacing,
B_FALSE) != 0) {
if (check_in_use(poolconfig, newroot, force, replacing, B_FALSE) != 0) {
nvlist_free(newroot);
return (NULL);
}

File diff suppressed because it is too large Load Diff

View File

@ -19,15 +19,13 @@
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _LIBNVPAIR_H
#define _LIBNVPAIR_H
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/nvpair.h>
#include <stdlib.h>
#include <stdio.h>
@ -40,6 +38,7 @@ extern "C" {
void nvlist_print(FILE *, nvlist_t *);
int nvpair_value_match(nvpair_t *, int, char *, char **);
int nvpair_value_match_regex(nvpair_t *, int, char *, regex_t *, char **);
void dump_nvlist(nvlist_t *, int);
#ifdef __cplusplus
}

View File

@ -19,14 +19,13 @@
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <unistd.h>
#include <strings.h>
#include <libintl.h>
#include <sys/types.h>
#include <sys/inttypes.h>
#include "libnvpair.h"
@ -272,6 +271,156 @@ nvlist_print(FILE *fp, nvlist_t *nvl)
nvlist_print_with_indent(fp, nvl, 0);
}
#define NVP(elem, type, vtype, ptype, format) { \
vtype value; \
\
(void) nvpair_value_##type(elem, &value); \
(void) printf("%*s%s: " format "\n", indent, "", \
nvpair_name(elem), (ptype)value); \
}
#define NVPA(elem, type, vtype, ptype, format) { \
uint_t i, count; \
vtype *value; \
\
(void) nvpair_value_##type(elem, &value, &count); \
for (i = 0; i < count; i++) { \
(void) printf("%*s%s[%d]: " format "\n", indent, "", \
nvpair_name(elem), i, (ptype)value[i]); \
} \
}
/*
* Similar to nvlist_print() but handles arrays slightly differently.
*/
void
dump_nvlist(nvlist_t *list, int indent)
{
nvpair_t *elem = NULL;
boolean_t bool_value;
nvlist_t *nvlist_value;
nvlist_t **nvlist_array_value;
uint_t i, count;
if (list == NULL) {
return;
}
while ((elem = nvlist_next_nvpair(list, elem)) != NULL) {
switch (nvpair_type(elem)) {
case DATA_TYPE_BOOLEAN_VALUE:
(void) nvpair_value_boolean_value(elem, &bool_value);
(void) printf("%*s%s: %s\n", indent, "",
nvpair_name(elem), bool_value ? "true" : "false");
break;
case DATA_TYPE_BYTE:
NVP(elem, byte, uchar_t, int, "%u");
break;
case DATA_TYPE_INT8:
NVP(elem, int8, int8_t, int, "%d");
break;
case DATA_TYPE_UINT8:
NVP(elem, uint8, uint8_t, int, "%u");
break;
case DATA_TYPE_INT16:
NVP(elem, int16, int16_t, int, "%d");
break;
case DATA_TYPE_UINT16:
NVP(elem, uint16, uint16_t, int, "%u");
break;
case DATA_TYPE_INT32:
NVP(elem, int32, int32_t, long, "%ld");
break;
case DATA_TYPE_UINT32:
NVP(elem, uint32, uint32_t, ulong_t, "%lu");
break;
case DATA_TYPE_INT64:
NVP(elem, int64, int64_t, longlong_t, "%lld");
break;
case DATA_TYPE_UINT64:
NVP(elem, uint64, uint64_t, u_longlong_t, "%llu");
break;
case DATA_TYPE_STRING:
NVP(elem, string, char *, char *, "'%s'");
break;
case DATA_TYPE_BYTE_ARRAY:
NVPA(elem, byte_array, uchar_t, int, "%u");
break;
case DATA_TYPE_INT8_ARRAY:
NVPA(elem, int8_array, int8_t, int, "%d");
break;
case DATA_TYPE_UINT8_ARRAY:
NVPA(elem, uint8_array, uint8_t, int, "%u");
break;
case DATA_TYPE_INT16_ARRAY:
NVPA(elem, int16_array, int16_t, int, "%d");
break;
case DATA_TYPE_UINT16_ARRAY:
NVPA(elem, uint16_array, uint16_t, int, "%u");
break;
case DATA_TYPE_INT32_ARRAY:
NVPA(elem, int32_array, int32_t, long, "%ld");
break;
case DATA_TYPE_UINT32_ARRAY:
NVPA(elem, uint32_array, uint32_t, ulong_t, "%lu");
break;
case DATA_TYPE_INT64_ARRAY:
NVPA(elem, int64_array, int64_t, longlong_t, "%lld");
break;
case DATA_TYPE_UINT64_ARRAY:
NVPA(elem, uint64_array, uint64_t, u_longlong_t,
"%llu");
break;
case DATA_TYPE_STRING_ARRAY:
NVPA(elem, string_array, char *, char *, "'%s'");
break;
case DATA_TYPE_NVLIST:
(void) nvpair_value_nvlist(elem, &nvlist_value);
(void) printf("%*s%s:\n", indent, "",
nvpair_name(elem));
dump_nvlist(nvlist_value, indent + 4);
break;
case DATA_TYPE_NVLIST_ARRAY:
(void) nvpair_value_nvlist_array(elem,
&nvlist_array_value, &count);
for (i = 0; i < count; i++) {
(void) printf("%*s%s[%u]:\n", indent, "",
nvpair_name(elem), i);
dump_nvlist(nvlist_array_value[i], indent + 4);
}
break;
default:
(void) printf(dgettext(TEXT_DOMAIN, "bad config type "
"%d for %s\n"), nvpair_type(elem),
nvpair_name(elem));
}
}
}
/*
* Determine if string 'value' matches 'nvp' value. The 'value' string is
* converted, depending on the type of 'nvp', prior to match. For numeric

View File

@ -20,8 +20,7 @@
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _LIBZFS_H
@ -66,7 +65,6 @@ enum {
EZFS_BADSTREAM, /* bad backup stream */
EZFS_DSREADONLY, /* dataset is readonly */
EZFS_VOLTOOBIG, /* volume is too large for 32-bit system */
EZFS_VOLHASDATA, /* volume already contains data */
EZFS_INVALIDNAME, /* invalid dataset name */
EZFS_BADRESTORE, /* unable to restore to destination */
EZFS_BADBACKUP, /* backup failed */
@ -85,17 +83,15 @@ enum {
EZFS_UMOUNTFAILED, /* failed to unmount dataset */
EZFS_UNSHARENFSFAILED, /* unshare(1M) failed */
EZFS_SHARENFSFAILED, /* share(1M) failed */
EZFS_DEVLINKS, /* failed to create zvol links */
EZFS_PERM, /* permission denied */
EZFS_NOSPC, /* out of space */
EZFS_FAULT, /* bad address */
EZFS_IO, /* I/O error */
EZFS_INTR, /* signal received */
EZFS_ISSPARE, /* device is a hot spare */
EZFS_INVALCONFIG, /* invalid vdev configuration */
EZFS_RECURSIVE, /* recursive dependency */
EZFS_NOHISTORY, /* no history object */
EZFS_UNSHAREISCSIFAILED, /* iscsitgtd failed request to unshare */
EZFS_SHAREISCSIFAILED, /* iscsitgtd failed request to share */
EZFS_POOLPROPS, /* couldn't retrieve pool props */
EZFS_POOL_NOTSUP, /* ops not supported for this type of pool */
EZFS_POOL_INVALARG, /* invalid argument for this pool operation */
@ -103,7 +99,6 @@ enum {
EZFS_OPENFAILED, /* open of device failed */
EZFS_NOCAP, /* couldn't get capacity */
EZFS_LABELFAILED, /* write of label failed */
EZFS_ISCSISVCUNAVAIL, /* iscsi service unavailable */
EZFS_BADWHO, /* invalid permission who */
EZFS_BADPERM, /* invalid permission */
EZFS_BADPERMSET, /* invalid permission set name */
@ -119,6 +114,12 @@ enum {
EZFS_UNPLAYED_LOGS, /* log device has unplayed logs */
EZFS_REFTAG_RELE, /* snapshot release: tag not found */
EZFS_REFTAG_HOLD, /* snapshot hold: tag already exists */
EZFS_TAGTOOLONG, /* snapshot hold/rele: tag too long */
EZFS_PIPEFAILED, /* pipe create failed */
EZFS_THREADCREATEFAILED, /* thread create failed */
EZFS_POSTSPLIT_ONLINE, /* onlining a disk after splitting it */
EZFS_SCRUBBING, /* currently scrubbing */
EZFS_NO_SCRUB, /* no active scrub */
EZFS_UNKNOWN
};
@ -213,11 +214,19 @@ extern int zpool_create(libzfs_handle_t *, const char *, nvlist_t *,
extern int zpool_destroy(zpool_handle_t *);
extern int zpool_add(zpool_handle_t *, nvlist_t *);
typedef struct splitflags {
/* do not split, but return the config that would be split off */
int dryrun : 1;
/* after splitting, import the pool */
int import : 1;
} splitflags_t;
/*
* Functions to manipulate pool and vdev state
*/
extern int zpool_scrub(zpool_handle_t *, pool_scrub_type_t);
extern int zpool_clear(zpool_handle_t *, const char *);
extern int zpool_scan(zpool_handle_t *, pool_scan_func_t);
extern int zpool_clear(zpool_handle_t *, const char *, nvlist_t *);
extern int zpool_vdev_online(zpool_handle_t *, const char *, int,
vdev_state_t *);
@ -226,9 +235,11 @@ extern int zpool_vdev_attach(zpool_handle_t *, const char *,
const char *, nvlist_t *, int);
extern int zpool_vdev_detach(zpool_handle_t *, const char *);
extern int zpool_vdev_remove(zpool_handle_t *, const char *);
extern int zpool_vdev_split(zpool_handle_t *, char *, nvlist_t **, nvlist_t *,
splitflags_t);
extern int zpool_vdev_fault(zpool_handle_t *, uint64_t);
extern int zpool_vdev_degrade(zpool_handle_t *, uint64_t);
extern int zpool_vdev_fault(zpool_handle_t *, uint64_t, vdev_aux_t);
extern int zpool_vdev_degrade(zpool_handle_t *, uint64_t, vdev_aux_t);
extern int zpool_vdev_clear(zpool_handle_t *, uint64_t);
extern nvlist_t *zpool_find_vdev(zpool_handle_t *, const char *, boolean_t *,
@ -298,6 +309,7 @@ typedef enum {
extern zpool_status_t zpool_get_status(zpool_handle_t *, char **);
extern zpool_status_t zpool_import_status(nvlist_t *, char **);
extern void zpool_dump_ddt(const ddt_stat_t *dds, const ddt_histogram_t *ddh);
/*
* Statistics and configuration functions.
@ -319,23 +331,38 @@ extern int zpool_import_props(libzfs_handle_t *, nvlist_t *, const char *,
/*
* Search for pools to import
*/
typedef struct importargs {
char **path; /* a list of paths to search */
int paths; /* number of paths to search */
char *poolname; /* name of a pool to find */
uint64_t guid; /* guid of a pool to find */
char *cachefile; /* cachefile to use for import */
int can_be_active : 1; /* can the pool be active? */
int unique : 1; /* does 'poolname' already exist? */
int exists : 1; /* set on return if pool already exists */
} importargs_t;
extern nvlist_t *zpool_search_import(libzfs_handle_t *, importargs_t *);
/* legacy pool search routines */
extern nvlist_t *zpool_find_import(libzfs_handle_t *, int, char **);
extern nvlist_t *zpool_find_import_cached(libzfs_handle_t *, const char *,
char *, uint64_t);
extern nvlist_t *zpool_find_import_byname(libzfs_handle_t *, int, char **,
char *);
extern nvlist_t *zpool_find_import_byguid(libzfs_handle_t *, int, char **,
uint64_t);
extern nvlist_t *zpool_find_import_activeok(libzfs_handle_t *, int, char **);
/*
* Miscellaneous pool functions
*/
struct zfs_cmd;
extern char *zpool_vdev_name(libzfs_handle_t *, zpool_handle_t *, nvlist_t *);
extern const char *zfs_history_event_names[LOG_END];
extern char *zpool_vdev_name(libzfs_handle_t *, zpool_handle_t *, nvlist_t *,
boolean_t verbose);
extern int zpool_upgrade(zpool_handle_t *, uint64_t);
extern int zpool_get_history(zpool_handle_t *, nvlist_t **);
extern int zpool_history_unpack(char *, uint64_t, uint64_t *,
nvlist_t ***, uint_t *);
extern void zpool_set_history_str(const char *subcommand, int argc,
char **argv, char *history_str);
extern int zpool_stage_history(libzfs_handle_t *, const char *);
@ -343,6 +370,8 @@ extern void zpool_obj_to_path(zpool_handle_t *, uint64_t, uint64_t, char *,
size_t len);
extern int zfs_ioctl(libzfs_handle_t *, int, struct zfs_cmd *);
extern int zpool_get_physpath(zpool_handle_t *, char *, size_t);
extern void zpool_explain_recover(libzfs_handle_t *, const char *, int,
nvlist_t *);
/*
* Basic handle manipulations. These functions do not create or destroy the
@ -374,6 +403,8 @@ extern const char *zfs_prop_to_name(zfs_prop_t);
extern int zfs_prop_set(zfs_handle_t *, const char *, const char *);
extern int zfs_prop_get(zfs_handle_t *, zfs_prop_t, char *, size_t,
zprop_source_t *, char *, size_t, boolean_t);
extern int zfs_prop_get_recvd(zfs_handle_t *, const char *, char *, size_t,
boolean_t);
extern int zfs_prop_get_numeric(zfs_handle_t *, zfs_prop_t, uint64_t *,
zprop_source_t *, char *, size_t);
extern int zfs_prop_get_userquota_int(zfs_handle_t *zhp, const char *propname,
@ -381,10 +412,11 @@ extern int zfs_prop_get_userquota_int(zfs_handle_t *zhp, const char *propname,
extern int zfs_prop_get_userquota(zfs_handle_t *zhp, const char *propname,
char *propbuf, int proplen, boolean_t literal);
extern uint64_t zfs_prop_get_int(zfs_handle_t *, zfs_prop_t);
extern int zfs_prop_inherit(zfs_handle_t *, const char *);
extern int zfs_prop_inherit(zfs_handle_t *, const char *, boolean_t);
extern const char *zfs_prop_values(zfs_prop_t);
extern int zfs_prop_is_string(zfs_prop_t prop);
extern nvlist_t *zfs_get_user_props(zfs_handle_t *);
extern nvlist_t *zfs_get_recvd_props(zfs_handle_t *);
typedef struct zprop_list {
int pl_prop;
@ -392,10 +424,11 @@ typedef struct zprop_list {
struct zprop_list *pl_next;
boolean_t pl_all;
size_t pl_width;
size_t pl_recvd_width;
boolean_t pl_fixed;
} zprop_list_t;
extern int zfs_expand_proplist(zfs_handle_t *, zprop_list_t **);
extern int zfs_expand_proplist(zfs_handle_t *, zprop_list_t **, boolean_t);
extern void zfs_prune_proplist(zfs_handle_t *, uint8_t *);
#define ZFS_MOUNTPOINT_NONE "none"
@ -419,13 +452,24 @@ extern int zprop_get_list(libzfs_handle_t *, char *, zprop_list_t **,
zfs_type_t);
extern void zprop_free_list(zprop_list_t *);
#define ZFS_GET_NCOLS 5
typedef enum {
GET_COL_NONE,
GET_COL_NAME,
GET_COL_PROPERTY,
GET_COL_VALUE,
GET_COL_RECVD,
GET_COL_SOURCE
} zfs_get_column_t;
/*
* Functions for printing zfs or zpool properties
*/
typedef struct zprop_get_cbdata {
int cb_sources;
int cb_columns[4];
int cb_colwidths[5];
zfs_get_column_t cb_columns[ZFS_GET_NCOLS];
int cb_colwidths[ZFS_GET_NCOLS + 1];
boolean_t cb_scripted;
boolean_t cb_literal;
boolean_t cb_first;
@ -434,12 +478,8 @@ typedef struct zprop_get_cbdata {
} zprop_get_cbdata_t;
void zprop_print_one_property(const char *, zprop_get_cbdata_t *,
const char *, const char *, zprop_source_t, const char *);
#define GET_COL_NAME 1
#define GET_COL_PROPERTY 2
#define GET_COL_VALUE 3
#define GET_COL_SOURCE 4
const char *, const char *, zprop_source_t, const char *,
const char *);
/*
* Iterator functions.
@ -450,6 +490,7 @@ extern int zfs_iter_children(zfs_handle_t *, zfs_iter_f, void *);
extern int zfs_iter_dependents(zfs_handle_t *, boolean_t, zfs_iter_f, void *);
extern int zfs_iter_filesystems(zfs_handle_t *, zfs_iter_f, void *);
extern int zfs_iter_snapshots(zfs_handle_t *, zfs_iter_f, void *);
extern int zfs_iter_snapshots_sorted(zfs_handle_t *, zfs_iter_f, void *);
/*
* Functions to create and destroy datasets.
@ -463,11 +504,42 @@ extern int zfs_clone(zfs_handle_t *, const char *, nvlist_t *);
extern int zfs_snapshot(libzfs_handle_t *, const char *, boolean_t, nvlist_t *);
extern int zfs_rollback(zfs_handle_t *, zfs_handle_t *, boolean_t);
extern int zfs_rename(zfs_handle_t *, const char *, boolean_t);
extern int zfs_send(zfs_handle_t *, const char *, const char *,
boolean_t, boolean_t, boolean_t, boolean_t, int);
typedef struct sendflags {
/* print informational messages (ie, -v was specified) */
int verbose : 1;
/* recursive send (ie, -R) */
int replicate : 1;
/* for incrementals, do all intermediate snapshots */
int doall : 1; /* (ie, -I) */
/* if dataset is a clone, do incremental from its origin */
int fromorigin : 1;
/* do deduplication */
int dedup : 1;
/* send properties (ie, -p) */
int props : 1;
} sendflags_t;
typedef boolean_t (snapfilter_cb_t)(zfs_handle_t *, void *);
extern int zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
sendflags_t flags, int outfd, snapfilter_cb_t filter_func,
void *cb_arg, nvlist_t **debugnvp);
extern int zfs_promote(zfs_handle_t *);
extern int zfs_hold(zfs_handle_t *, const char *, const char *, boolean_t);
extern int zfs_hold(zfs_handle_t *, const char *, const char *, boolean_t,
boolean_t, boolean_t);
extern int zfs_hold_range(zfs_handle_t *, const char *, const char *,
const char *, boolean_t, boolean_t, snapfilter_cb_t, void *);
extern int zfs_release(zfs_handle_t *, const char *, const char *, boolean_t);
extern int zfs_release_range(zfs_handle_t *, const char *, const char *,
const char *, boolean_t);
extern uint64_t zvol_volsize_to_reservation(uint64_t, nvlist_t *);
typedef int (*zfs_userspace_cb_t)(void *arg, const char *domain,
uid_t rid, uint64_t space);
@ -482,6 +554,12 @@ typedef struct recvflags {
/* the destination is a prefix, not the exact fs (ie, -d) */
int isprefix : 1;
/*
* Only the tail of the sent snapshot path is appended to the
* destination to determine the received snapshot name (ie, -e).
*/
int istail : 1;
/* do not actually do the recv, just check if it would work (ie, -n) */
int dryrun : 1;
@ -542,10 +620,6 @@ extern int zfs_unshareall_nfs(zfs_handle_t *);
extern int zfs_unshareall_smb(zfs_handle_t *);
extern int zfs_unshareall_bypath(zfs_handle_t *, const char *);
extern int zfs_unshareall(zfs_handle_t *);
extern boolean_t zfs_is_shared_iscsi(zfs_handle_t *);
extern int zfs_share_iscsi(zfs_handle_t *);
extern int zfs_unshare_iscsi(zfs_handle_t *);
extern int zfs_iscsi_perm_check(libzfs_handle_t *, char *, ucred_t *);
extern int zfs_deleg_share_nfs(libzfs_handle_t *, char *, char *, char *,
void *, void *, int, zfs_share_op_t);
@ -571,15 +645,10 @@ extern int zpool_in_use(libzfs_handle_t *, int, pool_state_t *, char **,
boolean_t *);
/*
* ftyp special. Read the label from a given device.
* Label manipulation.
*/
extern int zpool_read_label(int, nvlist_t **);
/*
* Create and remove zvol /dev links.
*/
extern int zpool_create_zvol_links(zpool_handle_t *);
extern int zpool_remove_zvol_links(zpool_handle_t *);
extern int zpool_clear_label(int);
/* is this zvol valid for use as a dump device? */
extern int zvol_check_dump_config(char *);
@ -600,6 +669,17 @@ int zfs_smb_acl_rename(libzfs_handle_t *, char *, char *, char *, char *);
extern int zpool_enable_datasets(zpool_handle_t *, const char *, int);
extern int zpool_disable_datasets(zpool_handle_t *, boolean_t);
/*
* Mappings between vdev and FRU.
*/
extern void libzfs_fru_refresh(libzfs_handle_t *);
extern const char *libzfs_fru_lookup(libzfs_handle_t *, const char *);
extern const char *libzfs_fru_devpath(libzfs_handle_t *, const char *);
extern boolean_t libzfs_fru_compare(libzfs_handle_t *, const char *,
const char *);
extern boolean_t libzfs_fru_notself(libzfs_handle_t *, const char *);
extern int zpool_fru_set(zpool_handle_t *, uint64_t, const char *);
#ifdef __cplusplus
}
#endif

View File

@ -20,7 +20,7 @@
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@ -30,7 +30,6 @@
#include <sys/dmu.h>
#include <sys/fs/zfs.h>
#include <sys/zfs_ioctl.h>
#include <sys/zfs_acl.h>
#include <sys/spa.h>
#include <sys/nvpair.h>
@ -38,6 +37,8 @@
#include <libzfs.h>
#include <libshare.h>
#include <fm/libtopo.h>
#ifdef __cplusplus
extern "C" {
#endif
@ -47,6 +48,13 @@ extern "C" {
#endif
#define VERIFY verify
typedef struct libzfs_fru {
char *zf_device;
char *zf_fru;
struct libzfs_fru *zf_chain;
struct libzfs_fru *zf_next;
} libzfs_fru_t;
struct libzfs_handle {
int libzfs_error;
int libzfs_fd;
@ -65,7 +73,13 @@ struct libzfs_handle {
uint_t libzfs_shareflags;
boolean_t libzfs_mnttab_enable;
avl_tree_t libzfs_mnttab_cache;
int libzfs_pool_iter;
topo_hdl_t *libzfs_topo_hdl;
libzfs_fru_t **libzfs_fru_hash;
libzfs_fru_t *libzfs_fru_list;
char libzfs_chassis_id[256];
};
#define ZFSSHARE_MISS 0x01 /* Didn't find entry in cache */
struct zfs_handle {
@ -77,6 +91,7 @@ struct zfs_handle {
dmu_objset_stats_t zfs_dmustats;
nvlist_t *zfs_props;
nvlist_t *zfs_user_props;
nvlist_t *zfs_recvd_props;
boolean_t zfs_mntcheck;
char *zfs_mntopts;
uint8_t *zfs_props_table;
@ -112,7 +127,6 @@ typedef enum {
*/
typedef enum {
SHARED_NOT_SHARED = 0x0,
SHARED_ISCSI = 0x1,
SHARED_NFS = 0x2,
SHARED_SMB = 0x4
} zfs_share_type_t;
@ -172,9 +186,6 @@ zfs_handle_t *make_dataset_handle(libzfs_handle_t *, const char *);
int zpool_open_silent(libzfs_handle_t *, const char *, zpool_handle_t **);
int zvol_create_link(libzfs_handle_t *, const char *);
int zvol_remove_link(libzfs_handle_t *, const char *);
int zpool_iter_zvol(zpool_handle_t *, int (*)(const char *, void *), void *);
boolean_t zpool_name_valid(libzfs_handle_t *, boolean_t, const char *);
void namespace_clear(libzfs_handle_t *);
@ -189,6 +200,9 @@ extern int zfs_parse_options(char *, zfs_share_proto_t);
extern int zfs_unshare_proto(zfs_handle_t *,
const char *, zfs_share_proto_t *);
extern void libzfs_fru_clear(libzfs_handle_t *, boolean_t);
#ifdef __cplusplus
}
#endif

View File

@ -20,7 +20,7 @@
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*
* Portions Copyright 2007 Ramprakash Jelari
@ -116,32 +116,7 @@ changelist_prefix(prop_changelist_t *clp)
if (getzoneid() == GLOBAL_ZONEID && cn->cn_zoned)
continue;
if (ZFS_IS_VOLUME(cn->cn_handle)) {
switch (clp->cl_realprop) {
case ZFS_PROP_NAME:
/*
* If this was a rename, unshare the zvol, and
* remove the /dev/zvol links.
*/
(void) zfs_unshare_iscsi(cn->cn_handle);
if (zvol_remove_link(cn->cn_handle->zfs_hdl,
cn->cn_handle->zfs_name) != 0) {
ret = -1;
cn->cn_needpost = B_FALSE;
(void) zfs_share_iscsi(cn->cn_handle);
}
break;
case ZFS_PROP_VOLSIZE:
/*
* If this was a change to the volume size, we
* need to unshare and reshare the volume.
*/
(void) zfs_unshare_iscsi(cn->cn_handle);
break;
}
} else {
if (!ZFS_IS_VOLUME(cn->cn_handle)) {
/*
* Do the property specific processing.
*/
@ -234,32 +209,8 @@ changelist_postfix(prop_changelist_t *clp)
zfs_refresh_properties(cn->cn_handle);
if (ZFS_IS_VOLUME(cn->cn_handle)) {
/*
* If we're doing a rename, recreate the /dev/zvol
* links.
*/
if (clp->cl_realprop == ZFS_PROP_NAME &&
zvol_create_link(cn->cn_handle->zfs_hdl,
cn->cn_handle->zfs_name) != 0) {
errors++;
} else if (cn->cn_shared ||
clp->cl_prop == ZFS_PROP_SHAREISCSI) {
if (zfs_prop_get(cn->cn_handle,
ZFS_PROP_SHAREISCSI, shareopts,
sizeof (shareopts), NULL, NULL, 0,
B_FALSE) == 0 &&
strcmp(shareopts, "off") == 0) {
errors +=
zfs_unshare_iscsi(cn->cn_handle);
} else {
errors +=
zfs_share_iscsi(cn->cn_handle);
}
}
if (ZFS_IS_VOLUME(cn->cn_handle))
continue;
}
/*
* Remount if previously mounted or mountpoint was legacy,
@ -658,8 +609,7 @@ changelist_gather(zfs_handle_t *zhp, zfs_prop_t prop, int gather_flags,
if (clp->cl_prop != ZFS_PROP_MOUNTPOINT &&
clp->cl_prop != ZFS_PROP_SHARENFS &&
clp->cl_prop != ZFS_PROP_SHARESMB &&
clp->cl_prop != ZFS_PROP_SHAREISCSI)
clp->cl_prop != ZFS_PROP_SHARESMB)
return (clp);
/*

View File

@ -19,12 +19,10 @@
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
/*
* The pool configuration repository is stored in /etc/zfs/zpool.cache as a
* single packed nvlist. While it would be nice to just read in this
@ -313,21 +311,33 @@ zpool_iter(libzfs_handle_t *hdl, zpool_iter_f func, void *data)
zpool_handle_t *zhp;
int ret;
if (namespace_reload(hdl) != 0)
/*
* If someone makes a recursive call to zpool_iter(), we want to avoid
* refreshing the namespace because that will invalidate the parent
* context. We allow recursive calls, but simply re-use the same
* namespace AVL tree.
*/
if (!hdl->libzfs_pool_iter && namespace_reload(hdl) != 0)
return (-1);
hdl->libzfs_pool_iter++;
for (cn = uu_avl_first(hdl->libzfs_ns_avl); cn != NULL;
cn = uu_avl_next(hdl->libzfs_ns_avl, cn)) {
if (zpool_open_silent(hdl, cn->cn_name, &zhp) != 0)
if (zpool_open_silent(hdl, cn->cn_name, &zhp) != 0) {
hdl->libzfs_pool_iter--;
return (-1);
}
if (zhp == NULL)
continue;
if ((ret = func(zhp, data)) != 0)
if ((ret = func(zhp, data)) != 0) {
hdl->libzfs_pool_iter--;
return (ret);
}
}
hdl->libzfs_pool_iter--;
return (0);
}

File diff suppressed because it is too large Load Diff

452
lib/libzfs/libzfs_fru.c Normal file
View File

@ -0,0 +1,452 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include <dlfcn.h>
#include <errno.h>
#include <libintl.h>
#include <link.h>
#include <pthread.h>
#include <strings.h>
#include <unistd.h>
#include <libzfs.h>
#include <fm/libtopo.h>
#include <sys/fm/protocol.h>
#include <sys/systeminfo.h>
#include "libzfs_impl.h"
/*
* This file is responsible for determining the relationship between I/O
* devices paths and physical locations. In the world of MPxIO and external
* enclosures, the device path is not synonymous with the physical location.
* If you remove a drive and insert it into a different slot, it will end up
* with the same path under MPxIO. If you recable storage enclosures, the
* device paths may change. All of this makes it difficult to implement the
* 'autoreplace' property, which is supposed to automatically manage disk
* replacement based on physical slot.
*
* In order to work around these limitations, we have a per-vdev FRU property
* that is the libtopo path (minus disk-specific authority information) to the
* physical location of the device on the system. This is an optional
* property, and is only needed when using the 'autoreplace' property or when
* generating FMA faults against vdevs.
*/
/*
* Because the FMA packages depend on ZFS, we have to dlopen() libtopo in case
* it is not present. We only need this once per library instance, so it is
* not part of the libzfs handle.
*/
static void *_topo_dlhandle;
static topo_hdl_t *(*_topo_open)(int, const char *, int *);
static void (*_topo_close)(topo_hdl_t *);
static char *(*_topo_snap_hold)(topo_hdl_t *, const char *, int *);
static void (*_topo_snap_release)(topo_hdl_t *);
static topo_walk_t *(*_topo_walk_init)(topo_hdl_t *, const char *,
topo_walk_cb_t, void *, int *);
static int (*_topo_walk_step)(topo_walk_t *, int);
static void (*_topo_walk_fini)(topo_walk_t *);
static void (*_topo_hdl_strfree)(topo_hdl_t *, char *);
static char *(*_topo_node_name)(tnode_t *);
static int (*_topo_prop_get_string)(tnode_t *, const char *, const char *,
char **, int *);
static int (*_topo_node_fru)(tnode_t *, nvlist_t **, nvlist_t *, int *);
static int (*_topo_fmri_nvl2str)(topo_hdl_t *, nvlist_t *, char **, int *);
static int (*_topo_fmri_strcmp_noauth)(topo_hdl_t *, const char *,
const char *);
#define ZFS_FRU_HASH_SIZE 257
static size_t
fru_strhash(const char *key)
{
ulong_t g, h = 0;
const char *p;
for (p = key; *p != '\0'; p++) {
h = (h << 4) + *p;
if ((g = (h & 0xf0000000)) != 0) {
h ^= (g >> 24);
h ^= g;
}
}
return (h % ZFS_FRU_HASH_SIZE);
}
static int
libzfs_fru_gather(topo_hdl_t *thp, tnode_t *tn, void *arg)
{
libzfs_handle_t *hdl = arg;
nvlist_t *fru;
char *devpath, *frustr;
int err;
libzfs_fru_t *frup;
size_t idx;
/*
* If this is the chassis node, and we don't yet have the system
* chassis ID, then fill in this value now.
*/
if (hdl->libzfs_chassis_id[0] == '\0' &&
strcmp(_topo_node_name(tn), "chassis") == 0) {
if (_topo_prop_get_string(tn, FM_FMRI_AUTHORITY,
FM_FMRI_AUTH_CHASSIS, &devpath, &err) == 0)
(void) strlcpy(hdl->libzfs_chassis_id, devpath,
sizeof (hdl->libzfs_chassis_id));
}
/*
* Skip non-disk nodes.
*/
if (strcmp(_topo_node_name(tn), "disk") != 0)
return (TOPO_WALK_NEXT);
/*
* Get the devfs path and FRU.
*/
if (_topo_prop_get_string(tn, "io", "devfs-path", &devpath, &err) != 0)
return (TOPO_WALK_NEXT);
if (libzfs_fru_lookup(hdl, devpath) != NULL) {
_topo_hdl_strfree(thp, devpath);
return (TOPO_WALK_NEXT);
}
if (_topo_node_fru(tn, &fru, NULL, &err) != 0) {
_topo_hdl_strfree(thp, devpath);
return (TOPO_WALK_NEXT);
}
/*
* Convert the FRU into a string.
*/
if (_topo_fmri_nvl2str(thp, fru, &frustr, &err) != 0) {
nvlist_free(fru);
_topo_hdl_strfree(thp, devpath);
return (TOPO_WALK_NEXT);
}
nvlist_free(fru);
/*
* Finally, we have a FRU string and device path. Add it to the hash.
*/
if ((frup = calloc(sizeof (libzfs_fru_t), 1)) == NULL) {
_topo_hdl_strfree(thp, devpath);
_topo_hdl_strfree(thp, frustr);
return (TOPO_WALK_NEXT);
}
if ((frup->zf_device = strdup(devpath)) == NULL ||
(frup->zf_fru = strdup(frustr)) == NULL) {
free(frup->zf_device);
free(frup);
_topo_hdl_strfree(thp, devpath);
_topo_hdl_strfree(thp, frustr);
return (TOPO_WALK_NEXT);
}
_topo_hdl_strfree(thp, devpath);
_topo_hdl_strfree(thp, frustr);
idx = fru_strhash(frup->zf_device);
frup->zf_chain = hdl->libzfs_fru_hash[idx];
hdl->libzfs_fru_hash[idx] = frup;
frup->zf_next = hdl->libzfs_fru_list;
hdl->libzfs_fru_list = frup;
return (TOPO_WALK_NEXT);
}
/*
* Called during initialization to setup the dynamic libtopo connection.
*/
#pragma init(libzfs_init_fru)
static void
libzfs_init_fru(void)
{
char path[MAXPATHLEN];
char isa[257];
#if defined(_LP64)
if (sysinfo(SI_ARCHITECTURE_64, isa, sizeof (isa)) < 0)
isa[0] = '\0';
#else
isa[0] = '\0';
#endif
(void) snprintf(path, sizeof (path),
"/usr/lib/fm/%s/libtopo.so", isa);
if ((_topo_dlhandle = dlopen(path, RTLD_LAZY)) == NULL)
return;
_topo_open = (topo_hdl_t *(*)())
dlsym(_topo_dlhandle, "topo_open");
_topo_close = (void (*)())
dlsym(_topo_dlhandle, "topo_close");
_topo_snap_hold = (char *(*)())
dlsym(_topo_dlhandle, "topo_snap_hold");
_topo_snap_release = (void (*)())
dlsym(_topo_dlhandle, "topo_snap_release");
_topo_walk_init = (topo_walk_t *(*)())
dlsym(_topo_dlhandle, "topo_walk_init");
_topo_walk_step = (int (*)())
dlsym(_topo_dlhandle, "topo_walk_step");
_topo_walk_fini = (void (*)())
dlsym(_topo_dlhandle, "topo_walk_fini");
_topo_hdl_strfree = (void (*)())
dlsym(_topo_dlhandle, "topo_hdl_strfree");
_topo_node_name = (char *(*)())
dlsym(_topo_dlhandle, "topo_node_name");
_topo_prop_get_string = (int (*)())
dlsym(_topo_dlhandle, "topo_prop_get_string");
_topo_node_fru = (int (*)())
dlsym(_topo_dlhandle, "topo_node_fru");
_topo_fmri_nvl2str = (int (*)())
dlsym(_topo_dlhandle, "topo_fmri_nvl2str");
_topo_fmri_strcmp_noauth = (int (*)())
dlsym(_topo_dlhandle, "topo_fmri_strcmp_noauth");
if (_topo_open == NULL || _topo_close == NULL ||
_topo_snap_hold == NULL || _topo_snap_release == NULL ||
_topo_walk_init == NULL || _topo_walk_step == NULL ||
_topo_walk_fini == NULL || _topo_hdl_strfree == NULL ||
_topo_node_name == NULL || _topo_prop_get_string == NULL ||
_topo_node_fru == NULL || _topo_fmri_nvl2str == NULL ||
_topo_fmri_strcmp_noauth == NULL) {
(void) dlclose(_topo_dlhandle);
_topo_dlhandle = NULL;
}
}
/*
* Refresh the mappings from device path -> FMRI. We do this by walking the
* hc topology looking for disk nodes, and recording the io/devfs-path and FRU.
* Note that we strip out the disk-specific authority information (serial,
* part, revision, etc) so that we are left with only the identifying
* characteristics of the slot (hc path and chassis-id).
*/
void
libzfs_fru_refresh(libzfs_handle_t *hdl)
{
int err;
char *uuid;
topo_hdl_t *thp;
topo_walk_t *twp;
if (_topo_dlhandle == NULL)
return;
/*
* Clear the FRU hash and initialize our basic structures.
*/
libzfs_fru_clear(hdl, B_FALSE);
if ((hdl->libzfs_topo_hdl = _topo_open(TOPO_VERSION,
NULL, &err)) == NULL)
return;
thp = hdl->libzfs_topo_hdl;
if ((uuid = _topo_snap_hold(thp, NULL, &err)) == NULL)
return;
_topo_hdl_strfree(thp, uuid);
if (hdl->libzfs_fru_hash == NULL &&
(hdl->libzfs_fru_hash =
calloc(ZFS_FRU_HASH_SIZE * sizeof (void *), 1)) == NULL)
return;
/*
* We now have a topo snapshot, so iterate over the hc topology looking
* for disks to add to the hash.
*/
twp = _topo_walk_init(thp, FM_FMRI_SCHEME_HC,
libzfs_fru_gather, hdl, &err);
if (twp != NULL) {
(void) _topo_walk_step(twp, TOPO_WALK_CHILD);
_topo_walk_fini(twp);
}
}
/*
* Given a devfs path, return the FRU for the device, if known. This will
* automatically call libzfs_fru_refresh() if it hasn't already been called by
* the consumer. The string returned is valid until the next call to
* libzfs_fru_refresh().
*/
const char *
libzfs_fru_lookup(libzfs_handle_t *hdl, const char *devpath)
{
size_t idx = fru_strhash(devpath);
libzfs_fru_t *frup;
if (hdl->libzfs_fru_hash == NULL)
libzfs_fru_refresh(hdl);
if (hdl->libzfs_fru_hash == NULL)
return (NULL);
for (frup = hdl->libzfs_fru_hash[idx]; frup != NULL;
frup = frup->zf_chain) {
if (strcmp(devpath, frup->zf_device) == 0)
return (frup->zf_fru);
}
return (NULL);
}
/*
* Given a fru path, return the device path. This will automatically call
* libzfs_fru_refresh() if it hasn't already been called by the consumer. The
* string returned is valid until the next call to libzfs_fru_refresh().
*/
const char *
libzfs_fru_devpath(libzfs_handle_t *hdl, const char *fru)
{
libzfs_fru_t *frup;
size_t idx;
if (hdl->libzfs_fru_hash == NULL)
libzfs_fru_refresh(hdl);
if (hdl->libzfs_fru_hash == NULL)
return (NULL);
for (idx = 0; idx < ZFS_FRU_HASH_SIZE; idx++) {
for (frup = hdl->libzfs_fru_hash[idx]; frup != NULL;
frup = frup->zf_next) {
if (_topo_fmri_strcmp_noauth(hdl->libzfs_topo_hdl,
fru, frup->zf_fru))
return (frup->zf_device);
}
}
return (NULL);
}
/*
* Change the stored FRU for the given vdev.
*/
int
zpool_fru_set(zpool_handle_t *zhp, uint64_t vdev_guid, const char *fru)
{
zfs_cmd_t zc = { 0 };
(void) strncpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
(void) strncpy(zc.zc_value, fru, sizeof (zc.zc_value));
zc.zc_guid = vdev_guid;
if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_VDEV_SETFRU, &zc) != 0)
return (zpool_standard_error_fmt(zhp->zpool_hdl, errno,
dgettext(TEXT_DOMAIN, "cannot set FRU")));
return (0);
}
/*
* Compare to two FRUs, ignoring any authority information.
*/
boolean_t
libzfs_fru_compare(libzfs_handle_t *hdl, const char *a, const char *b)
{
if (hdl->libzfs_fru_hash == NULL)
libzfs_fru_refresh(hdl);
if (hdl->libzfs_fru_hash == NULL)
return (strcmp(a, b) == 0);
return (_topo_fmri_strcmp_noauth(hdl->libzfs_topo_hdl, a, b));
}
/*
* This special function checks to see whether the FRU indicates it's supposed
* to be in the system chassis, but the chassis-id doesn't match. This can
* happen in a clustered case, where both head nodes have the same logical
* disk, but opening the device on the other head node is meaningless.
*/
boolean_t
libzfs_fru_notself(libzfs_handle_t *hdl, const char *fru)
{
const char *chassisid;
size_t len;
if (hdl->libzfs_fru_hash == NULL)
libzfs_fru_refresh(hdl);
if (hdl->libzfs_chassis_id[0] == '\0')
return (B_FALSE);
if (strstr(fru, "/chassis=0/") == NULL)
return (B_FALSE);
if ((chassisid = strstr(fru, ":chassis-id=")) == NULL)
return (B_FALSE);
chassisid += 12;
len = strlen(hdl->libzfs_chassis_id);
if (strncmp(chassisid, hdl->libzfs_chassis_id, len) == 0 &&
(chassisid[len] == '/' || chassisid[len] == ':'))
return (B_FALSE);
return (B_TRUE);
}
/*
* Clear memory associated with the FRU hash.
*/
void
libzfs_fru_clear(libzfs_handle_t *hdl, boolean_t final)
{
libzfs_fru_t *frup;
while ((frup = hdl->libzfs_fru_list) != NULL) {
hdl->libzfs_fru_list = frup->zf_next;
free(frup->zf_device);
free(frup->zf_fru);
free(frup);
}
hdl->libzfs_fru_list = NULL;
if (hdl->libzfs_topo_hdl != NULL) {
_topo_snap_release(hdl->libzfs_topo_hdl);
_topo_close(hdl->libzfs_topo_hdl);
hdl->libzfs_topo_hdl = NULL;
}
if (final) {
free(hdl->libzfs_fru_hash);
} else if (hdl->libzfs_fru_hash != NULL) {
bzero(hdl->libzfs_fru_hash,
ZFS_FRU_HASH_SIZE * sizeof (void *));
}
}

View File

@ -19,12 +19,10 @@
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
/*
* Pool import support functions.
*
@ -41,15 +39,21 @@
* using our derived config, and record the results.
*/
#include <ctype.h>
#include <devid.h>
#include <dirent.h>
#include <errno.h>
#include <libintl.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/vtoc.h>
#include <sys/dktp/fdisk.h>
#include <sys/efi_partition.h>
#include <thread_pool.h>
#include <sys/vdev_impl.h>
@ -388,8 +392,6 @@ refresh_config(libzfs_handle_t *hdl, nvlist_t *config)
}
if (err) {
(void) zpool_standard_error(hdl, errno,
dgettext(TEXT_DOMAIN, "cannot discover pools"));
zcmd_free_nvlists(&zc);
return (NULL);
}
@ -403,6 +405,21 @@ refresh_config(libzfs_handle_t *hdl, nvlist_t *config)
return (nvl);
}
/*
* Determine if the vdev id is a hole in the namespace.
*/
boolean_t
vdev_is_hole(uint64_t *hole_array, uint_t holes, uint_t id)
{
for (int c = 0; c < holes; c++) {
/* Top-level is a hole */
if (hole_array[c] == id)
return (B_TRUE);
}
return (B_FALSE);
}
/*
* Convert our list of pools into the definitive set of configurations. We
* start by picking the best config for each toplevel vdev. Once that's done,
@ -425,17 +442,20 @@ get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok)
uint64_t version, guid;
uint_t children = 0;
nvlist_t **child = NULL;
uint_t holes;
uint64_t *hole_array, max_id;
uint_t c;
boolean_t isactive;
uint64_t hostid;
nvlist_t *nvl;
boolean_t found_one = B_FALSE;
boolean_t valid_top_config = B_FALSE;
if (nvlist_alloc(&ret, 0, 0) != 0)
goto nomem;
for (pe = pl->pools; pe != NULL; pe = pe->pe_next) {
uint64_t id;
uint64_t id, max_txg = 0;
if (nvlist_alloc(&config, NV_UNIQUE_NAME, 0) != 0)
goto nomem;
@ -463,6 +483,42 @@ get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok)
}
}
/*
* We rely on the fact that the max txg for the
* pool will contain the most up-to-date information
* about the valid top-levels in the vdev namespace.
*/
if (best_txg > max_txg) {
(void) nvlist_remove(config,
ZPOOL_CONFIG_VDEV_CHILDREN,
DATA_TYPE_UINT64);
(void) nvlist_remove(config,
ZPOOL_CONFIG_HOLE_ARRAY,
DATA_TYPE_UINT64_ARRAY);
max_txg = best_txg;
hole_array = NULL;
holes = 0;
max_id = 0;
valid_top_config = B_FALSE;
if (nvlist_lookup_uint64(tmp,
ZPOOL_CONFIG_VDEV_CHILDREN, &max_id) == 0) {
verify(nvlist_add_uint64(config,
ZPOOL_CONFIG_VDEV_CHILDREN,
max_id) == 0);
valid_top_config = B_TRUE;
}
if (nvlist_lookup_uint64_array(tmp,
ZPOOL_CONFIG_HOLE_ARRAY, &hole_array,
&holes) == 0) {
verify(nvlist_add_uint64_array(config,
ZPOOL_CONFIG_HOLE_ARRAY,
hole_array, holes) == 0);
}
}
if (!config_seen) {
/*
* Copy the relevant pieces of data to the pool
@ -522,6 +578,7 @@ get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok)
ZPOOL_CONFIG_VDEV_TREE, &nvtop) == 0);
verify(nvlist_lookup_uint64(nvtop, ZPOOL_CONFIG_ID,
&id) == 0);
if (id >= children) {
nvlist_t **newchild;
@ -542,9 +599,74 @@ get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok)
}
/*
* If we have information about all the top-levels then
* clean up the nvlist which we've constructed. This
* means removing any extraneous devices that are
* beyond the valid range or adding devices to the end
* of our array which appear to be missing.
*/
if (valid_top_config) {
if (max_id < children) {
for (c = max_id; c < children; c++)
nvlist_free(child[c]);
children = max_id;
} else if (max_id > children) {
nvlist_t **newchild;
newchild = zfs_alloc(hdl, (max_id) *
sizeof (nvlist_t *));
if (newchild == NULL)
goto nomem;
for (c = 0; c < children; c++)
newchild[c] = child[c];
free(child);
child = newchild;
children = max_id;
}
}
verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
&guid) == 0);
/*
* The vdev namespace may contain holes as a result of
* device removal. We must add them back into the vdev
* tree before we process any missing devices.
*/
if (holes > 0) {
ASSERT(valid_top_config);
for (c = 0; c < children; c++) {
nvlist_t *holey;
if (child[c] != NULL ||
!vdev_is_hole(hole_array, holes, c))
continue;
if (nvlist_alloc(&holey, NV_UNIQUE_NAME,
0) != 0)
goto nomem;
/*
* Holes in the namespace are treated as
* "hole" top-level vdevs and have a
* special flag set on them.
*/
if (nvlist_add_string(holey,
ZPOOL_CONFIG_TYPE,
VDEV_TYPE_HOLE) != 0 ||
nvlist_add_uint64(holey,
ZPOOL_CONFIG_ID, c) != 0 ||
nvlist_add_uint64(holey,
ZPOOL_CONFIG_GUID, 0ULL) != 0)
goto nomem;
child[c] = holey;
}
}
/*
* Look for any missing top-level vdevs. If this is the case,
* create a faked up 'missing' vdev as a placeholder. We cannot
@ -552,7 +674,7 @@ get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok)
* certain checks to make sure the vdev IDs match their location
* in the configuration.
*/
for (c = 0; c < children; c++)
for (c = 0; c < children; c++) {
if (child[c] == NULL) {
nvlist_t *missing;
if (nvlist_alloc(&missing, NV_UNIQUE_NAME,
@ -570,6 +692,7 @@ get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok)
}
child[c] = missing;
}
}
/*
* Put all of this pool's top-level vdevs into a root vdev.
@ -636,8 +759,11 @@ get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok)
continue;
}
if ((nvl = refresh_config(hdl, config)) == NULL)
goto error;
if ((nvl = refresh_config(hdl, config)) == NULL) {
nvlist_free(config);
config = NULL;
continue;
}
nvlist_free(config);
config = nvl;
@ -777,6 +903,212 @@ zpool_read_label(int fd, nvlist_t **config)
return (0);
}
typedef struct rdsk_node {
char *rn_name;
int rn_dfd;
libzfs_handle_t *rn_hdl;
nvlist_t *rn_config;
avl_tree_t *rn_avl;
avl_node_t rn_node;
boolean_t rn_nozpool;
} rdsk_node_t;
static int
slice_cache_compare(const void *arg1, const void *arg2)
{
const char *nm1 = ((rdsk_node_t *)arg1)->rn_name;
const char *nm2 = ((rdsk_node_t *)arg2)->rn_name;
char *nm1slice, *nm2slice;
int rv;
/*
* slices zero and two are the most likely to provide results,
* so put those first
*/
nm1slice = strstr(nm1, "s0");
nm2slice = strstr(nm2, "s0");
if (nm1slice && !nm2slice) {
return (-1);
}
if (!nm1slice && nm2slice) {
return (1);
}
nm1slice = strstr(nm1, "s2");
nm2slice = strstr(nm2, "s2");
if (nm1slice && !nm2slice) {
return (-1);
}
if (!nm1slice && nm2slice) {
return (1);
}
rv = strcmp(nm1, nm2);
if (rv == 0)
return (0);
return (rv > 0 ? 1 : -1);
}
static void
check_one_slice(avl_tree_t *r, char *diskname, uint_t partno,
diskaddr_t size, uint_t blksz)
{
rdsk_node_t tmpnode;
rdsk_node_t *node;
char sname[MAXNAMELEN];
tmpnode.rn_name = &sname[0];
(void) snprintf(tmpnode.rn_name, MAXNAMELEN, "%s%u",
diskname, partno);
/*
* protect against division by zero for disk labels that
* contain a bogus sector size
*/
if (blksz == 0)
blksz = DEV_BSIZE;
/* too small to contain a zpool? */
if ((size < (SPA_MINDEVSIZE / blksz)) &&
(node = avl_find(r, &tmpnode, NULL)))
node->rn_nozpool = B_TRUE;
}
static void
nozpool_all_slices(avl_tree_t *r, const char *sname)
{
char diskname[MAXNAMELEN];
char *ptr;
int i;
(void) strncpy(diskname, sname, MAXNAMELEN);
if (((ptr = strrchr(diskname, 's')) == NULL) &&
((ptr = strrchr(diskname, 'p')) == NULL))
return;
ptr[0] = 's';
ptr[1] = '\0';
for (i = 0; i < NDKMAP; i++)
check_one_slice(r, diskname, i, 0, 1);
ptr[0] = 'p';
for (i = 0; i <= FD_NUMPART; i++)
check_one_slice(r, diskname, i, 0, 1);
}
static void
check_slices(avl_tree_t *r, int fd, const char *sname)
{
struct extvtoc vtoc;
struct dk_gpt *gpt;
char diskname[MAXNAMELEN];
char *ptr;
int i;
(void) strncpy(diskname, sname, MAXNAMELEN);
if ((ptr = strrchr(diskname, 's')) == NULL || !isdigit(ptr[1]))
return;
ptr[1] = '\0';
if (read_extvtoc(fd, &vtoc) >= 0) {
for (i = 0; i < NDKMAP; i++)
check_one_slice(r, diskname, i,
vtoc.v_part[i].p_size, vtoc.v_sectorsz);
} else if (efi_alloc_and_read(fd, &gpt) >= 0) {
/*
* on x86 we'll still have leftover links that point
* to slices s[9-15], so use NDKMAP instead
*/
for (i = 0; i < NDKMAP; i++)
check_one_slice(r, diskname, i,
gpt->efi_parts[i].p_size, gpt->efi_lbasize);
/* nodes p[1-4] are never used with EFI labels */
ptr[0] = 'p';
for (i = 1; i <= FD_NUMPART; i++)
check_one_slice(r, diskname, i, 0, 1);
efi_free(gpt);
}
}
static void
zpool_open_func(void *arg)
{
rdsk_node_t *rn = arg;
struct stat64 statbuf;
nvlist_t *config;
int fd;
if (rn->rn_nozpool)
return;
if ((fd = openat64(rn->rn_dfd, rn->rn_name, O_RDONLY)) < 0) {
/* symlink to a device that's no longer there */
if (errno == ENOENT)
nozpool_all_slices(rn->rn_avl, rn->rn_name);
return;
}
/*
* Ignore failed stats. We only want regular
* files, character devs and block devs.
*/
if (fstat64(fd, &statbuf) != 0 ||
(!S_ISREG(statbuf.st_mode) &&
!S_ISCHR(statbuf.st_mode) &&
!S_ISBLK(statbuf.st_mode))) {
(void) close(fd);
return;
}
/* this file is too small to hold a zpool */
if (S_ISREG(statbuf.st_mode) &&
statbuf.st_size < SPA_MINDEVSIZE) {
(void) close(fd);
return;
} else if (!S_ISREG(statbuf.st_mode)) {
/*
* Try to read the disk label first so we don't have to
* open a bunch of minor nodes that can't have a zpool.
*/
check_slices(rn->rn_avl, fd, rn->rn_name);
}
if ((zpool_read_label(fd, &config)) != 0) {
(void) close(fd);
(void) no_memory(rn->rn_hdl);
return;
}
(void) close(fd);
rn->rn_config = config;
if (config != NULL) {
assert(rn->rn_nozpool == B_FALSE);
}
}
/*
* Given a file descriptor, clear (zero) the label information. This function
* is currently only used in the appliance stack as part of the ZFS sysevent
* module.
*/
int
zpool_clear_label(int fd)
{
struct stat64 statbuf;
int l;
vdev_label_t *label;
uint64_t size;
if (fstat64(fd, &statbuf) == -1)
return (0);
size = P2ALIGN_TYPED(statbuf.st_size, sizeof (vdev_label_t), uint64_t);
if ((label = calloc(sizeof (vdev_label_t), 1)) == NULL)
return (-1);
for (l = 0; l < VDEV_LABELS; l++) {
if (pwrite64(fd, label, sizeof (vdev_label_t),
label_offset(size, l)) != sizeof (vdev_label_t))
return (-1);
}
free(label);
return (0);
}
/*
* Given a list of directories to search, find all pools stored on disk. This
* includes partial pools which are not available to import. If no args are
@ -785,30 +1117,28 @@ zpool_read_label(int fd, nvlist_t **config)
* to import a specific pool.
*/
static nvlist_t *
zpool_find_import_impl(libzfs_handle_t *hdl, int argc, char **argv,
boolean_t active_ok, char *poolname, uint64_t guid)
zpool_find_import_impl(libzfs_handle_t *hdl, importargs_t *iarg)
{
int i;
int i, dirs = iarg->paths;
DIR *dirp = NULL;
struct dirent64 *dp;
char path[MAXPATHLEN];
char *end;
char *end, **dir = iarg->path;
size_t pathleft;
struct stat64 statbuf;
nvlist_t *ret = NULL, *config;
nvlist_t *ret = NULL;
static char *default_dir = "/dev/dsk";
int fd;
pool_list_t pools = { 0 };
pool_entry_t *pe, *penext;
vdev_entry_t *ve, *venext;
config_entry_t *ce, *cenext;
name_entry_t *ne, *nenext;
avl_tree_t slice_cache;
rdsk_node_t *slice;
void *cookie;
verify(poolname == NULL || guid == 0);
if (argc == 0) {
argc = 1;
argv = &default_dir;
if (dirs == 0) {
dirs = 1;
dir = &default_dir;
}
/*
@ -816,15 +1146,15 @@ zpool_find_import_impl(libzfs_handle_t *hdl, int argc, char **argv,
* possible device, organizing the information according to pool GUID
* and toplevel GUID.
*/
for (i = 0; i < argc; i++) {
for (i = 0; i < dirs; i++) {
tpool_t *t;
char *rdsk;
int dfd;
/* use realpath to normalize the path */
if (realpath(argv[i], path) == 0) {
if (realpath(dir[i], path) == 0) {
(void) zfs_error_fmt(hdl, EZFS_BADPATH,
dgettext(TEXT_DOMAIN, "cannot open '%s'"),
argv[i]);
dgettext(TEXT_DOMAIN, "cannot open '%s'"), dir[i]);
goto error;
}
end = &path[strlen(path)];
@ -851,6 +1181,8 @@ zpool_find_import_impl(libzfs_handle_t *hdl, int argc, char **argv,
goto error;
}
avl_create(&slice_cache, slice_cache_compare,
sizeof (rdsk_node_t), offsetof(rdsk_node_t, rn_node));
/*
* This is not MT-safe, but we have no MT consumers of libzfs
*/
@ -860,46 +1192,53 @@ zpool_find_import_impl(libzfs_handle_t *hdl, int argc, char **argv,
(name[1] == 0 || (name[1] == '.' && name[2] == 0)))
continue;
if ((fd = openat64(dfd, name, O_RDONLY)) < 0)
continue;
slice = zfs_alloc(hdl, sizeof (rdsk_node_t));
slice->rn_name = zfs_strdup(hdl, name);
slice->rn_avl = &slice_cache;
slice->rn_dfd = dfd;
slice->rn_hdl = hdl;
slice->rn_nozpool = B_FALSE;
avl_add(&slice_cache, slice);
}
/*
* create a thread pool to do all of this in parallel;
* rn_nozpool is not protected, so this is racy in that
* multiple tasks could decide that the same slice can
* not hold a zpool, which is benign. Also choose
* double the number of processors; we hold a lot of
* locks in the kernel, so going beyond this doesn't
* buy us much.
*/
t = tpool_create(1, 2 * sysconf(_SC_NPROCESSORS_ONLN),
0, NULL);
for (slice = avl_first(&slice_cache); slice;
(slice = avl_walk(&slice_cache, slice,
AVL_AFTER)))
(void) tpool_dispatch(t, zpool_open_func, slice);
tpool_wait(t);
tpool_destroy(t);
/*
* Ignore failed stats. We only want regular
* files, character devs and block devs.
*/
if (fstat64(fd, &statbuf) != 0 ||
(!S_ISREG(statbuf.st_mode) &&
!S_ISCHR(statbuf.st_mode) &&
!S_ISBLK(statbuf.st_mode))) {
(void) close(fd);
continue;
}
if ((zpool_read_label(fd, &config)) != 0) {
(void) close(fd);
(void) no_memory(hdl);
goto error;
}
(void) close(fd);
if (config != NULL) {
cookie = NULL;
while ((slice = avl_destroy_nodes(&slice_cache,
&cookie)) != NULL) {
if (slice->rn_config != NULL) {
nvlist_t *config = slice->rn_config;
boolean_t matched = B_TRUE;
if (poolname != NULL) {
if (iarg->poolname != NULL) {
char *pname;
matched = nvlist_lookup_string(config,
ZPOOL_CONFIG_POOL_NAME,
&pname) == 0 &&
strcmp(poolname, pname) == 0;
} else if (guid != 0) {
strcmp(iarg->poolname, pname) == 0;
} else if (iarg->guid != 0) {
uint64_t this_guid;
matched = nvlist_lookup_uint64(config,
ZPOOL_CONFIG_POOL_GUID,
&this_guid) == 0 &&
guid == this_guid;
iarg->guid == this_guid;
}
if (!matched) {
nvlist_free(config);
@ -907,17 +1246,20 @@ zpool_find_import_impl(libzfs_handle_t *hdl, int argc, char **argv,
continue;
}
/* use the non-raw path for the config */
(void) strlcpy(end, name, pathleft);
(void) strlcpy(end, slice->rn_name, pathleft);
if (add_config(hdl, &pools, path, config) != 0)
goto error;
}
free(slice->rn_name);
free(slice);
}
avl_destroy(&slice_cache);
(void) closedir(dirp);
dirp = NULL;
}
ret = get_configs(hdl, &pools, active_ok);
ret = get_configs(hdl, &pools, iarg->can_be_active);
error:
for (pe = pools.pools; pe != NULL; pe = penext) {
@ -951,27 +1293,12 @@ error:
nvlist_t *
zpool_find_import(libzfs_handle_t *hdl, int argc, char **argv)
{
return (zpool_find_import_impl(hdl, argc, argv, B_FALSE, NULL, 0));
}
importargs_t iarg = { 0 };
nvlist_t *
zpool_find_import_byname(libzfs_handle_t *hdl, int argc, char **argv,
char *pool)
{
return (zpool_find_import_impl(hdl, argc, argv, B_FALSE, pool, 0));
}
iarg.paths = argc;
iarg.path = argv;
nvlist_t *
zpool_find_import_byguid(libzfs_handle_t *hdl, int argc, char **argv,
uint64_t guid)
{
return (zpool_find_import_impl(hdl, argc, argv, B_FALSE, NULL, guid));
}
nvlist_t *
zpool_find_import_activeok(libzfs_handle_t *hdl, int argc, char **argv)
{
return (zpool_find_import_impl(hdl, argc, argv, B_TRUE, NULL, 0));
return (zpool_find_import_impl(hdl, &iarg));
}
/*
@ -1093,6 +1420,46 @@ zpool_find_import_cached(libzfs_handle_t *hdl, const char *cachefile,
return (pools);
}
static int
name_or_guid_exists(zpool_handle_t *zhp, void *data)
{
importargs_t *import = data;
int found = 0;
if (import->poolname != NULL) {
char *pool_name;
verify(nvlist_lookup_string(zhp->zpool_config,
ZPOOL_CONFIG_POOL_NAME, &pool_name) == 0);
if (strcmp(pool_name, import->poolname) == 0)
found = 1;
} else {
uint64_t pool_guid;
verify(nvlist_lookup_uint64(zhp->zpool_config,
ZPOOL_CONFIG_POOL_GUID, &pool_guid) == 0);
if (pool_guid == import->guid)
found = 1;
}
zpool_close(zhp);
return (found);
}
nvlist_t *
zpool_search_import(libzfs_handle_t *hdl, importargs_t *import)
{
verify(import->poolname == NULL || import->guid == 0);
if (import->unique)
import->exists = zpool_iter(hdl, name_or_guid_exists, import);
if (import->cachefile != NULL)
return (zpool_find_import_cached(hdl, import->cachefile,
import->poolname, import->guid));
return (zpool_find_import_impl(hdl, import));
}
boolean_t
find_guid(nvlist_t *nv, uint64_t guid)

View File

@ -20,8 +20,7 @@
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/*
@ -44,17 +43,14 @@
*
* zfs_is_shared_nfs()
* zfs_is_shared_smb()
* zfs_is_shared_iscsi()
* zfs_share_proto()
* zfs_shareall();
* zfs_share_iscsi()
* zfs_unshare_nfs()
* zfs_unshare_smb()
* zfs_unshareall_nfs()
* zfs_unshareall_smb()
* zfs_unshareall()
* zfs_unshareall_bypath()
* zfs_unshare_iscsi()
*
* The following functions are available for pool consumers, and will
* mount/unmount and share/unshare all datasets within pool:
@ -89,11 +85,6 @@ static int zfs_share_proto(zfs_handle_t *, zfs_share_proto_t *);
zfs_share_type_t zfs_is_shared_proto(zfs_handle_t *, char **,
zfs_share_proto_t);
static int (*iscsitgt_zfs_share)(const char *);
static int (*iscsitgt_zfs_unshare)(const char *);
static int (*iscsitgt_zfs_is_shared)(const char *);
static int (*iscsitgt_svc_online)();
/*
* The share protocols table must be in the same order as the zfs_share_prot_t
* enum in libzfs_impl.h
@ -125,29 +116,6 @@ zfs_share_proto_t share_all_proto[] = {
PROTO_END
};
#pragma init(zfs_iscsi_init)
static void
zfs_iscsi_init(void)
{
void *libiscsitgt;
if ((libiscsitgt = dlopen("/lib/libiscsitgt.so.1",
RTLD_LAZY | RTLD_GLOBAL)) == NULL ||
(iscsitgt_zfs_share = (int (*)(const char *))dlsym(libiscsitgt,
"iscsitgt_zfs_share")) == NULL ||
(iscsitgt_zfs_unshare = (int (*)(const char *))dlsym(libiscsitgt,
"iscsitgt_zfs_unshare")) == NULL ||
(iscsitgt_zfs_is_shared = (int (*)(const char *))dlsym(libiscsitgt,
"iscsitgt_zfs_is_shared")) == NULL ||
(iscsitgt_svc_online = (int (*)(const char *))dlsym(libiscsitgt,
"iscsitgt_svc_online")) == NULL) {
iscsitgt_zfs_share = NULL;
iscsitgt_zfs_unshare = NULL;
iscsitgt_zfs_is_shared = NULL;
iscsitgt_svc_online = NULL;
}
}
/*
* Search the sharetab for the given mountpoint and protocol, returning
* a zfs_share_type_t value.
@ -345,6 +313,18 @@ zfs_mount(zfs_handle_t *zhp, const char *options, int flags)
} else if (errno == EPERM) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"Insufficient privileges"));
} else if (errno == ENOTSUP) {
char buf[256];
int spa_version;
VERIFY(zfs_spa_version(zhp, &spa_version) == 0);
(void) snprintf(buf, sizeof (buf),
dgettext(TEXT_DOMAIN, "Can't mount a version %lld "
"file system on a version %d pool. Pool must be"
" upgraded to mount this file system."),
(u_longlong_t)zfs_prop_get_int(zhp,
ZFS_PROP_VERSION), spa_version);
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, buf));
} else {
zfs_error_aux(hdl, strerror(errno));
}
@ -445,7 +425,7 @@ zfs_is_shared(zfs_handle_t *zhp)
zfs_share_proto_t *curr_proto;
if (ZFS_IS_VOLUME(zhp))
return (zfs_is_shared_iscsi(zhp));
return (B_FALSE);
for (curr_proto = share_all_proto; *curr_proto != PROTO_END;
curr_proto++)
@ -458,7 +438,7 @@ int
zfs_share(zfs_handle_t *zhp)
{
if (ZFS_IS_VOLUME(zhp))
return (zfs_share_iscsi(zhp));
return (0);
return (zfs_share_proto(zhp, share_all_proto));
}
@ -467,7 +447,7 @@ int
zfs_unshare(zfs_handle_t *zhp)
{
if (ZFS_IS_VOLUME(zhp))
return (zfs_unshare_iscsi(zhp));
return (0);
return (zfs_unshareall(zhp));
}
@ -999,81 +979,6 @@ remove_mountpoint(zfs_handle_t *zhp)
}
}
boolean_t
zfs_is_shared_iscsi(zfs_handle_t *zhp)
{
/*
* If iscsi deamon isn't running then we aren't shared
*/
if (iscsitgt_svc_online && iscsitgt_svc_online() == 1)
return (B_FALSE);
else
return (iscsitgt_zfs_is_shared != NULL &&
iscsitgt_zfs_is_shared(zhp->zfs_name) != 0);
}
int
zfs_share_iscsi(zfs_handle_t *zhp)
{
char shareopts[ZFS_MAXPROPLEN];
const char *dataset = zhp->zfs_name;
libzfs_handle_t *hdl = zhp->zfs_hdl;
/*
* Return success if there are no share options.
*/
if (zfs_prop_get(zhp, ZFS_PROP_SHAREISCSI, shareopts,
sizeof (shareopts), NULL, NULL, 0, B_FALSE) != 0 ||
strcmp(shareopts, "off") == 0)
return (0);
if (iscsitgt_zfs_share == NULL || iscsitgt_zfs_share(dataset) != 0) {
int error = EZFS_SHAREISCSIFAILED;
/*
* If service isn't availabele and EPERM was
* returned then use special error.
*/
if (iscsitgt_svc_online && errno == EPERM &&
(iscsitgt_svc_online() != 0))
error = EZFS_ISCSISVCUNAVAIL;
return (zfs_error_fmt(hdl, error,
dgettext(TEXT_DOMAIN, "cannot share '%s'"), dataset));
}
return (0);
}
int
zfs_unshare_iscsi(zfs_handle_t *zhp)
{
const char *dataset = zfs_get_name(zhp);
libzfs_handle_t *hdl = zhp->zfs_hdl;
/*
* Return if the volume is not shared
*/
if (zfs_is_shared_iscsi(zhp) != SHARED_ISCSI)
return (0);
/*
* If this fails with ENODEV it indicates that zvol wasn't shared so
* we should return success in that case.
*/
if (iscsitgt_zfs_unshare == NULL ||
(iscsitgt_zfs_unshare(dataset) != 0 && errno != ENODEV)) {
if (errno == EPERM)
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"Insufficient privileges to unshare iscsi"));
return (zfs_error_fmt(hdl, EZFS_UNSHAREISCSIFAILED,
dgettext(TEXT_DOMAIN, "cannot unshare '%s'"), dataset));
}
return (0);
}
typedef struct mount_cbdata {
zfs_handle_t **cb_datasets;
int cb_used;
@ -1215,28 +1120,6 @@ out:
return (ret);
}
static int
zvol_cb(const char *dataset, void *data)
{
libzfs_handle_t *hdl = data;
zfs_handle_t *zhp;
/*
* Ignore snapshots and ignore failures from non-existant datasets.
*/
if (strchr(dataset, '@') != NULL ||
(zhp = zfs_open(hdl, dataset, ZFS_TYPE_VOLUME)) == NULL)
return (0);
if (zfs_unshare_iscsi(zhp) != 0)
return (-1);
zfs_close(zhp);
return (0);
}
static int
mountpoint_compare(const void *a, const void *b)
{
@ -1246,6 +1129,8 @@ mountpoint_compare(const void *a, const void *b)
return (strcmp(mountb, mounta));
}
/* alias for 2002/240 */
#pragma weak zpool_unmount_datasets = zpool_disable_datasets
/*
* Unshare and unmount all datasets within the given pool. We don't want to
* rely on traversing the DSL to discover the filesystems within the pool,
@ -1253,7 +1138,6 @@ mountpoint_compare(const void *a, const void *b)
* arbitrarily (on I/O error, for example). Instead, we walk /etc/mnttab and
* gather all the filesystems that are currently mounted.
*/
#pragma weak zpool_unmount_datasets = zpool_disable_datasets
int
zpool_disable_datasets(zpool_handle_t *zhp, boolean_t force)
{
@ -1267,12 +1151,6 @@ zpool_disable_datasets(zpool_handle_t *zhp, boolean_t force)
int ret = -1;
int flags = (force ? MS_FORCE : 0);
/*
* First unshare all zvols.
*/
if (zpool_iter_zvol(zhp, zvol_cb, hdl) != 0)
return (-1);
namelen = strlen(zhp->zpool_name);
rewind(hdl->libzfs_mnttab);

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/*
@ -138,7 +137,7 @@ find_vdev_problem(nvlist_t *vdev, int (*func)(uint64_t, uint64_t, uint64_t))
if (find_vdev_problem(child[c], func))
return (B_TRUE);
} else {
verify(nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_STATS,
verify(nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_VDEV_STATS,
(uint64_t **)&vs, &c) == 0);
if (func(vs->vs_state, vs->vs_aux,
@ -173,7 +172,8 @@ check_status(nvlist_t *config, boolean_t isimport)
{
nvlist_t *nvroot;
vdev_stat_t *vs;
uint_t vsc;
pool_scan_stat_t *ps = NULL;
uint_t vsc, psc;
uint64_t nerr;
uint64_t version;
uint64_t stateval;
@ -184,15 +184,24 @@ check_status(nvlist_t *config, boolean_t isimport)
&version) == 0);
verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
&nvroot) == 0);
verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS,
verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS,
(uint64_t **)&vs, &vsc) == 0);
verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
&stateval) == 0);
(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, &hostid);
/*
* Currently resilvering a vdev
*/
(void) nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_SCAN_STATS,
(uint64_t **)&ps, &psc);
if (ps && ps->pss_func == POOL_SCAN_RESILVER &&
ps->pss_state == DSS_SCANNING)
return (ZPOOL_STATUS_RESILVERING);
/*
* Pool last accessed by another system.
*/
(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, &hostid);
if (hostid != 0 && (unsigned long)hostid != gethostid() &&
stateval == POOL_STATE_ACTIVE)
return (ZPOOL_STATUS_HOSTID_MISMATCH);
@ -288,12 +297,6 @@ check_status(nvlist_t *config, boolean_t isimport)
if (find_vdev_problem(nvroot, vdev_removed))
return (ZPOOL_STATUS_REMOVED_DEV);
/*
* Currently resilvering
*/
if (!vs->vs_scrub_complete && vs->vs_scrub_type == POOL_SCRUB_RESILVER)
return (ZPOOL_STATUS_RESILVERING);
/*
* Outdated, but usable, version
*/
@ -328,3 +331,68 @@ zpool_import_status(nvlist_t *config, char **msgid)
return (ret);
}
static void
dump_ddt_stat(const ddt_stat_t *dds, int h)
{
char refcnt[6];
char blocks[6], lsize[6], psize[6], dsize[6];
char ref_blocks[6], ref_lsize[6], ref_psize[6], ref_dsize[6];
if (dds == NULL || dds->dds_blocks == 0)
return;
if (h == -1)
(void) strcpy(refcnt, "Total");
else
zfs_nicenum(1ULL << h, refcnt, sizeof (refcnt));
zfs_nicenum(dds->dds_blocks, blocks, sizeof (blocks));
zfs_nicenum(dds->dds_lsize, lsize, sizeof (lsize));
zfs_nicenum(dds->dds_psize, psize, sizeof (psize));
zfs_nicenum(dds->dds_dsize, dsize, sizeof (dsize));
zfs_nicenum(dds->dds_ref_blocks, ref_blocks, sizeof (ref_blocks));
zfs_nicenum(dds->dds_ref_lsize, ref_lsize, sizeof (ref_lsize));
zfs_nicenum(dds->dds_ref_psize, ref_psize, sizeof (ref_psize));
zfs_nicenum(dds->dds_ref_dsize, ref_dsize, sizeof (ref_dsize));
(void) printf("%6s %6s %5s %5s %5s %6s %5s %5s %5s\n",
refcnt,
blocks, lsize, psize, dsize,
ref_blocks, ref_lsize, ref_psize, ref_dsize);
}
/*
* Print the DDT histogram and the column totals.
*/
void
zpool_dump_ddt(const ddt_stat_t *dds_total, const ddt_histogram_t *ddh)
{
int h;
(void) printf("\n");
(void) printf("bucket "
" allocated "
" referenced \n");
(void) printf("______ "
"______________________________ "
"______________________________\n");
(void) printf("%6s %6s %5s %5s %5s %6s %5s %5s %5s\n",
"refcnt",
"blocks", "LSIZE", "PSIZE", "DSIZE",
"blocks", "LSIZE", "PSIZE", "DSIZE");
(void) printf("%6s %6s %5s %5s %5s %6s %5s %5s %5s\n",
"------",
"------", "-----", "-----", "-----",
"------", "-----", "-----", "-----");
for (h = 0; h < 64; h++)
dump_ddt_stat(&ddh->ddh_stat[h], h);
dump_ddt_stat(dds_total, -1);
(void) printf("\n");
}

View File

@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/*
@ -94,8 +93,6 @@ libzfs_error_description(libzfs_handle_t *hdl)
case EZFS_VOLTOOBIG:
return (dgettext(TEXT_DOMAIN, "volume size exceeds limit for "
"this system"));
case EZFS_VOLHASDATA:
return (dgettext(TEXT_DOMAIN, "volume has data"));
case EZFS_INVALIDNAME:
return (dgettext(TEXT_DOMAIN, "invalid name"));
case EZFS_BADRESTORE:
@ -138,16 +135,12 @@ libzfs_error_description(libzfs_handle_t *hdl)
return (dgettext(TEXT_DOMAIN, "smb remove share failed"));
case EZFS_SHARESMBFAILED:
return (dgettext(TEXT_DOMAIN, "smb add share failed"));
case EZFS_ISCSISVCUNAVAIL:
return (dgettext(TEXT_DOMAIN,
"iscsitgt service need to be enabled by "
"a privileged user"));
case EZFS_DEVLINKS:
return (dgettext(TEXT_DOMAIN, "failed to create /dev links"));
case EZFS_PERM:
return (dgettext(TEXT_DOMAIN, "permission denied"));
case EZFS_NOSPC:
return (dgettext(TEXT_DOMAIN, "out of space"));
case EZFS_FAULT:
return (dgettext(TEXT_DOMAIN, "bad address"));
case EZFS_IO:
return (dgettext(TEXT_DOMAIN, "I/O error"));
case EZFS_INTR:
@ -161,12 +154,6 @@ libzfs_error_description(libzfs_handle_t *hdl)
return (dgettext(TEXT_DOMAIN, "recursive dataset dependency"));
case EZFS_NOHISTORY:
return (dgettext(TEXT_DOMAIN, "no history available"));
case EZFS_UNSHAREISCSIFAILED:
return (dgettext(TEXT_DOMAIN,
"iscsitgtd failed request to unshare"));
case EZFS_SHAREISCSIFAILED:
return (dgettext(TEXT_DOMAIN,
"iscsitgtd failed request to share"));
case EZFS_POOLPROPS:
return (dgettext(TEXT_DOMAIN, "failed to retrieve "
"pool properties"));
@ -218,6 +205,20 @@ libzfs_error_description(libzfs_handle_t *hdl)
case EZFS_REFTAG_HOLD:
return (dgettext(TEXT_DOMAIN, "tag already exists on this "
"dataset"));
case EZFS_TAGTOOLONG:
return (dgettext(TEXT_DOMAIN, "tag too long"));
case EZFS_PIPEFAILED:
return (dgettext(TEXT_DOMAIN, "pipe create failed"));
case EZFS_THREADCREATEFAILED:
return (dgettext(TEXT_DOMAIN, "thread create failed"));
case EZFS_POSTSPLIT_ONLINE:
return (dgettext(TEXT_DOMAIN, "disk was split from this pool "
"into a new one"));
case EZFS_SCRUBBING:
return (dgettext(TEXT_DOMAIN, "currently scrubbing; "
"use 'zpool scrub -s' to cancel current scrub"));
case EZFS_NO_SCRUB:
return (dgettext(TEXT_DOMAIN, "there is no active scrub"));
case EZFS_UNKNOWN:
return (dgettext(TEXT_DOMAIN, "unknown error"));
default:
@ -306,6 +307,10 @@ zfs_common_error(libzfs_handle_t *hdl, int error, const char *fmt,
zfs_verror(hdl, EZFS_IO, fmt, ap);
return (-1);
case EFAULT:
zfs_verror(hdl, EZFS_FAULT, fmt, ap);
return (-1);
case EINTR:
zfs_verror(hdl, EZFS_INTR, fmt, ap);
return (-1);
@ -378,7 +383,7 @@ zfs_standard_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...)
zfs_verror(hdl, EZFS_POOLUNAVAIL, fmt, ap);
break;
default:
zfs_error_aux(hdl, strerror(errno));
zfs_error_aux(hdl, strerror(error));
zfs_verror(hdl, EZFS_UNKNOWN, fmt, ap);
break;
}
@ -610,6 +615,7 @@ libzfs_fini(libzfs_handle_t *hdl)
if (hdl->libzfs_log_str)
(void) free(hdl->libzfs_log_str);
zpool_free_handles(hdl);
libzfs_fru_clear(hdl, B_TRUE);
namespace_clear(hdl);
libzfs_mnttab_fini(hdl);
free(hdl);
@ -686,7 +692,7 @@ int
zcmd_alloc_dst_nvlist(libzfs_handle_t *hdl, zfs_cmd_t *zc, size_t len)
{
if (len == 0)
len = 2048;
len = 4*1024;
zc->zc_nvlist_dst_size = len;
if ((zc->zc_nvlist_dst = (uint64_t)(uintptr_t)
zfs_alloc(hdl, zc->zc_nvlist_dst_size)) == NULL)
@ -812,6 +818,8 @@ zprop_print_headers(zprop_get_cbdata_t *cbp, zfs_type_t type)
"PROPERTY"));
cbp->cb_colwidths[GET_COL_VALUE] = strlen(dgettext(TEXT_DOMAIN,
"VALUE"));
cbp->cb_colwidths[GET_COL_RECVD] = strlen(dgettext(TEXT_DOMAIN,
"RECEIVED"));
cbp->cb_colwidths[GET_COL_SOURCE] = strlen(dgettext(TEXT_DOMAIN,
"SOURCE"));
@ -825,7 +833,7 @@ zprop_print_headers(zprop_get_cbdata_t *cbp, zfs_type_t type)
* inheriting from the longest name. This is acceptable because in the
* majority of cases 'SOURCE' is the last column displayed, and we don't
* use the width anyway. Note that the 'VALUE' column can be oversized,
* if the name of the property is much longer the any values we find.
* if the name of the property is much longer than any values we find.
*/
for (pl = cbp->cb_proplist; pl != NULL; pl = pl->pl_next) {
/*
@ -856,6 +864,11 @@ zprop_print_headers(zprop_get_cbdata_t *cbp, zfs_type_t type)
pl->pl_width > cbp->cb_colwidths[GET_COL_VALUE])
cbp->cb_colwidths[GET_COL_VALUE] = pl->pl_width;
/* 'RECEIVED' column. */
if (pl != cbp->cb_proplist &&
pl->pl_recvd_width > cbp->cb_colwidths[GET_COL_RECVD])
cbp->cb_colwidths[GET_COL_RECVD] = pl->pl_recvd_width;
/*
* 'NAME' and 'SOURCE' columns
*/
@ -871,7 +884,7 @@ zprop_print_headers(zprop_get_cbdata_t *cbp, zfs_type_t type)
/*
* Now go through and print the headers.
*/
for (i = 0; i < 4; i++) {
for (i = 0; i < ZFS_GET_NCOLS; i++) {
switch (cbp->cb_columns[i]) {
case GET_COL_NAME:
title = dgettext(TEXT_DOMAIN, "NAME");
@ -882,6 +895,9 @@ zprop_print_headers(zprop_get_cbdata_t *cbp, zfs_type_t type)
case GET_COL_VALUE:
title = dgettext(TEXT_DOMAIN, "VALUE");
break;
case GET_COL_RECVD:
title = dgettext(TEXT_DOMAIN, "RECEIVED");
break;
case GET_COL_SOURCE:
title = dgettext(TEXT_DOMAIN, "SOURCE");
break;
@ -890,7 +906,8 @@ zprop_print_headers(zprop_get_cbdata_t *cbp, zfs_type_t type)
}
if (title != NULL) {
if (i == 3 || cbp->cb_columns[i + 1] == 0)
if (i == (ZFS_GET_NCOLS - 1) ||
cbp->cb_columns[i + 1] == GET_COL_NONE)
(void) printf("%s", title);
else
(void) printf("%-*s ",
@ -908,7 +925,7 @@ zprop_print_headers(zprop_get_cbdata_t *cbp, zfs_type_t type)
void
zprop_print_one_property(const char *name, zprop_get_cbdata_t *cbp,
const char *propname, const char *value, zprop_source_t sourcetype,
const char *source)
const char *source, const char *recvd_value)
{
int i;
const char *str;
@ -923,7 +940,7 @@ zprop_print_one_property(const char *name, zprop_get_cbdata_t *cbp,
if (cbp->cb_first)
zprop_print_headers(cbp, cbp->cb_type);
for (i = 0; i < 4; i++) {
for (i = 0; i < ZFS_GET_NCOLS; i++) {
switch (cbp->cb_columns[i]) {
case GET_COL_NAME:
str = name;
@ -960,14 +977,21 @@ zprop_print_one_property(const char *name, zprop_get_cbdata_t *cbp,
"inherited from %s", source);
str = buf;
break;
case ZPROP_SRC_RECEIVED:
str = "received";
break;
}
break;
case GET_COL_RECVD:
str = (recvd_value == NULL ? "-" : recvd_value);
break;
default:
continue;
}
if (cbp->cb_columns[i + 1] == 0)
if (cbp->cb_columns[i + 1] == GET_COL_NONE)
(void) printf("%s", str);
else if (cbp->cb_scripted)
(void) printf("%s\t", str);
@ -975,7 +999,6 @@ zprop_print_one_property(const char *name, zprop_get_cbdata_t *cbp,
(void) printf("%-*s ",
cbp->cb_colwidths[cbp->cb_columns[i]],
str);
}
(void) printf("\n");
@ -1037,7 +1060,7 @@ zfs_nicestrtonum(libzfs_handle_t *hdl, const char *value, uint64_t *num)
return (-1);
}
/* Rely on stroull() to process the numeric portion. */
/* Rely on strtoull() to process the numeric portion. */
errno = 0;
*num = strtoull(value, &end, 10);

View File

@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@ -75,6 +75,7 @@ extern "C" {
#include <sys/u8_textprep.h>
#include <sys/sysevent/eventdefs.h>
#include <sys/sysevent/dev.h>
#include <sys/sunddi.h>
/*
* Debugging
@ -105,21 +106,27 @@ extern void vpanic(const char *, __va_list);
#define fm_panic panic
extern int aok;
/* This definition is copied from assert.h. */
#if defined(__STDC__)
#if __STDC_VERSION__ - 0 >= 199901L
#define verify(EX) (void)((EX) || \
#define zverify(EX) (void)((EX) || (aok) || \
(__assert_c99(#EX, __FILE__, __LINE__, __func__), 0))
#else
#define verify(EX) (void)((EX) || (__assert(#EX, __FILE__, __LINE__), 0))
#define zverify(EX) (void)((EX) || (aok) || \
(__assert(#EX, __FILE__, __LINE__), 0))
#endif /* __STDC_VERSION__ - 0 >= 199901L */
#else
#define verify(EX) (void)((EX) || (_assert("EX", __FILE__, __LINE__), 0))
#define zverify(EX) (void)((EX) || (aok) || \
(_assert("EX", __FILE__, __LINE__), 0))
#endif /* __STDC__ */
#define VERIFY verify
#define ASSERT assert
#define VERIFY zverify
#define ASSERT zverify
#undef assert
#define assert zverify
extern void __assert(const char *, const char *, int);
@ -130,7 +137,7 @@ extern void __assert(const char *, const char *, int);
#define VERIFY3_IMPL(LEFT, OP, RIGHT, TYPE) do { \
const TYPE __left = (TYPE)(LEFT); \
const TYPE __right = (TYPE)(RIGHT); \
if (!(__left OP __right)) { \
if (!(__left OP __right) && (!aok)) { \
char *__buf = alloca(256); \
(void) snprintf(__buf, 256, "%s %s %s (0x%llx %s 0x%llx)", \
#LEFT, #OP, #RIGHT, \
@ -196,6 +203,18 @@ typedef struct kthread kthread_t;
#define thread_create(stk, stksize, func, arg, len, pp, state, pri) \
zk_thread_create(func, arg)
#define thread_exit() thr_exit(NULL)
#define thread_join(t) panic("libzpool cannot join threads")
#define newproc(f, a, cid, pri, ctp, pid) (ENOSYS)
/* in libzpool, p0 exists only to have its address taken */
struct proc {
uintptr_t this_is_never_used_dont_dereference_it;
};
extern struct proc p0;
#define PS_NONE -1
extern kthread_t *zk_thread_create(void (*func)(), void *arg);
@ -318,20 +337,27 @@ typedef void (task_func_t)(void *);
#define TASKQ_PREPOPULATE 0x0001
#define TASKQ_CPR_SAFE 0x0002 /* Use CPR safe protocol */
#define TASKQ_DYNAMIC 0x0004 /* Use dynamic thread scheduling */
#define TASKQ_THREADS_CPU_PCT 0x0008 /* Use dynamic thread scheduling */
#define TASKQ_THREADS_CPU_PCT 0x0008 /* Scale # threads by # cpus */
#define TASKQ_DC_BATCH 0x0010 /* Mark threads as batch */
#define TQ_SLEEP KM_SLEEP /* Can block for memory */
#define TQ_NOSLEEP KM_NOSLEEP /* cannot block for memory; may fail */
#define TQ_NOQUEUE 0x02 /* Do not enqueue if can't dispatch */
#define TQ_NOQUEUE 0x02 /* Do not enqueue if can't dispatch */
#define TQ_FRONT 0x08 /* Queue in front */
extern taskq_t *system_taskq;
extern taskq_t *taskq_create(const char *, int, pri_t, int, int, uint_t);
#define taskq_create_proc(a, b, c, d, e, p, f) \
(taskq_create(a, b, c, d, e, f))
#define taskq_create_sysdc(a, b, d, e, p, dc, f) \
(taskq_create(a, b, maxclsyspri, d, e, f))
extern taskqid_t taskq_dispatch(taskq_t *, task_func_t, void *, uint_t);
extern void taskq_destroy(taskq_t *);
extern void taskq_wait(taskq_t *);
extern int taskq_member(taskq_t *, void *);
extern void system_taskq_init(void);
extern void system_taskq_fini(void);
#define XVA_MAPSIZE 3
#define XVA_MAGIC 0x78766174
@ -345,6 +371,7 @@ typedef struct vnode {
char *v_path;
} vnode_t;
#define AV_SCANSTAMP_SZ 32 /* length of anti-virus scanstamp */
typedef struct xoptattr {
timestruc_t xoa_createtime; /* Create time of file */
@ -360,6 +387,8 @@ typedef struct xoptattr {
uint8_t xoa_opaque;
uint8_t xoa_av_quarantined;
uint8_t xoa_av_modified;
uint8_t xoa_av_scanstamp[AV_SCANSTAMP_SZ];
uint8_t xoa_reparse;
} xoptattr_t;
typedef struct vattr {
@ -406,9 +435,11 @@ typedef struct vsecattr {
#define CRCREAT 0
extern int fop_getattr(vnode_t *vp, vattr_t *vap);
#define VOP_CLOSE(vp, f, c, o, cr, ct) 0
#define VOP_PUTPAGE(vp, of, sz, fl, cr, ct) 0
#define VOP_GETATTR(vp, vap, fl, cr, ct) ((vap)->va_size = (vp)->v_size, 0)
#define VOP_GETATTR(vp, vap, fl, cr, ct) fop_getattr((vp), (vap));
#define VOP_FSYNC(vp, f, cr, ct) fsync((vp)->v_fd)
@ -433,13 +464,18 @@ extern vnode_t *rootdir;
/*
* Random stuff
*/
#define lbolt (gethrtime() >> 23)
#define lbolt64 (gethrtime() >> 23)
#define ddi_get_lbolt() (gethrtime() >> 23)
#define ddi_get_lbolt64() (gethrtime() >> 23)
#define hz 119 /* frequency when using gethrtime() >> 23 for lbolt */
extern void delay(clock_t ticks);
#define gethrestime_sec() time(NULL)
#define gethrestime(t) \
do {\
(t)->tv_sec = gethrestime_sec();\
(t)->tv_nsec = 0;\
} while (0);
#define max_ncpus 64
@ -490,6 +526,9 @@ typedef struct callb_cpr {
#define zone_dataset_visible(x, y) (1)
#define INGLOBALZONE(z) (1)
extern char *kmem_asprintf(const char *fmt, ...);
#define strfree(str) kmem_free((str), strlen(str)+1)
/*
* Hostname information
*/
@ -497,6 +536,9 @@ extern char hw_serial[]; /* for userland-emulated hostid access */
extern int ddi_strtoul(const char *str, char **nptr, int base,
unsigned long *result);
extern int ddi_strtoull(const char *str, char **nptr, int base,
u_longlong_t *result);
/* ZFS Boot Related stuff. */
struct _buf {

View File

@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@ -42,6 +42,7 @@
* Emulation of kernel services in userland.
*/
int aok;
uint64_t physmem;
vnode_t *rootdir = (vnode_t *)0xabcd1234;
char hw_serial[HW_HOSTID_LEN];
@ -50,6 +51,9 @@ struct utsname utsname = {
"userland", "libzpool", "1", "1", "na"
};
/* this only exists to have its address taken */
struct proc p0;
/*
* =========================================================================
* threads
@ -269,7 +273,7 @@ cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime)
clock_t delta;
top:
delta = abstime - lbolt;
delta = abstime - ddi_get_lbolt();
if (delta <= 0)
return (-1);
@ -444,6 +448,24 @@ vn_close(vnode_t *vp)
umem_free(vp, sizeof (vnode_t));
}
/*
* At a minimum we need to update the size since vdev_reopen()
* will no longer call vn_openat().
*/
int
fop_getattr(vnode_t *vp, vattr_t *vap)
{
struct stat64 st;
if (fstat64(vp->v_fd, &st) == -1) {
close(vp->v_fd);
return (errno);
}
vap->va_size = st.st_size;
return (0);
}
#ifdef ZFS_DEBUG
/*
@ -754,6 +776,17 @@ ddi_strtoul(const char *hw_serial, char **nptr, int base, unsigned long *result)
return (0);
}
int
ddi_strtoull(const char *str, char **nptr, int base, u_longlong_t *result)
{
char *end;
*result = strtoull(str, &end, base);
if (*result == 0)
return (errno);
return (0);
}
/*
* =========================================================================
* kernel emulation setup & teardown
@ -779,7 +812,8 @@ kernel_init(int mode)
dprintf("physmem = %llu pages (%.2f GB)\n", physmem,
(double)physmem * sysconf(_SC_PAGE_SIZE) / (1ULL << 30));
(void) snprintf(hw_serial, sizeof (hw_serial), "%ld", gethostid());
(void) snprintf(hw_serial, sizeof (hw_serial), "%ld",
(mode & FWRITE) ? gethostid() : 0);
VERIFY((random_fd = open("/dev/random", O_RDONLY)) != -1);
VERIFY((urandom_fd = open("/dev/urandom", O_RDONLY)) != -1);
@ -794,6 +828,8 @@ kernel_fini(void)
{
spa_fini();
system_taskq_fini();
close(random_fd);
close(urandom_fd);
@ -884,3 +920,27 @@ ksiddomain_rele(ksiddomain_t *ksid)
spa_strfree(ksid->kd_name);
umem_free(ksid, sizeof (ksiddomain_t));
}
/*
* Do not change the length of the returned string; it must be freed
* with strfree().
*/
char *
kmem_asprintf(const char *fmt, ...)
{
int size;
va_list adx;
char *buf;
va_start(adx, fmt);
size = vsnprintf(NULL, 0, fmt, adx) + 1;
va_end(adx);
buf = kmem_alloc(size, KM_SLEEP);
va_start(adx, fmt);
size = vsnprintf(buf, size, fmt, adx);
va_end(adx);
return (buf);
}

View File

@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@ -49,6 +49,8 @@ struct taskq {
int tq_nalloc;
int tq_minalloc;
int tq_maxalloc;
kcondvar_t tq_maxalloc_cv;
int tq_maxalloc_wait;
task_t *tq_freelist;
task_t tq_task;
};
@ -57,26 +59,36 @@ static task_t *
task_alloc(taskq_t *tq, int tqflags)
{
task_t *t;
int rv;
if ((t = tq->tq_freelist) != NULL && tq->tq_nalloc >= tq->tq_minalloc) {
again: if ((t = tq->tq_freelist) != NULL && tq->tq_nalloc >= tq->tq_minalloc) {
tq->tq_freelist = t->task_next;
} else {
mutex_exit(&tq->tq_lock);
if (tq->tq_nalloc >= tq->tq_maxalloc) {
if (!(tqflags & KM_SLEEP)) {
mutex_enter(&tq->tq_lock);
if (!(tqflags & KM_SLEEP))
return (NULL);
}
/*
* We don't want to exceed tq_maxalloc, but we can't
* wait for other tasks to complete (and thus free up
* task structures) without risking deadlock with
* the caller. So, we just delay for one second
* to throttle the allocation rate.
* to throttle the allocation rate. If we have tasks
* complete before one second timeout expires then
* taskq_ent_free will signal us and we will
* immediately retry the allocation.
*/
delay(hz);
tq->tq_maxalloc_wait++;
rv = cv_timedwait(&tq->tq_maxalloc_cv,
&tq->tq_lock, ddi_get_lbolt() + hz);
tq->tq_maxalloc_wait--;
if (rv > 0)
goto again; /* signaled */
}
mutex_exit(&tq->tq_lock);
t = kmem_alloc(sizeof (task_t), tqflags);
mutex_enter(&tq->tq_lock);
if (t != NULL)
tq->tq_nalloc++;
@ -96,6 +108,9 @@ task_free(taskq_t *tq, task_t *t)
kmem_free(t, sizeof (task_t));
mutex_enter(&tq->tq_lock);
}
if (tq->tq_maxalloc_wait)
cv_signal(&tq->tq_maxalloc_cv);
}
taskqid_t
@ -114,8 +129,13 @@ taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t tqflags)
mutex_exit(&tq->tq_lock);
return (0);
}
t->task_next = &tq->tq_task;
t->task_prev = tq->tq_task.task_prev;
if (tqflags & TQ_FRONT) {
t->task_next = tq->tq_task.task_next;
t->task_prev = &tq->tq_task;
} else {
t->task_next = &tq->tq_task;
t->task_prev = tq->tq_task.task_prev;
}
t->task_next->task_prev = t;
t->task_prev->task_next = t;
t->task_func = func;
@ -191,6 +211,7 @@ taskq_create(const char *name, int nthreads, pri_t pri,
mutex_init(&tq->tq_lock, NULL, MUTEX_DEFAULT, NULL);
cv_init(&tq->tq_dispatch_cv, NULL, CV_DEFAULT, NULL);
cv_init(&tq->tq_wait_cv, NULL, CV_DEFAULT, NULL);
cv_init(&tq->tq_maxalloc_cv, NULL, CV_DEFAULT, NULL);
tq->tq_flags = flags | TASKQ_ACTIVE;
tq->tq_active = nthreads;
tq->tq_nthreads = nthreads;
@ -247,6 +268,7 @@ taskq_destroy(taskq_t *tq)
mutex_destroy(&tq->tq_lock);
cv_destroy(&tq->tq_dispatch_cv);
cv_destroy(&tq->tq_wait_cv);
cv_destroy(&tq->tq_maxalloc_cv);
kmem_free(tq, sizeof (taskq_t));
}
@ -272,3 +294,10 @@ system_taskq_init(void)
system_taskq = taskq_create("system_taskq", 64, minclsyspri, 4, 512,
TASKQ_DYNAMIC | TASKQ_PREPOPULATE);
}
void
system_taskq_fini(void)
{
taskq_destroy(system_taskq);
system_taskq = NULL; /* defensive */
}

View File

@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#include <assert.h>
@ -90,7 +89,7 @@ show_vdev_stats(const char *desc, const char *ctype, nvlist_t *nv, int indent)
if (is_log)
prefix = "log ";
if (nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_STATS,
if (nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS,
(uint64_t **)&vs, &c) != 0)
vs = &v0;

View File

@ -19,13 +19,10 @@
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
/*
* AVL - generic AVL tree implementation for kernel use
*
@ -243,7 +240,7 @@ avl_nearest(avl_tree_t *tree, avl_index_t where, int direction)
* "void *" of the found tree node
*/
void *
avl_find(avl_tree_t *tree, void *value, avl_index_t *where)
avl_find(avl_tree_t *tree, const void *value, avl_index_t *where)
{
avl_node_t *node;
avl_node_t *prev = NULL;

View File

@ -19,15 +19,13 @@
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _AVL_H
#define _AVL_H
#pragma ident "%Z%%M% %I% %E% SMI"
/*
* This is a private header file. Applications should not directly include
* this file.
@ -163,7 +161,7 @@ extern void avl_create(avl_tree_t *tree,
* node - node that has the value being looked for
* where - position for use with avl_nearest() or avl_insert(), may be NULL
*/
extern void *avl_find(avl_tree_t *tree, void *node, avl_index_t *where);
extern void *avl_find(avl_tree_t *tree, const void *node, avl_index_t *where);
/*
* Insert a node into the tree.

View File

@ -19,15 +19,13 @@
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_NVPAIR_H
#define _SYS_NVPAIR_H
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/types.h>
#include <sys/errno.h>
#include <sys/va_list.h>
@ -199,6 +197,7 @@ int nvlist_add_double(nvlist_t *, const char *, double);
int nvlist_remove(nvlist_t *, const char *, data_type_t);
int nvlist_remove_all(nvlist_t *, const char *);
int nvlist_remove_nvpair(nvlist_t *, nvpair_t *);
int nvlist_lookup_boolean(nvlist_t *, const char *);
int nvlist_lookup_boolean_value(nvlist_t *, const char *, boolean_t *);
@ -237,9 +236,11 @@ int nvlist_lookup_nvpair(nvlist_t *, const char *, nvpair_t **);
int nvlist_lookup_nvpair_embedded_index(nvlist_t *, const char *, nvpair_t **,
int *, char **);
boolean_t nvlist_exists(nvlist_t *, const char *);
boolean_t nvlist_empty(nvlist_t *);
/* processing nvpair */
nvpair_t *nvlist_next_nvpair(nvlist_t *, nvpair_t *);
nvpair_t *nvlist_prev_nvpair(nvlist_t *, nvpair_t *);
char *nvpair_name(nvpair_t *);
data_type_t nvpair_type(nvpair_t *);
int nvpair_type_is_array(nvpair_t *);

View File

@ -20,12 +20,10 @@
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/stropts.h>
#include <sys/debug.h>
#include <sys/isa_defs.h>
@ -692,6 +690,18 @@ nvlist_remove(nvlist_t *nvl, const char *name, data_type_t type)
return (ENOENT);
}
int
nvlist_remove_nvpair(nvlist_t *nvl, nvpair_t *nvp)
{
if (nvl == NULL || nvp == NULL)
return (EINVAL);
nvp_buf_unlink(nvl, nvp);
nvpair_free(nvp);
nvp_buf_free(nvl, nvp);
return (0);
}
/*
* This function calculates the size of an nvpair value.
*
@ -1162,6 +1172,42 @@ nvlist_next_nvpair(nvlist_t *nvl, nvpair_t *nvp)
return (curr != NULL ? &curr->nvi_nvp : NULL);
}
nvpair_t *
nvlist_prev_nvpair(nvlist_t *nvl, nvpair_t *nvp)
{
nvpriv_t *priv;
i_nvp_t *curr;
if (nvl == NULL ||
(priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv) == NULL)
return (NULL);
curr = NVPAIR2I_NVP(nvp);
if (nvp == NULL)
curr = priv->nvp_last;
else if (priv->nvp_curr == curr || nvlist_contains_nvp(nvl, nvp))
curr = curr->nvi_prev;
else
curr = NULL;
priv->nvp_curr = curr;
return (curr != NULL ? &curr->nvi_nvp : NULL);
}
boolean_t
nvlist_empty(nvlist_t *nvl)
{
nvpriv_t *priv;
if (nvl == NULL ||
(priv = (nvpriv_t *)(uintptr_t)nvl->nvl_priv) == NULL)
return (B_TRUE);
return (priv->nvp_list == NULL);
}
char *
nvpair_name(nvpair_t *nvp)
{

View File

@ -20,10 +20,11 @@
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/* Portions Copyright 2010 Robert Milkowski */
#ifndef _SYS_FS_ZFS_H
#define _SYS_FS_ZFS_H
@ -86,12 +87,11 @@ typedef enum {
ZFS_PROP_READONLY,
ZFS_PROP_ZONED,
ZFS_PROP_SNAPDIR,
ZFS_PROP_ACLMODE,
ZFS_PROP_PRIVATE, /* not exposed to user, temporary */
ZFS_PROP_ACLINHERIT,
ZFS_PROP_CREATETXG, /* not exposed to the user */
ZFS_PROP_NAME, /* not exposed to the user */
ZFS_PROP_CANMOUNT,
ZFS_PROP_SHAREISCSI,
ZFS_PROP_ISCSIOPTIONS, /* not exposed to the user */
ZFS_PROP_XATTR,
ZFS_PROP_NUMCLONES, /* not exposed to the user */
@ -116,6 +116,12 @@ typedef enum {
ZFS_PROP_STMF_SHAREINFO, /* not exposed to the user */
ZFS_PROP_DEFER_DESTROY,
ZFS_PROP_USERREFS,
ZFS_PROP_LOGBIAS,
ZFS_PROP_UNIQUE, /* not exposed to the user */
ZFS_PROP_OBJSETID, /* not exposed to the user */
ZFS_PROP_DEDUP,
ZFS_PROP_MLSLABEL,
ZFS_PROP_SYNC,
ZFS_NUM_PROPS
} zfs_prop_t;
@ -138,8 +144,6 @@ extern const char *zfs_userquota_prop_prefixes[ZFS_NUM_USERQUOTA_PROPS];
typedef enum {
ZPOOL_PROP_NAME,
ZPOOL_PROP_SIZE,
ZPOOL_PROP_USED,
ZPOOL_PROP_AVAILABLE,
ZPOOL_PROP_CAPACITY,
ZPOOL_PROP_ALTROOT,
ZPOOL_PROP_HEALTH,
@ -152,6 +156,10 @@ typedef enum {
ZPOOL_PROP_FAILUREMODE,
ZPOOL_PROP_LISTSNAPS,
ZPOOL_PROP_AUTOEXPAND,
ZPOOL_PROP_DEDUPDITTO,
ZPOOL_PROP_DEDUPRATIO,
ZPOOL_PROP_FREE,
ZPOOL_PROP_ALLOCATED,
ZPOOL_NUM_PROPS
} zpool_prop_t;
@ -166,10 +174,27 @@ typedef enum {
ZPROP_SRC_DEFAULT = 0x2,
ZPROP_SRC_TEMPORARY = 0x4,
ZPROP_SRC_LOCAL = 0x8,
ZPROP_SRC_INHERITED = 0x10
ZPROP_SRC_INHERITED = 0x10,
ZPROP_SRC_RECEIVED = 0x20
} zprop_source_t;
#define ZPROP_SRC_ALL 0x1f
#define ZPROP_SRC_ALL 0x3f
#define ZPROP_SOURCE_VAL_RECVD "$recvd"
#define ZPROP_N_MORE_ERRORS "N_MORE_ERRORS"
/*
* Dataset flag implemented as a special entry in the props zap object
* indicating that the dataset has received properties on or after
* SPA_VERSION_RECVD_PROPS. The first such receive blows away local properties
* just as it did in earlier versions, and thereafter, local properties are
* preserved.
*/
#define ZPROP_HAS_RECVD "$hasrecvd"
typedef enum {
ZPROP_ERR_NOCLEAR = 0x1, /* failure to clear existing props */
ZPROP_ERR_NORESTORE = 0x2 /* failure to restore props on error */
} zprop_errflags_t;
typedef int (*zprop_func)(int, void *);
@ -191,9 +216,10 @@ boolean_t zfs_prop_setonce(zfs_prop_t);
const char *zfs_prop_to_name(zfs_prop_t);
zfs_prop_t zfs_name_to_prop(const char *);
boolean_t zfs_prop_user(const char *);
boolean_t zfs_prop_userquota(const char *name);
boolean_t zfs_prop_userquota(const char *);
int zfs_prop_index_to_string(zfs_prop_t, uint64_t, const char **);
int zfs_prop_string_to_index(zfs_prop_t, const char *, uint64_t *);
uint64_t zfs_prop_random_value(zfs_prop_t, uint64_t seed);
boolean_t zfs_prop_valid_for_type(int, zfs_type_t);
/*
@ -206,6 +232,7 @@ uint64_t zpool_prop_default_numeric(zpool_prop_t);
boolean_t zpool_prop_readonly(zpool_prop_t);
int zpool_prop_index_to_string(zpool_prop_t, uint64_t, const char **);
int zpool_prop_string_to_index(zpool_prop_t, const char *, uint64_t *);
uint64_t zpool_prop_random_value(zpool_prop_t, uint64_t seed);
/*
* Definitions for the Delegation.
@ -236,6 +263,8 @@ typedef enum {
#define ZFS_DELEG_PERM_GID "gid"
#define ZFS_DELEG_PERM_GROUPS "groups"
#define ZFS_MLSLABEL_DEFAULT "none"
#define ZFS_SMB_ACL_SRC "src"
#define ZFS_SMB_ACL_TARGET "target"
@ -245,6 +274,11 @@ typedef enum {
ZFS_CANMOUNT_NOAUTO = 2
} zfs_canmount_type_t;
typedef enum {
ZFS_LOGBIAS_LATENCY = 0,
ZFS_LOGBIAS_THROUGHPUT = 1
} zfs_logbias_op_t;
typedef enum zfs_share_op {
ZFS_SHARE_NFS = 0,
ZFS_UNSHARE_NFS = 1,
@ -265,6 +299,12 @@ typedef enum zfs_cache_type {
ZFS_CACHE_ALL = 2
} zfs_cache_type_t;
typedef enum {
ZFS_SYNC_STANDARD = 0,
ZFS_SYNC_ALWAYS = 1,
ZFS_SYNC_DISABLED = 2
} zfs_sync_type_t;
/*
* On-disk version number.
@ -287,14 +327,22 @@ typedef enum zfs_cache_type {
#define SPA_VERSION_16 16ULL
#define SPA_VERSION_17 17ULL
#define SPA_VERSION_18 18ULL
#define SPA_VERSION_19 19ULL
#define SPA_VERSION_20 20ULL
#define SPA_VERSION_21 21ULL
#define SPA_VERSION_22 22ULL
#define SPA_VERSION_23 23ULL
#define SPA_VERSION_24 24ULL
#define SPA_VERSION_25 25ULL
#define SPA_VERSION_26 26ULL
/*
* When bumping up SPA_VERSION, make sure GRUB ZFS understands the on-disk
* format change. Go to usr/src/grub/grub-0.97/stage2/{zfs-include/, fsys_zfs*},
* and do the appropriate changes. Also bump the version number in
* usr/src/grub/capability.
*/
#define SPA_VERSION SPA_VERSION_18
#define SPA_VERSION_STRING "18"
#define SPA_VERSION SPA_VERSION_26
#define SPA_VERSION_STRING "26"
/*
* Symbolic names for the changes that caused a SPA_VERSION switch.
@ -311,7 +359,7 @@ typedef enum zfs_cache_type {
#define SPA_VERSION_DITTO_BLOCKS SPA_VERSION_2
#define SPA_VERSION_SPARES SPA_VERSION_3
#define SPA_VERSION_RAIDZ2 SPA_VERSION_3
#define SPA_VERSION_BPLIST_ACCOUNT SPA_VERSION_3
#define SPA_VERSION_BPOBJ_ACCOUNT SPA_VERSION_3
#define SPA_VERSION_RAIDZ_DEFLATE SPA_VERSION_3
#define SPA_VERSION_DNODE_BYTES SPA_VERSION_3
#define SPA_VERSION_ZPOOL_HISTORY SPA_VERSION_4
@ -334,6 +382,15 @@ typedef enum zfs_cache_type {
#define SPA_VERSION_STMF_PROP SPA_VERSION_16
#define SPA_VERSION_RAIDZ3 SPA_VERSION_17
#define SPA_VERSION_USERREFS SPA_VERSION_18
#define SPA_VERSION_HOLES SPA_VERSION_19
#define SPA_VERSION_ZLE_COMPRESSION SPA_VERSION_20
#define SPA_VERSION_DEDUP SPA_VERSION_21
#define SPA_VERSION_RECVD_PROPS SPA_VERSION_22
#define SPA_VERSION_SLIM_ZIL SPA_VERSION_23
#define SPA_VERSION_SA SPA_VERSION_24
#define SPA_VERSION_SCAN SPA_VERSION_25
#define SPA_VERSION_DIR_CLONES SPA_VERSION_26
#define SPA_VERSION_DEADLISTS SPA_VERSION_26
/*
* ZPL version - rev'd whenever an incompatible on-disk format change
@ -347,8 +404,9 @@ typedef enum zfs_cache_type {
#define ZPL_VERSION_2 2ULL
#define ZPL_VERSION_3 3ULL
#define ZPL_VERSION_4 4ULL
#define ZPL_VERSION ZPL_VERSION_4
#define ZPL_VERSION_STRING "4"
#define ZPL_VERSION_5 5ULL
#define ZPL_VERSION ZPL_VERSION_5
#define ZPL_VERSION_STRING "5"
#define ZPL_VERSION_INITIAL ZPL_VERSION_1
#define ZPL_VERSION_DIRENT_TYPE ZPL_VERSION_2
@ -356,6 +414,23 @@ typedef enum zfs_cache_type {
#define ZPL_VERSION_NORMALIZATION ZPL_VERSION_3
#define ZPL_VERSION_SYSATTR ZPL_VERSION_3
#define ZPL_VERSION_USERSPACE ZPL_VERSION_4
#define ZPL_VERSION_SA ZPL_VERSION_5
/* Rewind request information */
#define ZPOOL_NO_REWIND 1 /* No policy - default behavior */
#define ZPOOL_NEVER_REWIND 2 /* Do not search for best txg or rewind */
#define ZPOOL_TRY_REWIND 4 /* Search for best txg, but do not rewind */
#define ZPOOL_DO_REWIND 8 /* Rewind to best txg w/in deferred frees */
#define ZPOOL_EXTREME_REWIND 16 /* Allow extreme measures to find best txg */
#define ZPOOL_REWIND_MASK 28 /* All the possible rewind bits */
#define ZPOOL_REWIND_POLICIES 31 /* All the possible policy bits */
typedef struct zpool_rewind_policy {
uint32_t zrp_request; /* rewind behavior requested */
uint64_t zrp_maxmeta; /* max acceptable meta-data errors */
uint64_t zrp_maxdata; /* max acceptable data errors */
uint64_t zrp_txg; /* specific txg to load */
} zpool_rewind_policy_t;
/*
* The following are configuration names used in the nvlist describing a pool's
@ -380,7 +455,8 @@ typedef enum zfs_cache_type {
#define ZPOOL_CONFIG_ASHIFT "ashift"
#define ZPOOL_CONFIG_ASIZE "asize"
#define ZPOOL_CONFIG_DTL "DTL"
#define ZPOOL_CONFIG_STATS "stats"
#define ZPOOL_CONFIG_SCAN_STATS "scan_stats" /* not stored on disk */
#define ZPOOL_CONFIG_VDEV_STATS "vdev_stats" /* not stored on disk */
#define ZPOOL_CONFIG_WHOLE_DISK "whole_disk"
#define ZPOOL_CONFIG_ERRCOUNT "error_count"
#define ZPOOL_CONFIG_NOT_PRESENT "not_present"
@ -393,6 +469,17 @@ typedef enum zfs_cache_type {
#define ZPOOL_CONFIG_PHYS_PATH "phys_path"
#define ZPOOL_CONFIG_IS_LOG "is_log"
#define ZPOOL_CONFIG_L2CACHE "l2cache"
#define ZPOOL_CONFIG_HOLE_ARRAY "hole_array"
#define ZPOOL_CONFIG_VDEV_CHILDREN "vdev_children"
#define ZPOOL_CONFIG_IS_HOLE "is_hole"
#define ZPOOL_CONFIG_DDT_HISTOGRAM "ddt_histogram"
#define ZPOOL_CONFIG_DDT_OBJ_STATS "ddt_object_stats"
#define ZPOOL_CONFIG_DDT_STATS "ddt_stats"
#define ZPOOL_CONFIG_SPLIT "splitcfg"
#define ZPOOL_CONFIG_ORIG_GUID "orig_guid"
#define ZPOOL_CONFIG_SPLIT_GUID "split_guid"
#define ZPOOL_CONFIG_SPLIT_LIST "guid_list"
#define ZPOOL_CONFIG_REMOVING "removing"
#define ZPOOL_CONFIG_SUSPENDED "suspended" /* not stored on disk */
#define ZPOOL_CONFIG_TIMESTAMP "timestamp" /* not stored on disk */
#define ZPOOL_CONFIG_BOOTFS "bootfs" /* not stored on disk */
@ -406,6 +493,19 @@ typedef enum zfs_cache_type {
#define ZPOOL_CONFIG_DEGRADED "degraded"
#define ZPOOL_CONFIG_REMOVED "removed"
#define ZPOOL_CONFIG_FRU "fru"
#define ZPOOL_CONFIG_AUX_STATE "aux_state"
/* Rewind policy parameters */
#define ZPOOL_REWIND_POLICY "rewind-policy"
#define ZPOOL_REWIND_REQUEST "rewind-request"
#define ZPOOL_REWIND_REQUEST_TXG "rewind-request-txg"
#define ZPOOL_REWIND_META_THRESH "rewind-meta-thresh"
#define ZPOOL_REWIND_DATA_THRESH "rewind-data-thresh"
/* Rewind data discovered */
#define ZPOOL_CONFIG_LOAD_TIME "rewind_txg_ts"
#define ZPOOL_CONFIG_LOAD_DATA_ERRORS "verify_data_errors"
#define ZPOOL_CONFIG_REWIND_TIME "seconds_of_rewind"
#define VDEV_TYPE_ROOT "root"
#define VDEV_TYPE_MIRROR "mirror"
@ -414,6 +514,7 @@ typedef enum zfs_cache_type {
#define VDEV_TYPE_DISK "disk"
#define VDEV_TYPE_FILE "file"
#define VDEV_TYPE_MISSING "missing"
#define VDEV_TYPE_HOLE "hole"
#define VDEV_TYPE_SPARE "spare"
#define VDEV_TYPE_LOG "log"
#define VDEV_TYPE_L2CACHE "l2cache"
@ -463,7 +564,9 @@ typedef enum vdev_aux {
VDEV_AUX_SPARED, /* hot spare used in another pool */
VDEV_AUX_ERR_EXCEEDED, /* too many errors */
VDEV_AUX_IO_FAILURE, /* experienced I/O failure */
VDEV_AUX_BAD_LOG /* cannot read log chain(s) */
VDEV_AUX_BAD_LOG, /* cannot read log chain(s) */
VDEV_AUX_EXTERNAL, /* external diagnosis */
VDEV_AUX_SPLIT_POOL /* vdev was split off into another pool */
} vdev_aux_t;
/*
@ -484,14 +587,14 @@ typedef enum pool_state {
} pool_state_t;
/*
* Scrub types.
* Scan Functions.
*/
typedef enum pool_scrub_type {
POOL_SCRUB_NONE,
POOL_SCRUB_RESILVER,
POOL_SCRUB_EVERYTHING,
POOL_SCRUB_TYPES
} pool_scrub_type_t;
typedef enum pool_scan_func {
POOL_SCAN_NONE,
POOL_SCAN_SCRUB,
POOL_SCAN_RESILVER,
POOL_SCAN_FUNCS
} pool_scan_func_t;
/*
* ZIO types. Needed to interpret vdev statistics below.
@ -506,6 +609,36 @@ typedef enum zio_type {
ZIO_TYPES
} zio_type_t;
/*
* Pool statistics. Note: all fields should be 64-bit because this
* is passed between kernel and userland as an nvlist uint64 array.
*/
typedef struct pool_scan_stat {
/* values stored on disk */
uint64_t pss_func; /* pool_scan_func_t */
uint64_t pss_state; /* dsl_scan_state_t */
uint64_t pss_start_time; /* scan start time */
uint64_t pss_end_time; /* scan end time */
uint64_t pss_to_examine; /* total bytes to scan */
uint64_t pss_examined; /* total examined bytes */
uint64_t pss_to_process; /* total bytes to process */
uint64_t pss_processed; /* total processed bytes */
uint64_t pss_errors; /* scan errors */
/* values not stored on disk */
uint64_t pss_pass_exam; /* examined bytes per scan pass */
uint64_t pss_pass_start; /* start time of a scan pass */
} pool_scan_stat_t;
typedef enum dsl_scan_state {
DSS_NONE,
DSS_SCANNING,
DSS_FINISHED,
DSS_CANCELED,
DSS_NUM_STATES
} dsl_scan_state_t;
/*
* Vdev statistics. Note: all fields should be 64-bit because this
* is passed between kernel and userland as an nvlist uint64 array.
@ -524,34 +657,49 @@ typedef struct vdev_stat {
uint64_t vs_write_errors; /* write errors */
uint64_t vs_checksum_errors; /* checksum errors */
uint64_t vs_self_healed; /* self-healed bytes */
uint64_t vs_scrub_type; /* pool_scrub_type_t */
uint64_t vs_scrub_complete; /* completed? */
uint64_t vs_scrub_examined; /* bytes examined; top */
uint64_t vs_scrub_repaired; /* bytes repaired; leaf */
uint64_t vs_scrub_errors; /* errors during scrub */
uint64_t vs_scrub_start; /* UTC scrub start time */
uint64_t vs_scrub_end; /* UTC scrub end time */
uint64_t vs_scan_removing; /* removing? */
uint64_t vs_scan_processed; /* scan processed bytes */
} vdev_stat_t;
/*
* DDT statistics. Note: all fields should be 64-bit because this
* is passed between kernel and userland as an nvlist uint64 array.
*/
typedef struct ddt_object {
uint64_t ddo_count; /* number of elments in ddt */
uint64_t ddo_dspace; /* size of ddt on disk */
uint64_t ddo_mspace; /* size of ddt in-core */
} ddt_object_t;
typedef struct ddt_stat {
uint64_t dds_blocks; /* blocks */
uint64_t dds_lsize; /* logical size */
uint64_t dds_psize; /* physical size */
uint64_t dds_dsize; /* deflated allocated size */
uint64_t dds_ref_blocks; /* referenced blocks */
uint64_t dds_ref_lsize; /* referenced lsize * refcnt */
uint64_t dds_ref_psize; /* referenced psize * refcnt */
uint64_t dds_ref_dsize; /* referenced dsize * refcnt */
} ddt_stat_t;
typedef struct ddt_histogram {
ddt_stat_t ddh_stat[64]; /* power-of-two histogram buckets */
} ddt_histogram_t;
#define ZVOL_DRIVER "zvol"
#define ZFS_DRIVER "zfs"
#define ZFS_DEV "/dev/zfs"
/*
* zvol paths. Irritatingly, the devfsadm interfaces want all these
* paths without the /dev prefix, but for some things, we want the
* /dev prefix. Below are the names without /dev.
*/
#define ZVOL_DEV_DIR "zvol/dsk"
#define ZVOL_RDEV_DIR "zvol/rdsk"
/*
* And here are the things we need with /dev, etc. in front of them.
*/
/* general zvol path */
#define ZVOL_DIR "/dev/zvol"
/* expansion */
#define ZVOL_PSEUDO_DEV "/devices/pseudo/zfs@0:"
#define ZVOL_FULL_DEV_DIR "/dev/" ZVOL_DEV_DIR "/"
/* for dump and swap */
#define ZVOL_FULL_DEV_DIR ZVOL_DIR "/dsk/"
#define ZVOL_FULL_RDEV_DIR ZVOL_DIR "/rdsk/"
#define ZVOL_PROP_NAME "name"
#define ZVOL_DEFAULT_BLOCKSIZE 8192
/*
* /dev/zfs ioctl numbers.
@ -566,7 +714,7 @@ typedef enum zfs_ioc {
ZFS_IOC_POOL_CONFIGS,
ZFS_IOC_POOL_STATS,
ZFS_IOC_POOL_TRYIMPORT,
ZFS_IOC_POOL_SCRUB,
ZFS_IOC_POOL_SCAN,
ZFS_IOC_POOL_FREEZE,
ZFS_IOC_POOL_UPGRADE,
ZFS_IOC_POOL_GET_HISTORY,
@ -582,8 +730,6 @@ typedef enum zfs_ioc {
ZFS_IOC_DATASET_LIST_NEXT,
ZFS_IOC_SNAPSHOT_LIST_NEXT,
ZFS_IOC_SET_PROP,
ZFS_IOC_CREATE_MINOR,
ZFS_IOC_REMOVE_MINOR,
ZFS_IOC_CREATE,
ZFS_IOC_DESTROY,
ZFS_IOC_ROLLBACK,
@ -604,7 +750,6 @@ typedef enum zfs_ioc {
ZFS_IOC_POOL_GET_PROPS,
ZFS_IOC_SET_FSACL,
ZFS_IOC_GET_FSACL,
ZFS_IOC_ISCSI_PERM_CHECK,
ZFS_IOC_SHARE,
ZFS_IOC_INHERIT_PROP,
ZFS_IOC_SMB_ACL,
@ -613,17 +758,21 @@ typedef enum zfs_ioc {
ZFS_IOC_USERSPACE_UPGRADE,
ZFS_IOC_HOLD,
ZFS_IOC_RELEASE,
ZFS_IOC_GET_HOLDS
ZFS_IOC_GET_HOLDS,
ZFS_IOC_OBJSET_RECVD_PROPS,
ZFS_IOC_VDEV_SPLIT
} zfs_ioc_t;
/*
* Internal SPA load state. Used by FMA diagnosis engine.
*/
typedef enum {
SPA_LOAD_NONE, /* no load in progress */
SPA_LOAD_OPEN, /* normal open */
SPA_LOAD_IMPORT, /* import in progress */
SPA_LOAD_TRYIMPORT /* tryimport in progress */
SPA_LOAD_NONE, /* no load in progress */
SPA_LOAD_OPEN, /* normal open */
SPA_LOAD_IMPORT, /* import in progress */
SPA_LOAD_TRYIMPORT, /* tryimport in progress */
SPA_LOAD_RECOVER, /* recovery requested */
SPA_LOAD_ERROR /* load failed */
} spa_load_state_t;
/*
@ -686,7 +835,7 @@ typedef enum {
/*
* Note: This is encoded on-disk, so new events must be added to the
* end, and unused events can not be removed. Be sure to edit
* zpool_main.c: hist_event_table[].
* libzfs_pool.c: hist_event_table[].
*/
typedef enum history_internal_events {
LOG_NO_EVENT = 0,
@ -703,7 +852,7 @@ typedef enum history_internal_events {
LOG_POOL_VDEV_OFFLINE,
LOG_POOL_UPGRADE,
LOG_POOL_CLEAR,
LOG_POOL_SCRUB,
LOG_POOL_SCAN,
LOG_POOL_PROPSET,
LOG_DS_CREATE,
LOG_DS_CLONE,
@ -726,9 +875,10 @@ typedef enum history_internal_events {
LOG_DS_UPGRADE,
LOG_DS_REFQUOTA,
LOG_DS_REFRESERV,
LOG_POOL_SCRUB_DONE,
LOG_POOL_SCAN_DONE,
LOG_DS_USER_HOLD,
LOG_DS_USER_RELEASE,
LOG_POOL_SPLIT,
LOG_END
} history_internal_events_t;

View File

@ -19,15 +19,12 @@
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _ZFS_COMUTIL_H
#define _ZFS_COMUTIL_H
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/fs/zfs.h>
#include <sys/types.h>
@ -35,7 +32,12 @@
extern "C" {
#endif
extern boolean_t zfs_allocatable_devs(nvlist_t *nv);
extern boolean_t zfs_allocatable_devs(nvlist_t *);
extern void zpool_get_rewind_policy(nvlist_t *, zpool_rewind_policy_t *);
extern int zfs_zpl_version_map(int spa_version);
extern int zfs_spa_version_map(int zpl_version);
extern const char *zfs_history_event_names[LOG_END];
#ifdef __cplusplus
}

View File

@ -0,0 +1,53 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _ZFS_FLETCHER_H
#define _ZFS_FLETCHER_H
#include <sys/types.h>
#include <sys/spa.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
* fletcher checksum functions
*/
void fletcher_2_native(const void *, uint64_t, zio_cksum_t *);
void fletcher_2_byteswap(const void *, uint64_t, zio_cksum_t *);
void fletcher_4_native(const void *, uint64_t, zio_cksum_t *);
void fletcher_4_byteswap(const void *, uint64_t, zio_cksum_t *);
void fletcher_4_incremental_native(const void *, uint64_t,
zio_cksum_t *);
void fletcher_4_incremental_byteswap(const void *, uint64_t,
zio_cksum_t *);
#ifdef __cplusplus
}
#endif
#endif /* _ZFS_FLETCHER_H */

View File

@ -19,15 +19,13 @@
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _ZFS_PROP_H
#define _ZFS_PROP_H
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/fs/zfs.h>
#include <sys/types.h>
@ -79,6 +77,7 @@ typedef struct {
/* "zfs get" help message */
const zprop_index_t *pd_table; /* for index properties, a table */
/* defining the possible values */
size_t pd_table_size; /* number of entries in pd_table[] */
} zprop_desc_t;
/*
@ -99,16 +98,16 @@ zprop_desc_t *zpool_prop_get_table(void);
/*
* Common routines to initialize property tables
*/
void register_impl(int, const char *, zprop_type_t, uint64_t,
void zprop_register_impl(int, const char *, zprop_type_t, uint64_t,
const char *, zprop_attr_t, int, const char *, const char *,
boolean_t, boolean_t, const zprop_index_t *);
void register_string(int, const char *, const char *, zprop_attr_t attr,
int, const char *, const char *);
void register_number(int, const char *, uint64_t, zprop_attr_t, int,
void zprop_register_string(int, const char *, const char *,
zprop_attr_t attr, int, const char *, const char *);
void zprop_register_number(int, const char *, uint64_t, zprop_attr_t, int,
const char *, const char *);
void register_index(int, const char *, uint64_t, zprop_attr_t, int,
void zprop_register_index(int, const char *, uint64_t, zprop_attr_t, int,
const char *, const char *, const zprop_index_t *);
void register_hidden(int, const char *, zprop_type_t, zprop_attr_t,
void zprop_register_hidden(int, const char *, zprop_type_t, zprop_attr_t,
int, const char *);
/*
@ -118,6 +117,7 @@ int zprop_iter_common(zprop_func, void *, boolean_t, boolean_t, zfs_type_t);
int zprop_name_to_prop(const char *, zfs_type_t);
int zprop_string_to_index(int, const char *, uint64_t *, zfs_type_t);
int zprop_index_to_string(int, uint64_t, const char **, zfs_type_t);
uint64_t zprop_random_value(int, uint64_t, zfs_type_t);
const char *zprop_values(int, zfs_type_t);
size_t zprop_width(int, boolean_t *, zfs_type_t);
boolean_t zprop_valid_for_type(int, zfs_type_t);

View File

@ -19,12 +19,9 @@
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
/*
* This file is intended for functions that ought to be common between user
* land (libzfs) and the kernel. When many common routines need to be shared
@ -33,11 +30,15 @@
#if defined(_KERNEL)
#include <sys/systm.h>
#else
#include <string.h>
#endif
#include <sys/types.h>
#include <sys/fs/zfs.h>
#include <sys/int_limits.h>
#include <sys/nvpair.h>
#include "zfs_comutil.h"
/*
* Are there allocatable vdevs?
@ -63,3 +64,139 @@ zfs_allocatable_devs(nvlist_t *nv)
}
return (B_FALSE);
}
void
zpool_get_rewind_policy(nvlist_t *nvl, zpool_rewind_policy_t *zrpp)
{
nvlist_t *policy;
nvpair_t *elem;
char *nm;
/* Defaults */
zrpp->zrp_request = ZPOOL_NO_REWIND;
zrpp->zrp_maxmeta = 0;
zrpp->zrp_maxdata = UINT64_MAX;
zrpp->zrp_txg = UINT64_MAX;
if (nvl == NULL)
return;
elem = NULL;
while ((elem = nvlist_next_nvpair(nvl, elem)) != NULL) {
nm = nvpair_name(elem);
if (strcmp(nm, ZPOOL_REWIND_POLICY) == 0) {
if (nvpair_value_nvlist(elem, &policy) == 0)
zpool_get_rewind_policy(policy, zrpp);
return;
} else if (strcmp(nm, ZPOOL_REWIND_REQUEST) == 0) {
if (nvpair_value_uint32(elem, &zrpp->zrp_request) == 0)
if (zrpp->zrp_request & ~ZPOOL_REWIND_POLICIES)
zrpp->zrp_request = ZPOOL_NO_REWIND;
} else if (strcmp(nm, ZPOOL_REWIND_REQUEST_TXG) == 0) {
(void) nvpair_value_uint64(elem, &zrpp->zrp_txg);
} else if (strcmp(nm, ZPOOL_REWIND_META_THRESH) == 0) {
(void) nvpair_value_uint64(elem, &zrpp->zrp_maxmeta);
} else if (strcmp(nm, ZPOOL_REWIND_DATA_THRESH) == 0) {
(void) nvpair_value_uint64(elem, &zrpp->zrp_maxdata);
}
}
if (zrpp->zrp_request == 0)
zrpp->zrp_request = ZPOOL_NO_REWIND;
}
typedef struct zfs_version_spa_map {
int version_zpl;
int version_spa;
} zfs_version_spa_map_t;
/*
* Keep this table in monotonically increasing version number order.
*/
static zfs_version_spa_map_t zfs_version_table[] = {
{ZPL_VERSION_INITIAL, SPA_VERSION_INITIAL},
{ZPL_VERSION_DIRENT_TYPE, SPA_VERSION_INITIAL},
{ZPL_VERSION_FUID, SPA_VERSION_FUID},
{ZPL_VERSION_USERSPACE, SPA_VERSION_USERSPACE},
{ZPL_VERSION_SA, SPA_VERSION_SA},
{0, 0}
};
/*
* Return the max zpl version for a corresponding spa version
* -1 is returned if no mapping exists.
*/
int
zfs_zpl_version_map(int spa_version)
{
int i;
int version = -1;
for (i = 0; zfs_version_table[i].version_spa; i++) {
if (spa_version >= zfs_version_table[i].version_spa)
version = zfs_version_table[i].version_zpl;
}
return (version);
}
/*
* Return the min spa version for a corresponding spa version
* -1 is returned if no mapping exists.
*/
int
zfs_spa_version_map(int zpl_version)
{
int i;
int version = -1;
for (i = 0; zfs_version_table[i].version_zpl; i++) {
if (zfs_version_table[i].version_zpl >= zpl_version)
return (zfs_version_table[i].version_spa);
}
return (version);
}
const char *zfs_history_event_names[LOG_END] = {
"invalid event",
"pool create",
"vdev add",
"pool remove",
"pool destroy",
"pool export",
"pool import",
"vdev attach",
"vdev replace",
"vdev detach",
"vdev online",
"vdev offline",
"vdev upgrade",
"pool clear",
"pool scrub",
"pool property set",
"create",
"clone",
"destroy",
"destroy_begin_sync",
"inherit",
"property set",
"quota set",
"permission update",
"permission remove",
"permission who remove",
"promote",
"receive",
"rename",
"reservation set",
"replay_inc_sync",
"replay_full_sync",
"rollback",
"snapshot",
"filesystem version upgrade",
"refquota set",
"refreservation set",
"pool scrub done",
"user hold",
"user release",
"pool split",
};

View File

@ -128,6 +128,7 @@
#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/byteorder.h>
#include <sys/zio.h>
#include <sys/spa.h>
void

View File

@ -19,10 +19,11 @@
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/* Portions Copyright 2010 Robert Milkowski */
#include <sys/zio.h>
#include <sys/spa.h>
#include <sys/u8_textprep.h>
@ -69,6 +70,16 @@ zfs_prop_init(void)
{ NULL }
};
static zprop_index_t dedup_table[] = {
{ "on", ZIO_CHECKSUM_ON },
{ "off", ZIO_CHECKSUM_OFF },
{ "verify", ZIO_CHECKSUM_ON | ZIO_CHECKSUM_VERIFY },
{ "sha256", ZIO_CHECKSUM_SHA256 },
{ "sha256,verify",
ZIO_CHECKSUM_SHA256 | ZIO_CHECKSUM_VERIFY },
{ NULL }
};
static zprop_index_t compress_table[] = {
{ "on", ZIO_COMPRESS_ON },
{ "off", ZIO_COMPRESS_OFF },
@ -83,6 +94,7 @@ zfs_prop_init(void)
{ "gzip-7", ZIO_COMPRESS_GZIP_7 },
{ "gzip-8", ZIO_COMPRESS_GZIP_8 },
{ "gzip-9", ZIO_COMPRESS_GZIP_9 },
{ "zle", ZIO_COMPRESS_ZLE },
{ NULL }
};
@ -92,13 +104,6 @@ zfs_prop_init(void)
{ NULL }
};
static zprop_index_t acl_mode_table[] = {
{ "discard", ZFS_ACL_DISCARD },
{ "groupmask", ZFS_ACL_GROUPMASK },
{ "passthrough", ZFS_ACL_PASSTHROUGH },
{ NULL }
};
static zprop_index_t acl_inherit_table[] = {
{ "discard", ZFS_ACL_DISCARD },
{ "noallow", ZFS_ACL_NOALLOW },
@ -142,6 +147,7 @@ zfs_prop_init(void)
{ "2", 2 },
{ "3", 3 },
{ "4", 4 },
{ "5", 5 },
{ "current", ZPL_VERSION },
{ NULL }
};
@ -152,6 +158,12 @@ zfs_prop_init(void)
{ NULL }
};
static zprop_index_t logbias_table[] = {
{ "latency", ZFS_LOGBIAS_LATENCY },
{ "throughput", ZFS_LOGBIAS_THROUGHPUT },
{ NULL }
};
static zprop_index_t canmount_table[] = {
{ "off", ZFS_CANMOUNT_OFF },
{ "on", ZFS_CANMOUNT_ON },
@ -166,170 +178,208 @@ zfs_prop_init(void)
{ NULL }
};
static zprop_index_t sync_table[] = {
{ "standard", ZFS_SYNC_STANDARD },
{ "always", ZFS_SYNC_ALWAYS },
{ "disabled", ZFS_SYNC_DISABLED },
{ NULL }
};
/* inherit index properties */
register_index(ZFS_PROP_CHECKSUM, "checksum", ZIO_CHECKSUM_DEFAULT,
zprop_register_index(ZFS_PROP_SYNC, "sync", ZFS_SYNC_STANDARD,
PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
"standard | always | disabled", "SYNC",
sync_table);
zprop_register_index(ZFS_PROP_CHECKSUM, "checksum",
ZIO_CHECKSUM_DEFAULT, PROP_INHERIT, ZFS_TYPE_FILESYSTEM |
ZFS_TYPE_VOLUME,
"on | off | fletcher2 | fletcher4 | sha256", "CHECKSUM",
checksum_table);
register_index(ZFS_PROP_COMPRESSION, "compression",
zprop_register_index(ZFS_PROP_DEDUP, "dedup", ZIO_CHECKSUM_OFF,
PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
"on | off | verify | sha256[,verify]", "DEDUP",
dedup_table);
zprop_register_index(ZFS_PROP_COMPRESSION, "compression",
ZIO_COMPRESS_DEFAULT, PROP_INHERIT,
ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
"on | off | lzjb | gzip | gzip-[1-9]", "COMPRESS", compress_table);
register_index(ZFS_PROP_SNAPDIR, "snapdir", ZFS_SNAPDIR_HIDDEN,
"on | off | lzjb | gzip | gzip-[1-9] | zle", "COMPRESS",
compress_table);
zprop_register_index(ZFS_PROP_SNAPDIR, "snapdir", ZFS_SNAPDIR_HIDDEN,
PROP_INHERIT, ZFS_TYPE_FILESYSTEM,
"hidden | visible", "SNAPDIR", snapdir_table);
register_index(ZFS_PROP_ACLMODE, "aclmode", ZFS_ACL_GROUPMASK,
PROP_INHERIT, ZFS_TYPE_FILESYSTEM,
"discard | groupmask | passthrough", "ACLMODE", acl_mode_table);
register_index(ZFS_PROP_ACLINHERIT, "aclinherit", ZFS_ACL_RESTRICTED,
PROP_INHERIT, ZFS_TYPE_FILESYSTEM,
zprop_register_index(ZFS_PROP_ACLINHERIT, "aclinherit",
ZFS_ACL_RESTRICTED, PROP_INHERIT, ZFS_TYPE_FILESYSTEM,
"discard | noallow | restricted | passthrough | passthrough-x",
"ACLINHERIT", acl_inherit_table);
register_index(ZFS_PROP_COPIES, "copies", 1,
PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
zprop_register_index(ZFS_PROP_COPIES, "copies", 1, PROP_INHERIT,
ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
"1 | 2 | 3", "COPIES", copies_table);
register_index(ZFS_PROP_PRIMARYCACHE, "primarycache",
zprop_register_index(ZFS_PROP_PRIMARYCACHE, "primarycache",
ZFS_CACHE_ALL, PROP_INHERIT,
ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT | ZFS_TYPE_VOLUME,
"all | none | metadata", "PRIMARYCACHE", cache_table);
register_index(ZFS_PROP_SECONDARYCACHE, "secondarycache",
zprop_register_index(ZFS_PROP_SECONDARYCACHE, "secondarycache",
ZFS_CACHE_ALL, PROP_INHERIT,
ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT | ZFS_TYPE_VOLUME,
"all | none | metadata", "SECONDARYCACHE", cache_table);
zprop_register_index(ZFS_PROP_LOGBIAS, "logbias", ZFS_LOGBIAS_LATENCY,
PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
"latency | throughput", "LOGBIAS", logbias_table);
/* inherit index (boolean) properties */
register_index(ZFS_PROP_ATIME, "atime", 1, PROP_INHERIT,
zprop_register_index(ZFS_PROP_ATIME, "atime", 1, PROP_INHERIT,
ZFS_TYPE_FILESYSTEM, "on | off", "ATIME", boolean_table);
register_index(ZFS_PROP_DEVICES, "devices", 1, PROP_INHERIT,
zprop_register_index(ZFS_PROP_DEVICES, "devices", 1, PROP_INHERIT,
ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "on | off", "DEVICES",
boolean_table);
register_index(ZFS_PROP_EXEC, "exec", 1, PROP_INHERIT,
zprop_register_index(ZFS_PROP_EXEC, "exec", 1, PROP_INHERIT,
ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "on | off", "EXEC",
boolean_table);
register_index(ZFS_PROP_SETUID, "setuid", 1, PROP_INHERIT,
zprop_register_index(ZFS_PROP_SETUID, "setuid", 1, PROP_INHERIT,
ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "on | off", "SETUID",
boolean_table);
register_index(ZFS_PROP_READONLY, "readonly", 0, PROP_INHERIT,
zprop_register_index(ZFS_PROP_READONLY, "readonly", 0, PROP_INHERIT,
ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "on | off", "RDONLY",
boolean_table);
register_index(ZFS_PROP_ZONED, "zoned", 0, PROP_INHERIT,
zprop_register_index(ZFS_PROP_ZONED, "zoned", 0, PROP_INHERIT,
ZFS_TYPE_FILESYSTEM, "on | off", "ZONED", boolean_table);
register_index(ZFS_PROP_XATTR, "xattr", 1, PROP_INHERIT,
zprop_register_index(ZFS_PROP_XATTR, "xattr", 1, PROP_INHERIT,
ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "on | off", "XATTR",
boolean_table);
register_index(ZFS_PROP_VSCAN, "vscan", 0, PROP_INHERIT,
zprop_register_index(ZFS_PROP_VSCAN, "vscan", 0, PROP_INHERIT,
ZFS_TYPE_FILESYSTEM, "on | off", "VSCAN",
boolean_table);
register_index(ZFS_PROP_NBMAND, "nbmand", 0, PROP_INHERIT,
zprop_register_index(ZFS_PROP_NBMAND, "nbmand", 0, PROP_INHERIT,
ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "on | off", "NBMAND",
boolean_table);
/* default index properties */
register_index(ZFS_PROP_VERSION, "version", 0, PROP_DEFAULT,
zprop_register_index(ZFS_PROP_VERSION, "version", 0, PROP_DEFAULT,
ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT,
"1 | 2 | 3 | 4 | current", "VERSION", version_table);
register_index(ZFS_PROP_CANMOUNT, "canmount", ZFS_CANMOUNT_ON,
zprop_register_index(ZFS_PROP_CANMOUNT, "canmount", ZFS_CANMOUNT_ON,
PROP_DEFAULT, ZFS_TYPE_FILESYSTEM, "on | off | noauto",
"CANMOUNT", canmount_table);
/* readonly index (boolean) properties */
register_index(ZFS_PROP_MOUNTED, "mounted", 0, PROP_READONLY,
zprop_register_index(ZFS_PROP_MOUNTED, "mounted", 0, PROP_READONLY,
ZFS_TYPE_FILESYSTEM, "yes | no", "MOUNTED", boolean_table);
register_index(ZFS_PROP_DEFER_DESTROY, "defer_destroy", 0,
zprop_register_index(ZFS_PROP_DEFER_DESTROY, "defer_destroy", 0,
PROP_READONLY, ZFS_TYPE_SNAPSHOT, "yes | no", "DEFER_DESTROY",
boolean_table);
/* set once index properties */
register_index(ZFS_PROP_NORMALIZE, "normalization", 0,
zprop_register_index(ZFS_PROP_NORMALIZE, "normalization", 0,
PROP_ONETIME, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT,
"none | formC | formD | formKC | formKD", "NORMALIZATION",
normalize_table);
register_index(ZFS_PROP_CASE, "casesensitivity", ZFS_CASE_SENSITIVE,
PROP_ONETIME, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT,
zprop_register_index(ZFS_PROP_CASE, "casesensitivity",
ZFS_CASE_SENSITIVE, PROP_ONETIME, ZFS_TYPE_FILESYSTEM |
ZFS_TYPE_SNAPSHOT,
"sensitive | insensitive | mixed", "CASE", case_table);
/* set once index (boolean) properties */
register_index(ZFS_PROP_UTF8ONLY, "utf8only", 0, PROP_ONETIME,
zprop_register_index(ZFS_PROP_UTF8ONLY, "utf8only", 0, PROP_ONETIME,
ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT,
"on | off", "UTF8ONLY", boolean_table);
/* string properties */
register_string(ZFS_PROP_ORIGIN, "origin", NULL, PROP_READONLY,
zprop_register_string(ZFS_PROP_ORIGIN, "origin", NULL, PROP_READONLY,
ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<snapshot>", "ORIGIN");
register_string(ZFS_PROP_MOUNTPOINT, "mountpoint", "/", PROP_INHERIT,
ZFS_TYPE_FILESYSTEM, "<path> | legacy | none", "MOUNTPOINT");
register_string(ZFS_PROP_SHARENFS, "sharenfs", "off", PROP_INHERIT,
ZFS_TYPE_FILESYSTEM, "on | off | share(1M) options", "SHARENFS");
register_string(ZFS_PROP_SHAREISCSI, "shareiscsi", "off", PROP_INHERIT,
ZFS_TYPE_DATASET, "on | off | type=<type>", "SHAREISCSI");
register_string(ZFS_PROP_TYPE, "type", NULL, PROP_READONLY,
zprop_register_string(ZFS_PROP_MOUNTPOINT, "mountpoint", "/",
PROP_INHERIT, ZFS_TYPE_FILESYSTEM, "<path> | legacy | none",
"MOUNTPOINT");
zprop_register_string(ZFS_PROP_SHARENFS, "sharenfs", "off",
PROP_INHERIT, ZFS_TYPE_FILESYSTEM, "on | off | share(1M) options",
"SHARENFS");
zprop_register_string(ZFS_PROP_TYPE, "type", NULL, PROP_READONLY,
ZFS_TYPE_DATASET, "filesystem | volume | snapshot", "TYPE");
register_string(ZFS_PROP_SHARESMB, "sharesmb", "off", PROP_INHERIT,
ZFS_TYPE_FILESYSTEM, "on | off | sharemgr(1M) options", "SHARESMB");
zprop_register_string(ZFS_PROP_SHARESMB, "sharesmb", "off",
PROP_INHERIT, ZFS_TYPE_FILESYSTEM,
"on | off | sharemgr(1M) options", "SHARESMB");
zprop_register_string(ZFS_PROP_MLSLABEL, "mlslabel",
ZFS_MLSLABEL_DEFAULT, PROP_INHERIT, ZFS_TYPE_DATASET,
"<sensitivity label>", "MLSLABEL");
/* readonly number properties */
register_number(ZFS_PROP_USED, "used", 0, PROP_READONLY,
zprop_register_number(ZFS_PROP_USED, "used", 0, PROP_READONLY,
ZFS_TYPE_DATASET, "<size>", "USED");
register_number(ZFS_PROP_AVAILABLE, "available", 0, PROP_READONLY,
zprop_register_number(ZFS_PROP_AVAILABLE, "available", 0, PROP_READONLY,
ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<size>", "AVAIL");
register_number(ZFS_PROP_REFERENCED, "referenced", 0, PROP_READONLY,
ZFS_TYPE_DATASET, "<size>", "REFER");
register_number(ZFS_PROP_COMPRESSRATIO, "compressratio", 0,
zprop_register_number(ZFS_PROP_REFERENCED, "referenced", 0,
PROP_READONLY, ZFS_TYPE_DATASET, "<size>", "REFER");
zprop_register_number(ZFS_PROP_COMPRESSRATIO, "compressratio", 0,
PROP_READONLY, ZFS_TYPE_DATASET,
"<1.00x or higher if compressed>", "RATIO");
register_number(ZFS_PROP_VOLBLOCKSIZE, "volblocksize", 8192,
PROP_ONETIME,
zprop_register_number(ZFS_PROP_VOLBLOCKSIZE, "volblocksize",
ZVOL_DEFAULT_BLOCKSIZE, PROP_ONETIME,
ZFS_TYPE_VOLUME, "512 to 128k, power of 2", "VOLBLOCK");
register_number(ZFS_PROP_USEDSNAP, "usedbysnapshots", 0, PROP_READONLY,
ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<size>", "USEDSNAP");
register_number(ZFS_PROP_USEDDS, "usedbydataset", 0, PROP_READONLY,
ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<size>", "USEDDS");
register_number(ZFS_PROP_USEDCHILD, "usedbychildren", 0, PROP_READONLY,
ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<size>", "USEDCHILD");
register_number(ZFS_PROP_USEDREFRESERV, "usedbyrefreservation", 0,
zprop_register_number(ZFS_PROP_USEDSNAP, "usedbysnapshots", 0,
PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<size>",
"USEDSNAP");
zprop_register_number(ZFS_PROP_USEDDS, "usedbydataset", 0,
PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<size>",
"USEDDS");
zprop_register_number(ZFS_PROP_USEDCHILD, "usedbychildren", 0,
PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<size>",
"USEDCHILD");
zprop_register_number(ZFS_PROP_USEDREFRESERV, "usedbyrefreservation", 0,
PROP_READONLY,
ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<size>", "USEDREFRESERV");
register_number(ZFS_PROP_USERREFS, "userrefs", 0, PROP_READONLY,
zprop_register_number(ZFS_PROP_USERREFS, "userrefs", 0, PROP_READONLY,
ZFS_TYPE_SNAPSHOT, "<count>", "USERREFS");
/* default number properties */
register_number(ZFS_PROP_QUOTA, "quota", 0, PROP_DEFAULT,
zprop_register_number(ZFS_PROP_QUOTA, "quota", 0, PROP_DEFAULT,
ZFS_TYPE_FILESYSTEM, "<size> | none", "QUOTA");
register_number(ZFS_PROP_RESERVATION, "reservation", 0, PROP_DEFAULT,
ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<size> | none", "RESERV");
register_number(ZFS_PROP_VOLSIZE, "volsize", 0, PROP_DEFAULT,
zprop_register_number(ZFS_PROP_RESERVATION, "reservation", 0,
PROP_DEFAULT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
"<size> | none", "RESERV");
zprop_register_number(ZFS_PROP_VOLSIZE, "volsize", 0, PROP_DEFAULT,
ZFS_TYPE_VOLUME, "<size>", "VOLSIZE");
register_number(ZFS_PROP_REFQUOTA, "refquota", 0, PROP_DEFAULT,
zprop_register_number(ZFS_PROP_REFQUOTA, "refquota", 0, PROP_DEFAULT,
ZFS_TYPE_FILESYSTEM, "<size> | none", "REFQUOTA");
register_number(ZFS_PROP_REFRESERVATION, "refreservation", 0,
zprop_register_number(ZFS_PROP_REFRESERVATION, "refreservation", 0,
PROP_DEFAULT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
"<size> | none", "REFRESERV");
/* inherit number properties */
register_number(ZFS_PROP_RECORDSIZE, "recordsize", SPA_MAXBLOCKSIZE,
PROP_INHERIT,
zprop_register_number(ZFS_PROP_RECORDSIZE, "recordsize",
SPA_MAXBLOCKSIZE, PROP_INHERIT,
ZFS_TYPE_FILESYSTEM, "512 to 128k, power of 2", "RECSIZE");
/* hidden properties */
register_hidden(ZFS_PROP_CREATETXG, "createtxg", PROP_TYPE_NUMBER,
PROP_READONLY, ZFS_TYPE_DATASET, NULL);
register_hidden(ZFS_PROP_NUMCLONES, "numclones", PROP_TYPE_NUMBER,
PROP_READONLY, ZFS_TYPE_SNAPSHOT, NULL);
register_hidden(ZFS_PROP_NAME, "name", PROP_TYPE_STRING,
zprop_register_hidden(ZFS_PROP_CREATETXG, "createtxg", PROP_TYPE_NUMBER,
PROP_READONLY, ZFS_TYPE_DATASET, "CREATETXG");
zprop_register_hidden(ZFS_PROP_NUMCLONES, "numclones", PROP_TYPE_NUMBER,
PROP_READONLY, ZFS_TYPE_SNAPSHOT, "NUMCLONES");
zprop_register_hidden(ZFS_PROP_NAME, "name", PROP_TYPE_STRING,
PROP_READONLY, ZFS_TYPE_DATASET, "NAME");
register_hidden(ZFS_PROP_ISCSIOPTIONS, "iscsioptions", PROP_TYPE_STRING,
PROP_INHERIT, ZFS_TYPE_VOLUME, "ISCSIOPTIONS");
register_hidden(ZFS_PROP_STMF_SHAREINFO, "stmf_sbd_lu",
zprop_register_hidden(ZFS_PROP_ISCSIOPTIONS, "iscsioptions",
PROP_TYPE_STRING, PROP_INHERIT, ZFS_TYPE_VOLUME, "ISCSIOPTIONS");
zprop_register_hidden(ZFS_PROP_STMF_SHAREINFO, "stmf_sbd_lu",
PROP_TYPE_STRING, PROP_INHERIT, ZFS_TYPE_VOLUME,
"STMF_SBD_LU");
register_hidden(ZFS_PROP_GUID, "guid", PROP_TYPE_NUMBER, PROP_READONLY,
ZFS_TYPE_DATASET, "GUID");
register_hidden(ZFS_PROP_USERACCOUNTING, "useraccounting",
PROP_TYPE_NUMBER, PROP_READONLY, ZFS_TYPE_DATASET, NULL);
zprop_register_hidden(ZFS_PROP_GUID, "guid", PROP_TYPE_NUMBER,
PROP_READONLY, ZFS_TYPE_DATASET, "GUID");
zprop_register_hidden(ZFS_PROP_USERACCOUNTING, "useraccounting",
PROP_TYPE_NUMBER, PROP_READONLY, ZFS_TYPE_DATASET,
"USERACCOUNTING");
zprop_register_hidden(ZFS_PROP_UNIQUE, "unique", PROP_TYPE_NUMBER,
PROP_READONLY, ZFS_TYPE_DATASET, "UNIQUE");
zprop_register_hidden(ZFS_PROP_OBJSETID, "objsetid", PROP_TYPE_NUMBER,
PROP_READONLY, ZFS_TYPE_DATASET, "OBJSETID");
/*
* Property to be removed once libbe is integrated
*/
zprop_register_hidden(ZFS_PROP_PRIVATE, "priv_prop",
PROP_TYPE_NUMBER, PROP_READONLY, ZFS_TYPE_FILESYSTEM,
"PRIV_PROP");
/* oddball properties */
register_impl(ZFS_PROP_CREATION, "creation", PROP_TYPE_NUMBER, 0, NULL,
PROP_READONLY, ZFS_TYPE_DATASET,
zprop_register_impl(ZFS_PROP_CREATION, "creation", PROP_TYPE_NUMBER, 0,
NULL, PROP_READONLY, ZFS_TYPE_DATASET,
"<date>", "CREATION", B_FALSE, B_TRUE, NULL);
}
@ -337,6 +387,11 @@ boolean_t
zfs_prop_delegatable(zfs_prop_t prop)
{
zprop_desc_t *pd = &zfs_prop_table[prop];
/* The mlslabel property is never delegatable. */
if (prop == ZFS_PROP_MLSLABEL)
return (B_FALSE);
return (pd->pd_attr != PROP_READONLY);
}
@ -421,6 +476,12 @@ zfs_prop_index_to_string(zfs_prop_t prop, uint64_t index, const char **string)
return (zprop_index_to_string(prop, index, string, ZFS_TYPE_DATASET));
}
uint64_t
zfs_prop_random_value(zfs_prop_t prop, uint64_t seed)
{
return (zprop_random_value(prop, seed, ZFS_TYPE_DATASET));
}
/*
* Returns TRUE if the property applies to any of the given dataset types.
*/

View File

@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@ -64,48 +64,55 @@ zpool_prop_init(void)
};
/* string properties */
register_string(ZPOOL_PROP_ALTROOT, "altroot", NULL, PROP_DEFAULT,
zprop_register_string(ZPOOL_PROP_ALTROOT, "altroot", NULL, PROP_DEFAULT,
ZFS_TYPE_POOL, "<path>", "ALTROOT");
register_string(ZPOOL_PROP_BOOTFS, "bootfs", NULL, PROP_DEFAULT,
zprop_register_string(ZPOOL_PROP_BOOTFS, "bootfs", NULL, PROP_DEFAULT,
ZFS_TYPE_POOL, "<filesystem>", "BOOTFS");
register_string(ZPOOL_PROP_CACHEFILE, "cachefile", NULL, PROP_DEFAULT,
ZFS_TYPE_POOL, "<file> | none", "CACHEFILE");
zprop_register_string(ZPOOL_PROP_CACHEFILE, "cachefile", NULL,
PROP_DEFAULT, ZFS_TYPE_POOL, "<file> | none", "CACHEFILE");
/* readonly number properties */
register_number(ZPOOL_PROP_SIZE, "size", 0, PROP_READONLY,
zprop_register_number(ZPOOL_PROP_SIZE, "size", 0, PROP_READONLY,
ZFS_TYPE_POOL, "<size>", "SIZE");
register_number(ZPOOL_PROP_USED, "used", 0, PROP_READONLY,
ZFS_TYPE_POOL, "<size>", "USED");
register_number(ZPOOL_PROP_AVAILABLE, "available", 0, PROP_READONLY,
ZFS_TYPE_POOL, "<size>", "AVAIL");
register_number(ZPOOL_PROP_CAPACITY, "capacity", 0, PROP_READONLY,
zprop_register_number(ZPOOL_PROP_FREE, "free", 0, PROP_READONLY,
ZFS_TYPE_POOL, "<size>", "FREE");
zprop_register_number(ZPOOL_PROP_ALLOCATED, "allocated", 0,
PROP_READONLY, ZFS_TYPE_POOL, "<size>", "ALLOC");
zprop_register_number(ZPOOL_PROP_CAPACITY, "capacity", 0, PROP_READONLY,
ZFS_TYPE_POOL, "<size>", "CAP");
register_number(ZPOOL_PROP_GUID, "guid", 0, PROP_READONLY,
zprop_register_number(ZPOOL_PROP_GUID, "guid", 0, PROP_READONLY,
ZFS_TYPE_POOL, "<guid>", "GUID");
register_number(ZPOOL_PROP_HEALTH, "health", 0, PROP_READONLY,
zprop_register_number(ZPOOL_PROP_HEALTH, "health", 0, PROP_READONLY,
ZFS_TYPE_POOL, "<state>", "HEALTH");
zprop_register_number(ZPOOL_PROP_DEDUPRATIO, "dedupratio", 0,
PROP_READONLY, ZFS_TYPE_POOL, "<1.00x or higher if deduped>",
"DEDUP");
/* default number properties */
register_number(ZPOOL_PROP_VERSION, "version", SPA_VERSION,
zprop_register_number(ZPOOL_PROP_VERSION, "version", SPA_VERSION,
PROP_DEFAULT, ZFS_TYPE_POOL, "<version>", "VERSION");
zprop_register_number(ZPOOL_PROP_DEDUPDITTO, "dedupditto", 0,
PROP_DEFAULT, ZFS_TYPE_POOL, "<threshold (min 100)>", "DEDUPDITTO");
/* default index (boolean) properties */
register_index(ZPOOL_PROP_DELEGATION, "delegation", 1, PROP_DEFAULT,
ZFS_TYPE_POOL, "on | off", "DELEGATION", boolean_table);
register_index(ZPOOL_PROP_AUTOREPLACE, "autoreplace", 0, PROP_DEFAULT,
ZFS_TYPE_POOL, "on | off", "REPLACE", boolean_table);
register_index(ZPOOL_PROP_LISTSNAPS, "listsnapshots", 0, PROP_DEFAULT,
ZFS_TYPE_POOL, "on | off", "LISTSNAPS", boolean_table);
register_index(ZPOOL_PROP_AUTOEXPAND, "autoexpand", 0, PROP_DEFAULT,
ZFS_TYPE_POOL, "on | off", "EXPAND", boolean_table);
zprop_register_index(ZPOOL_PROP_DELEGATION, "delegation", 1,
PROP_DEFAULT, ZFS_TYPE_POOL, "on | off", "DELEGATION",
boolean_table);
zprop_register_index(ZPOOL_PROP_AUTOREPLACE, "autoreplace", 0,
PROP_DEFAULT, ZFS_TYPE_POOL, "on | off", "REPLACE", boolean_table);
zprop_register_index(ZPOOL_PROP_LISTSNAPS, "listsnapshots", 0,
PROP_DEFAULT, ZFS_TYPE_POOL, "on | off", "LISTSNAPS",
boolean_table);
zprop_register_index(ZPOOL_PROP_AUTOEXPAND, "autoexpand", 0,
PROP_DEFAULT, ZFS_TYPE_POOL, "on | off", "EXPAND", boolean_table);
/* default index properties */
register_index(ZPOOL_PROP_FAILUREMODE, "failmode",
zprop_register_index(ZPOOL_PROP_FAILUREMODE, "failmode",
ZIO_FAILURE_MODE_WAIT, PROP_DEFAULT, ZFS_TYPE_POOL,
"wait | continue | panic", "FAILMODE", failuremode_table);
/* hidden properties */
register_hidden(ZPOOL_PROP_NAME, "name", PROP_TYPE_STRING,
zprop_register_hidden(ZPOOL_PROP_NAME, "name", PROP_TYPE_STRING,
PROP_READONLY, ZFS_TYPE_POOL, "NAME");
}
@ -166,6 +173,12 @@ zpool_prop_index_to_string(zpool_prop_t prop, uint64_t index,
return (zprop_index_to_string(prop, index, string, ZFS_TYPE_POOL));
}
uint64_t
zpool_prop_random_value(zpool_prop_t prop, uint64_t seed)
{
return (zprop_random_value(prop, seed, ZFS_TYPE_POOL));
}
#ifndef _KERNEL
const char *

View File

@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@ -65,7 +65,7 @@ zprop_get_numprops(zfs_type_t type)
}
void
register_impl(int prop, const char *name, zprop_type_t type,
zprop_register_impl(int prop, const char *name, zprop_type_t type,
uint64_t numdefault, const char *strdefault, zprop_attr_t attr,
int objset_types, const char *values, const char *colname,
boolean_t rightalign, boolean_t visible, const zprop_index_t *idx_tbl)
@ -76,6 +76,8 @@ register_impl(int prop, const char *name, zprop_type_t type,
pd = &prop_tbl[prop];
ASSERT(pd->pd_name == NULL || pd->pd_name == name);
ASSERT(name != NULL);
ASSERT(colname != NULL);
pd->pd_name = name;
pd->pd_propnum = prop;
@ -89,40 +91,44 @@ register_impl(int prop, const char *name, zprop_type_t type,
pd->pd_rightalign = rightalign;
pd->pd_visible = visible;
pd->pd_table = idx_tbl;
pd->pd_table_size = 0;
while (idx_tbl && (idx_tbl++)->pi_name != NULL)
pd->pd_table_size++;
}
void
register_string(int prop, const char *name, const char *def,
zprop_register_string(int prop, const char *name, const char *def,
zprop_attr_t attr, int objset_types, const char *values,
const char *colname)
{
register_impl(prop, name, PROP_TYPE_STRING, 0, def, attr,
zprop_register_impl(prop, name, PROP_TYPE_STRING, 0, def, attr,
objset_types, values, colname, B_FALSE, B_TRUE, NULL);
}
void
register_number(int prop, const char *name, uint64_t def, zprop_attr_t attr,
int objset_types, const char *values, const char *colname)
zprop_register_number(int prop, const char *name, uint64_t def,
zprop_attr_t attr, int objset_types, const char *values,
const char *colname)
{
register_impl(prop, name, PROP_TYPE_NUMBER, def, NULL, attr,
zprop_register_impl(prop, name, PROP_TYPE_NUMBER, def, NULL, attr,
objset_types, values, colname, B_TRUE, B_TRUE, NULL);
}
void
register_index(int prop, const char *name, uint64_t def, zprop_attr_t attr,
int objset_types, const char *values, const char *colname,
const zprop_index_t *idx_tbl)
zprop_register_index(int prop, const char *name, uint64_t def,
zprop_attr_t attr, int objset_types, const char *values,
const char *colname, const zprop_index_t *idx_tbl)
{
register_impl(prop, name, PROP_TYPE_INDEX, def, NULL, attr,
zprop_register_impl(prop, name, PROP_TYPE_INDEX, def, NULL, attr,
objset_types, values, colname, B_TRUE, B_TRUE, idx_tbl);
}
void
register_hidden(int prop, const char *name, zprop_type_t type,
zprop_register_hidden(int prop, const char *name, zprop_type_t type,
zprop_attr_t attr, int objset_types, const char *colname)
{
register_impl(prop, name, type, 0, NULL, attr,
zprop_register_impl(prop, name, type, 0, NULL, attr,
objset_types, NULL, colname, B_FALSE, B_FALSE, NULL);
}
@ -307,6 +313,25 @@ zprop_index_to_string(int prop, uint64_t index, const char **string,
return (-1);
}
/*
* Return a random valid property value. Used by ztest.
*/
uint64_t
zprop_random_value(int prop, uint64_t seed, zfs_type_t type)
{
zprop_desc_t *prop_tbl;
const zprop_index_t *idx_tbl;
ASSERT((uint_t)prop < zprop_get_numprops(type));
prop_tbl = zprop_get_proptable(type);
idx_tbl = prop_tbl[prop].pd_table;
if (idx_tbl == NULL)
return (seed);
return (idx_tbl[seed % prop_tbl[prop].pd_table_size].pi_value);
}
const char *
zprop_values(int prop, zfs_type_t type)
{

File diff suppressed because it is too large Load Diff

View File

@ -19,331 +19,51 @@
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#include <sys/bplist.h>
#include <sys/zfs_context.h>
static int
bplist_hold(bplist_t *bpl)
void
bplist_create(bplist_t *bpl)
{
ASSERT(MUTEX_HELD(&bpl->bpl_lock));
if (bpl->bpl_dbuf == NULL) {
int err = dmu_bonus_hold(bpl->bpl_mos,
bpl->bpl_object, bpl, &bpl->bpl_dbuf);
if (err)
return (err);
bpl->bpl_phys = bpl->bpl_dbuf->db_data;
}
return (0);
}
uint64_t
bplist_create(objset_t *mos, int blocksize, dmu_tx_t *tx)
{
int size;
size = spa_version(dmu_objset_spa(mos)) < SPA_VERSION_BPLIST_ACCOUNT ?
BPLIST_SIZE_V0 : sizeof (bplist_phys_t);
return (dmu_object_alloc(mos, DMU_OT_BPLIST, blocksize,
DMU_OT_BPLIST_HDR, size, tx));
mutex_init(&bpl->bpl_lock, NULL, MUTEX_DEFAULT, NULL);
list_create(&bpl->bpl_list, sizeof (bplist_entry_t),
offsetof(bplist_entry_t, bpe_node));
}
void
bplist_destroy(objset_t *mos, uint64_t object, dmu_tx_t *tx)
bplist_destroy(bplist_t *bpl)
{
VERIFY(dmu_object_free(mos, object, tx) == 0);
}
int
bplist_open(bplist_t *bpl, objset_t *mos, uint64_t object)
{
dmu_object_info_t doi;
int err;
err = dmu_object_info(mos, object, &doi);
if (err)
return (err);
mutex_enter(&bpl->bpl_lock);
ASSERT(bpl->bpl_dbuf == NULL);
ASSERT(bpl->bpl_phys == NULL);
ASSERT(bpl->bpl_cached_dbuf == NULL);
ASSERT(bpl->bpl_queue == NULL);
ASSERT(object != 0);
ASSERT3U(doi.doi_type, ==, DMU_OT_BPLIST);
ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_BPLIST_HDR);
bpl->bpl_mos = mos;
bpl->bpl_object = object;
bpl->bpl_blockshift = highbit(doi.doi_data_block_size - 1);
bpl->bpl_bpshift = bpl->bpl_blockshift - SPA_BLKPTRSHIFT;
bpl->bpl_havecomp = (doi.doi_bonus_size == sizeof (bplist_phys_t));
mutex_exit(&bpl->bpl_lock);
return (0);
list_destroy(&bpl->bpl_list);
mutex_destroy(&bpl->bpl_lock);
}
void
bplist_close(bplist_t *bpl)
bplist_append(bplist_t *bpl, const blkptr_t *bp)
{
mutex_enter(&bpl->bpl_lock);
ASSERT(bpl->bpl_queue == NULL);
if (bpl->bpl_cached_dbuf) {
dmu_buf_rele(bpl->bpl_cached_dbuf, bpl);
bpl->bpl_cached_dbuf = NULL;
}
if (bpl->bpl_dbuf) {
dmu_buf_rele(bpl->bpl_dbuf, bpl);
bpl->bpl_dbuf = NULL;
bpl->bpl_phys = NULL;
}
mutex_exit(&bpl->bpl_lock);
}
boolean_t
bplist_empty(bplist_t *bpl)
{
boolean_t rv;
if (bpl->bpl_object == 0)
return (B_TRUE);
bplist_entry_t *bpe = kmem_alloc(sizeof (*bpe), KM_SLEEP);
mutex_enter(&bpl->bpl_lock);
VERIFY(0 == bplist_hold(bpl)); /* XXX */
rv = (bpl->bpl_phys->bpl_entries == 0);
mutex_exit(&bpl->bpl_lock);
return (rv);
}
static int
bplist_cache(bplist_t *bpl, uint64_t blkid)
{
int err = 0;
if (bpl->bpl_cached_dbuf == NULL ||
bpl->bpl_cached_dbuf->db_offset != (blkid << bpl->bpl_blockshift)) {
if (bpl->bpl_cached_dbuf != NULL)
dmu_buf_rele(bpl->bpl_cached_dbuf, bpl);
err = dmu_buf_hold(bpl->bpl_mos,
bpl->bpl_object, blkid << bpl->bpl_blockshift,
bpl, &bpl->bpl_cached_dbuf);
ASSERT(err || bpl->bpl_cached_dbuf->db_size ==
1ULL << bpl->bpl_blockshift);
}
return (err);
}
int
bplist_iterate(bplist_t *bpl, uint64_t *itorp, blkptr_t *bp)
{
uint64_t blk, off;
blkptr_t *bparray;
int err;
mutex_enter(&bpl->bpl_lock);
err = bplist_hold(bpl);
if (err) {
mutex_exit(&bpl->bpl_lock);
return (err);
}
if (*itorp >= bpl->bpl_phys->bpl_entries) {
mutex_exit(&bpl->bpl_lock);
return (ENOENT);
}
blk = *itorp >> bpl->bpl_bpshift;
off = P2PHASE(*itorp, 1ULL << bpl->bpl_bpshift);
err = bplist_cache(bpl, blk);
if (err) {
mutex_exit(&bpl->bpl_lock);
return (err);
}
bparray = bpl->bpl_cached_dbuf->db_data;
*bp = bparray[off];
(*itorp)++;
mutex_exit(&bpl->bpl_lock);
return (0);
}
int
bplist_enqueue(bplist_t *bpl, const blkptr_t *bp, dmu_tx_t *tx)
{
uint64_t blk, off;
blkptr_t *bparray;
int err;
ASSERT(!BP_IS_HOLE(bp));
mutex_enter(&bpl->bpl_lock);
err = bplist_hold(bpl);
if (err)
return (err);
blk = bpl->bpl_phys->bpl_entries >> bpl->bpl_bpshift;
off = P2PHASE(bpl->bpl_phys->bpl_entries, 1ULL << bpl->bpl_bpshift);
err = bplist_cache(bpl, blk);
if (err) {
mutex_exit(&bpl->bpl_lock);
return (err);
}
dmu_buf_will_dirty(bpl->bpl_cached_dbuf, tx);
bparray = bpl->bpl_cached_dbuf->db_data;
bparray[off] = *bp;
/* We never need the fill count. */
bparray[off].blk_fill = 0;
/* The bplist will compress better if we can leave off the checksum */
bzero(&bparray[off].blk_cksum, sizeof (bparray[off].blk_cksum));
dmu_buf_will_dirty(bpl->bpl_dbuf, tx);
bpl->bpl_phys->bpl_entries++;
bpl->bpl_phys->bpl_bytes +=
bp_get_dasize(dmu_objset_spa(bpl->bpl_mos), bp);
if (bpl->bpl_havecomp) {
bpl->bpl_phys->bpl_comp += BP_GET_PSIZE(bp);
bpl->bpl_phys->bpl_uncomp += BP_GET_UCSIZE(bp);
}
mutex_exit(&bpl->bpl_lock);
return (0);
}
/*
* Deferred entry; will be written later by bplist_sync().
*/
void
bplist_enqueue_deferred(bplist_t *bpl, const blkptr_t *bp)
{
bplist_q_t *bpq = kmem_alloc(sizeof (*bpq), KM_SLEEP);
ASSERT(!BP_IS_HOLE(bp));
mutex_enter(&bpl->bpl_lock);
bpq->bpq_blk = *bp;
bpq->bpq_next = bpl->bpl_queue;
bpl->bpl_queue = bpq;
bpe->bpe_blk = *bp;
list_insert_tail(&bpl->bpl_list, bpe);
mutex_exit(&bpl->bpl_lock);
}
void
bplist_sync(bplist_t *bpl, dmu_tx_t *tx)
bplist_iterate(bplist_t *bpl, bplist_itor_t *func, void *arg, dmu_tx_t *tx)
{
bplist_q_t *bpq;
bplist_entry_t *bpe;
mutex_enter(&bpl->bpl_lock);
while ((bpq = bpl->bpl_queue) != NULL) {
bpl->bpl_queue = bpq->bpq_next;
while (bpe = list_head(&bpl->bpl_list)) {
list_remove(&bpl->bpl_list, bpe);
mutex_exit(&bpl->bpl_lock);
VERIFY(0 == bplist_enqueue(bpl, &bpq->bpq_blk, tx));
kmem_free(bpq, sizeof (*bpq));
func(arg, &bpe->bpe_blk, tx);
kmem_free(bpe, sizeof (*bpe));
mutex_enter(&bpl->bpl_lock);
}
mutex_exit(&bpl->bpl_lock);
}
void
bplist_vacate(bplist_t *bpl, dmu_tx_t *tx)
{
mutex_enter(&bpl->bpl_lock);
ASSERT3P(bpl->bpl_queue, ==, NULL);
VERIFY(0 == bplist_hold(bpl));
dmu_buf_will_dirty(bpl->bpl_dbuf, tx);
VERIFY(0 == dmu_free_range(bpl->bpl_mos,
bpl->bpl_object, 0, -1ULL, tx));
bpl->bpl_phys->bpl_entries = 0;
bpl->bpl_phys->bpl_bytes = 0;
if (bpl->bpl_havecomp) {
bpl->bpl_phys->bpl_comp = 0;
bpl->bpl_phys->bpl_uncomp = 0;
}
mutex_exit(&bpl->bpl_lock);
}
int
bplist_space(bplist_t *bpl, uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
{
int err;
mutex_enter(&bpl->bpl_lock);
err = bplist_hold(bpl);
if (err) {
mutex_exit(&bpl->bpl_lock);
return (err);
}
*usedp = bpl->bpl_phys->bpl_bytes;
if (bpl->bpl_havecomp) {
*compp = bpl->bpl_phys->bpl_comp;
*uncompp = bpl->bpl_phys->bpl_uncomp;
}
mutex_exit(&bpl->bpl_lock);
if (!bpl->bpl_havecomp) {
uint64_t itor = 0, comp = 0, uncomp = 0;
blkptr_t bp;
while ((err = bplist_iterate(bpl, &itor, &bp)) == 0) {
comp += BP_GET_PSIZE(&bp);
uncomp += BP_GET_UCSIZE(&bp);
}
if (err == ENOENT)
err = 0;
*compp = comp;
*uncompp = uncomp;
}
return (err);
}
/*
* Return (in *dasizep) the amount of space on the deadlist which is:
* mintxg < blk_birth <= maxtxg
*/
int
bplist_space_birthrange(bplist_t *bpl, uint64_t mintxg, uint64_t maxtxg,
uint64_t *dasizep)
{
uint64_t size = 0;
uint64_t itor = 0;
blkptr_t bp;
int err;
/*
* As an optimization, if they want the whole txg range, just
* get bpl_bytes rather than iterating over the bps.
*/
if (mintxg < TXG_INITIAL && maxtxg == UINT64_MAX) {
mutex_enter(&bpl->bpl_lock);
err = bplist_hold(bpl);
if (err == 0)
*dasizep = bpl->bpl_phys->bpl_bytes;
mutex_exit(&bpl->bpl_lock);
return (err);
}
while ((err = bplist_iterate(bpl, &itor, &bp)) == 0) {
if (bp.blk_birth > mintxg && bp.blk_birth <= maxtxg) {
size +=
bp_get_dasize(dmu_objset_spa(bpl->bpl_mos), &bp);
}
}
if (err == ENOENT)
err = 0;
*dasizep = size;
return (err);
}

462
module/zfs/bpobj.c Normal file
View File

@ -0,0 +1,462 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#include <sys/bpobj.h>
#include <sys/zfs_context.h>
#include <sys/refcount.h>
uint64_t
bpobj_alloc(objset_t *os, int blocksize, dmu_tx_t *tx)
{
int size;
if (spa_version(dmu_objset_spa(os)) < SPA_VERSION_BPOBJ_ACCOUNT)
size = BPOBJ_SIZE_V0;
else if (spa_version(dmu_objset_spa(os)) < SPA_VERSION_DEADLISTS)
size = BPOBJ_SIZE_V1;
else
size = sizeof (bpobj_phys_t);
return (dmu_object_alloc(os, DMU_OT_BPOBJ, blocksize,
DMU_OT_BPOBJ_HDR, size, tx));
}
void
bpobj_free(objset_t *os, uint64_t obj, dmu_tx_t *tx)
{
int64_t i;
bpobj_t bpo;
dmu_object_info_t doi;
int epb;
dmu_buf_t *dbuf = NULL;
VERIFY3U(0, ==, bpobj_open(&bpo, os, obj));
mutex_enter(&bpo.bpo_lock);
if (!bpo.bpo_havesubobj || bpo.bpo_phys->bpo_subobjs == 0)
goto out;
VERIFY3U(0, ==, dmu_object_info(os, bpo.bpo_phys->bpo_subobjs, &doi));
epb = doi.doi_data_block_size / sizeof (uint64_t);
for (i = bpo.bpo_phys->bpo_num_subobjs - 1; i >= 0; i--) {
uint64_t *objarray;
uint64_t offset, blkoff;
offset = i * sizeof (uint64_t);
blkoff = P2PHASE(i, epb);
if (dbuf == NULL || dbuf->db_offset > offset) {
if (dbuf)
dmu_buf_rele(dbuf, FTAG);
VERIFY3U(0, ==, dmu_buf_hold(os,
bpo.bpo_phys->bpo_subobjs, offset, FTAG, &dbuf, 0));
}
ASSERT3U(offset, >=, dbuf->db_offset);
ASSERT3U(offset, <, dbuf->db_offset + dbuf->db_size);
objarray = dbuf->db_data;
bpobj_free(os, objarray[blkoff], tx);
}
if (dbuf) {
dmu_buf_rele(dbuf, FTAG);
dbuf = NULL;
}
VERIFY3U(0, ==, dmu_object_free(os, bpo.bpo_phys->bpo_subobjs, tx));
out:
mutex_exit(&bpo.bpo_lock);
bpobj_close(&bpo);
VERIFY3U(0, ==, dmu_object_free(os, obj, tx));
}
int
bpobj_open(bpobj_t *bpo, objset_t *os, uint64_t object)
{
dmu_object_info_t doi;
int err;
err = dmu_object_info(os, object, &doi);
if (err)
return (err);
bzero(bpo, sizeof (*bpo));
mutex_init(&bpo->bpo_lock, NULL, MUTEX_DEFAULT, NULL);
ASSERT(bpo->bpo_dbuf == NULL);
ASSERT(bpo->bpo_phys == NULL);
ASSERT(object != 0);
ASSERT3U(doi.doi_type, ==, DMU_OT_BPOBJ);
ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_BPOBJ_HDR);
bpo->bpo_os = os;
bpo->bpo_object = object;
bpo->bpo_epb = doi.doi_data_block_size >> SPA_BLKPTRSHIFT;
bpo->bpo_havecomp = (doi.doi_bonus_size > BPOBJ_SIZE_V0);
bpo->bpo_havesubobj = (doi.doi_bonus_size > BPOBJ_SIZE_V1);
err = dmu_bonus_hold(bpo->bpo_os,
bpo->bpo_object, bpo, &bpo->bpo_dbuf);
if (err)
return (err);
bpo->bpo_phys = bpo->bpo_dbuf->db_data;
return (0);
}
void
bpobj_close(bpobj_t *bpo)
{
/* Lame workaround for closing a bpobj that was never opened. */
if (bpo->bpo_object == 0)
return;
dmu_buf_rele(bpo->bpo_dbuf, bpo);
if (bpo->bpo_cached_dbuf != NULL)
dmu_buf_rele(bpo->bpo_cached_dbuf, bpo);
bpo->bpo_dbuf = NULL;
bpo->bpo_phys = NULL;
bpo->bpo_cached_dbuf = NULL;
mutex_destroy(&bpo->bpo_lock);
}
static int
bpobj_iterate_impl(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx,
boolean_t free)
{
dmu_object_info_t doi;
int epb;
int64_t i;
int err = 0;
dmu_buf_t *dbuf = NULL;
mutex_enter(&bpo->bpo_lock);
if (free)
dmu_buf_will_dirty(bpo->bpo_dbuf, tx);
for (i = bpo->bpo_phys->bpo_num_blkptrs - 1; i >= 0; i--) {
blkptr_t *bparray;
blkptr_t *bp;
uint64_t offset, blkoff;
offset = i * sizeof (blkptr_t);
blkoff = P2PHASE(i, bpo->bpo_epb);
if (dbuf == NULL || dbuf->db_offset > offset) {
if (dbuf)
dmu_buf_rele(dbuf, FTAG);
err = dmu_buf_hold(bpo->bpo_os, bpo->bpo_object, offset,
FTAG, &dbuf, 0);
if (err)
break;
}
ASSERT3U(offset, >=, dbuf->db_offset);
ASSERT3U(offset, <, dbuf->db_offset + dbuf->db_size);
bparray = dbuf->db_data;
bp = &bparray[blkoff];
err = func(arg, bp, tx);
if (err)
break;
if (free) {
bpo->bpo_phys->bpo_bytes -=
bp_get_dsize_sync(dmu_objset_spa(bpo->bpo_os), bp);
ASSERT3S(bpo->bpo_phys->bpo_bytes, >=, 0);
if (bpo->bpo_havecomp) {
bpo->bpo_phys->bpo_comp -= BP_GET_PSIZE(bp);
bpo->bpo_phys->bpo_uncomp -= BP_GET_UCSIZE(bp);
}
bpo->bpo_phys->bpo_num_blkptrs--;
ASSERT3S(bpo->bpo_phys->bpo_num_blkptrs, >=, 0);
}
}
if (dbuf) {
dmu_buf_rele(dbuf, FTAG);
dbuf = NULL;
}
if (free) {
i++;
VERIFY3U(0, ==, dmu_free_range(bpo->bpo_os, bpo->bpo_object,
i * sizeof (blkptr_t), -1ULL, tx));
}
if (err || !bpo->bpo_havesubobj || bpo->bpo_phys->bpo_subobjs == 0)
goto out;
ASSERT(bpo->bpo_havecomp);
err = dmu_object_info(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, &doi);
if (err)
return (err);
epb = doi.doi_data_block_size / sizeof (uint64_t);
for (i = bpo->bpo_phys->bpo_num_subobjs - 1; i >= 0; i--) {
uint64_t *objarray;
uint64_t offset, blkoff;
bpobj_t sublist;
uint64_t used_before, comp_before, uncomp_before;
uint64_t used_after, comp_after, uncomp_after;
offset = i * sizeof (uint64_t);
blkoff = P2PHASE(i, epb);
if (dbuf == NULL || dbuf->db_offset > offset) {
if (dbuf)
dmu_buf_rele(dbuf, FTAG);
err = dmu_buf_hold(bpo->bpo_os,
bpo->bpo_phys->bpo_subobjs, offset, FTAG, &dbuf, 0);
if (err)
break;
}
ASSERT3U(offset, >=, dbuf->db_offset);
ASSERT3U(offset, <, dbuf->db_offset + dbuf->db_size);
objarray = dbuf->db_data;
err = bpobj_open(&sublist, bpo->bpo_os, objarray[blkoff]);
if (err)
break;
if (free) {
err = bpobj_space(&sublist,
&used_before, &comp_before, &uncomp_before);
if (err)
break;
}
err = bpobj_iterate_impl(&sublist, func, arg, tx, free);
if (free) {
VERIFY3U(0, ==, bpobj_space(&sublist,
&used_after, &comp_after, &uncomp_after));
bpo->bpo_phys->bpo_bytes -= used_before - used_after;
ASSERT3S(bpo->bpo_phys->bpo_bytes, >=, 0);
bpo->bpo_phys->bpo_comp -= comp_before - used_after;
bpo->bpo_phys->bpo_uncomp -=
uncomp_before - uncomp_after;
}
bpobj_close(&sublist);
if (err)
break;
if (free) {
err = dmu_object_free(bpo->bpo_os,
objarray[blkoff], tx);
if (err)
break;
bpo->bpo_phys->bpo_num_subobjs--;
ASSERT3S(bpo->bpo_phys->bpo_num_subobjs, >=, 0);
}
}
if (dbuf) {
dmu_buf_rele(dbuf, FTAG);
dbuf = NULL;
}
if (free) {
VERIFY3U(0, ==, dmu_free_range(bpo->bpo_os,
bpo->bpo_phys->bpo_subobjs,
(i + 1) * sizeof (uint64_t), -1ULL, tx));
}
out:
/* If there are no entries, there should be no bytes. */
ASSERT(bpo->bpo_phys->bpo_num_blkptrs > 0 ||
(bpo->bpo_havesubobj && bpo->bpo_phys->bpo_num_subobjs > 0) ||
bpo->bpo_phys->bpo_bytes == 0);
mutex_exit(&bpo->bpo_lock);
return (err);
}
/*
* Iterate and remove the entries. If func returns nonzero, iteration
* will stop and that entry will not be removed.
*/
int
bpobj_iterate(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx)
{
return (bpobj_iterate_impl(bpo, func, arg, tx, B_TRUE));
}
/*
* Iterate the entries. If func returns nonzero, iteration will stop.
*/
int
bpobj_iterate_nofree(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx)
{
return (bpobj_iterate_impl(bpo, func, arg, tx, B_FALSE));
}
void
bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx)
{
bpobj_t subbpo;
uint64_t used, comp, uncomp;
ASSERT(bpo->bpo_havesubobj);
ASSERT(bpo->bpo_havecomp);
VERIFY3U(0, ==, bpobj_open(&subbpo, bpo->bpo_os, subobj));
VERIFY3U(0, ==, bpobj_space(&subbpo, &used, &comp, &uncomp));
bpobj_close(&subbpo);
if (used == 0) {
/* No point in having an empty subobj. */
bpobj_free(bpo->bpo_os, subobj, tx);
return;
}
dmu_buf_will_dirty(bpo->bpo_dbuf, tx);
if (bpo->bpo_phys->bpo_subobjs == 0) {
bpo->bpo_phys->bpo_subobjs = dmu_object_alloc(bpo->bpo_os,
DMU_OT_BPOBJ_SUBOBJ, SPA_MAXBLOCKSIZE, DMU_OT_NONE, 0, tx);
}
mutex_enter(&bpo->bpo_lock);
dmu_write(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs,
bpo->bpo_phys->bpo_num_subobjs * sizeof (subobj),
sizeof (subobj), &subobj, tx);
bpo->bpo_phys->bpo_num_subobjs++;
bpo->bpo_phys->bpo_bytes += used;
bpo->bpo_phys->bpo_comp += comp;
bpo->bpo_phys->bpo_uncomp += uncomp;
mutex_exit(&bpo->bpo_lock);
}
void
bpobj_enqueue(bpobj_t *bpo, const blkptr_t *bp, dmu_tx_t *tx)
{
blkptr_t stored_bp = *bp;
uint64_t offset;
int blkoff;
blkptr_t *bparray;
ASSERT(!BP_IS_HOLE(bp));
/* We never need the fill count. */
stored_bp.blk_fill = 0;
/* The bpobj will compress better if we can leave off the checksum */
if (!BP_GET_DEDUP(bp))
bzero(&stored_bp.blk_cksum, sizeof (stored_bp.blk_cksum));
mutex_enter(&bpo->bpo_lock);
offset = bpo->bpo_phys->bpo_num_blkptrs * sizeof (stored_bp);
blkoff = P2PHASE(bpo->bpo_phys->bpo_num_blkptrs, bpo->bpo_epb);
if (bpo->bpo_cached_dbuf == NULL ||
offset < bpo->bpo_cached_dbuf->db_offset ||
offset >= bpo->bpo_cached_dbuf->db_offset +
bpo->bpo_cached_dbuf->db_size) {
if (bpo->bpo_cached_dbuf)
dmu_buf_rele(bpo->bpo_cached_dbuf, bpo);
VERIFY3U(0, ==, dmu_buf_hold(bpo->bpo_os, bpo->bpo_object,
offset, bpo, &bpo->bpo_cached_dbuf, 0));
}
dmu_buf_will_dirty(bpo->bpo_cached_dbuf, tx);
bparray = bpo->bpo_cached_dbuf->db_data;
bparray[blkoff] = stored_bp;
dmu_buf_will_dirty(bpo->bpo_dbuf, tx);
bpo->bpo_phys->bpo_num_blkptrs++;
bpo->bpo_phys->bpo_bytes +=
bp_get_dsize_sync(dmu_objset_spa(bpo->bpo_os), bp);
if (bpo->bpo_havecomp) {
bpo->bpo_phys->bpo_comp += BP_GET_PSIZE(bp);
bpo->bpo_phys->bpo_uncomp += BP_GET_UCSIZE(bp);
}
mutex_exit(&bpo->bpo_lock);
}
struct space_range_arg {
spa_t *spa;
uint64_t mintxg;
uint64_t maxtxg;
uint64_t used;
uint64_t comp;
uint64_t uncomp;
};
/* ARGSUSED */
static int
space_range_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
{
struct space_range_arg *sra = arg;
if (bp->blk_birth > sra->mintxg && bp->blk_birth <= sra->maxtxg) {
sra->used += bp_get_dsize_sync(sra->spa, bp);
sra->comp += BP_GET_PSIZE(bp);
sra->uncomp += BP_GET_UCSIZE(bp);
}
return (0);
}
int
bpobj_space(bpobj_t *bpo, uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
{
mutex_enter(&bpo->bpo_lock);
*usedp = bpo->bpo_phys->bpo_bytes;
if (bpo->bpo_havecomp) {
*compp = bpo->bpo_phys->bpo_comp;
*uncompp = bpo->bpo_phys->bpo_uncomp;
mutex_exit(&bpo->bpo_lock);
return (0);
} else {
mutex_exit(&bpo->bpo_lock);
return (bpobj_space_range(bpo, 0, UINT64_MAX,
usedp, compp, uncompp));
}
}
/*
* Return the amount of space in the bpobj which is:
* mintxg < blk_birth <= maxtxg
*/
int
bpobj_space_range(bpobj_t *bpo, uint64_t mintxg, uint64_t maxtxg,
uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
{
struct space_range_arg sra = { 0 };
int err;
/*
* As an optimization, if they want the whole txg range, just
* get bpo_bytes rather than iterating over the bps.
*/
if (mintxg < TXG_INITIAL && maxtxg == UINT64_MAX && bpo->bpo_havecomp)
return (bpobj_space(bpo, usedp, compp, uncompp));
sra.spa = dmu_objset_spa(bpo->bpo_os);
sra.mintxg = mintxg;
sra.maxtxg = maxtxg;
err = bpobj_iterate_nofree(bpo, space_range_cb, &sra, NULL);
*usedp = sra.used;
*compp = sra.comp;
*uncompp = sra.uncomp;
return (err);
}

File diff suppressed because it is too large Load Diff

1140
module/zfs/ddt.c Normal file

File diff suppressed because it is too large Load Diff

157
module/zfs/ddt_zap.c Normal file
View File

@ -0,0 +1,157 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#include <sys/zfs_context.h>
#include <sys/spa.h>
#include <sys/zio.h>
#include <sys/ddt.h>
#include <sys/zap.h>
#include <sys/dmu_tx.h>
#include <util/sscanf.h>
int ddt_zap_leaf_blockshift = 12;
int ddt_zap_indirect_blockshift = 12;
static int
ddt_zap_create(objset_t *os, uint64_t *objectp, dmu_tx_t *tx, boolean_t prehash)
{
zap_flags_t flags = ZAP_FLAG_HASH64 | ZAP_FLAG_UINT64_KEY;
if (prehash)
flags |= ZAP_FLAG_PRE_HASHED_KEY;
*objectp = zap_create_flags(os, 0, flags, DMU_OT_DDT_ZAP,
ddt_zap_leaf_blockshift, ddt_zap_indirect_blockshift,
DMU_OT_NONE, 0, tx);
return (*objectp == 0 ? ENOTSUP : 0);
}
static int
ddt_zap_destroy(objset_t *os, uint64_t object, dmu_tx_t *tx)
{
return (zap_destroy(os, object, tx));
}
static int
ddt_zap_lookup(objset_t *os, uint64_t object, ddt_entry_t *dde)
{
uchar_t cbuf[sizeof (dde->dde_phys) + 1];
uint64_t one, csize;
int error;
error = zap_length_uint64(os, object, (uint64_t *)&dde->dde_key,
DDT_KEY_WORDS, &one, &csize);
if (error)
return (error);
ASSERT(one == 1);
ASSERT(csize <= sizeof (cbuf));
error = zap_lookup_uint64(os, object, (uint64_t *)&dde->dde_key,
DDT_KEY_WORDS, 1, csize, cbuf);
if (error)
return (error);
ddt_decompress(cbuf, dde->dde_phys, csize, sizeof (dde->dde_phys));
return (0);
}
static void
ddt_zap_prefetch(objset_t *os, uint64_t object, ddt_entry_t *dde)
{
(void) zap_prefetch_uint64(os, object, (uint64_t *)&dde->dde_key,
DDT_KEY_WORDS);
}
static int
ddt_zap_update(objset_t *os, uint64_t object, ddt_entry_t *dde, dmu_tx_t *tx)
{
uchar_t cbuf[sizeof (dde->dde_phys) + 1];
uint64_t csize;
csize = ddt_compress(dde->dde_phys, cbuf,
sizeof (dde->dde_phys), sizeof (cbuf));
return (zap_update_uint64(os, object, (uint64_t *)&dde->dde_key,
DDT_KEY_WORDS, 1, csize, cbuf, tx));
}
static int
ddt_zap_remove(objset_t *os, uint64_t object, ddt_entry_t *dde, dmu_tx_t *tx)
{
return (zap_remove_uint64(os, object, (uint64_t *)&dde->dde_key,
DDT_KEY_WORDS, tx));
}
static int
ddt_zap_walk(objset_t *os, uint64_t object, ddt_entry_t *dde, uint64_t *walk)
{
zap_cursor_t zc;
zap_attribute_t za;
int error;
zap_cursor_init_serialized(&zc, os, object, *walk);
if ((error = zap_cursor_retrieve(&zc, &za)) == 0) {
uchar_t cbuf[sizeof (dde->dde_phys) + 1];
uint64_t csize = za.za_num_integers;
ASSERT(za.za_integer_length == 1);
error = zap_lookup_uint64(os, object, (uint64_t *)za.za_name,
DDT_KEY_WORDS, 1, csize, cbuf);
ASSERT(error == 0);
if (error == 0) {
ddt_decompress(cbuf, dde->dde_phys, csize,
sizeof (dde->dde_phys));
dde->dde_key = *(ddt_key_t *)za.za_name;
}
zap_cursor_advance(&zc);
*walk = zap_cursor_serialize(&zc);
}
zap_cursor_fini(&zc);
return (error);
}
static uint64_t
ddt_zap_count(objset_t *os, uint64_t object)
{
uint64_t count = 0;
VERIFY(zap_count(os, object, &count) == 0);
return (count);
}
const ddt_ops_t ddt_zap_ops = {
"zap",
ddt_zap_create,
ddt_zap_destroy,
ddt_zap_lookup,
ddt_zap_prefetch,
ddt_zap_update,
ddt_zap_remove,
ddt_zap_walk,
ddt_zap_count,
};

File diff suppressed because it is too large Load Diff

View File

@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#include <sys/dmu.h>
@ -32,16 +31,15 @@ uint64_t
dmu_object_alloc(objset_t *os, dmu_object_type_t ot, int blocksize,
dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
{
objset_impl_t *osi = os->os;
uint64_t object;
uint64_t L2_dnode_count = DNODES_PER_BLOCK <<
(osi->os_meta_dnode->dn_indblkshift - SPA_BLKPTRSHIFT);
(os->os_meta_dnode->dn_indblkshift - SPA_BLKPTRSHIFT);
dnode_t *dn = NULL;
int restarted = B_FALSE;
mutex_enter(&osi->os_obj_lock);
mutex_enter(&os->os_obj_lock);
for (;;) {
object = osi->os_obj_next;
object = os->os_obj_next;
/*
* Each time we polish off an L2 bp worth of dnodes
* (2^13 objects), move to another L2 bp that's still
@ -51,14 +49,14 @@ dmu_object_alloc(objset_t *os, dmu_object_type_t ot, int blocksize,
*/
if (P2PHASE(object, L2_dnode_count) == 0) {
uint64_t offset = restarted ? object << DNODE_SHIFT : 0;
int error = dnode_next_offset(osi->os_meta_dnode,
int error = dnode_next_offset(os->os_meta_dnode,
DNODE_FIND_HOLE,
&offset, 2, DNODES_PER_BLOCK >> 2, 0);
restarted = B_TRUE;
if (error == 0)
object = offset >> DNODE_SHIFT;
}
osi->os_obj_next = ++object;
os->os_obj_next = ++object;
/*
* XXX We should check for an i/o error here and return
@ -66,19 +64,19 @@ dmu_object_alloc(objset_t *os, dmu_object_type_t ot, int blocksize,
* dmu_tx_assign(), but there is currently no mechanism
* to do so.
*/
(void) dnode_hold_impl(os->os, object, DNODE_MUST_BE_FREE,
(void) dnode_hold_impl(os, object, DNODE_MUST_BE_FREE,
FTAG, &dn);
if (dn)
break;
if (dmu_object_next(os, &object, B_TRUE, 0) == 0)
osi->os_obj_next = object - 1;
os->os_obj_next = object - 1;
}
dnode_allocate(dn, ot, blocksize, 0, bonustype, bonuslen, tx);
dnode_rele(dn, FTAG);
mutex_exit(&osi->os_obj_lock);
mutex_exit(&os->os_obj_lock);
dmu_tx_add_new_object(tx, os, object);
return (object);
@ -94,7 +92,7 @@ dmu_object_claim(objset_t *os, uint64_t object, dmu_object_type_t ot,
if (object == DMU_META_DNODE_OBJECT && !dmu_tx_private_ok(tx))
return (EBADF);
err = dnode_hold_impl(os->os, object, DNODE_MUST_BE_FREE, FTAG, &dn);
err = dnode_hold_impl(os, object, DNODE_MUST_BE_FREE, FTAG, &dn);
if (err)
return (err);
dnode_allocate(dn, ot, blocksize, 0, bonustype, bonuslen, tx);
@ -116,7 +114,7 @@ dmu_object_reclaim(objset_t *os, uint64_t object, dmu_object_type_t ot,
if (object == DMU_META_DNODE_OBJECT)
return (EBADF);
err = dnode_hold_impl(os->os, object, DNODE_MUST_BE_ALLOCATED,
err = dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED,
FTAG, &dn);
if (err)
return (err);
@ -128,7 +126,11 @@ dmu_object_reclaim(objset_t *os, uint64_t object, dmu_object_type_t ot,
return (0);
}
nblkptr = 1 + ((DN_MAX_BONUSLEN - bonuslen) >> SPA_BLKPTRSHIFT);
if (bonustype == DMU_OT_SA) {
nblkptr = 1;
} else {
nblkptr = 1 + ((DN_MAX_BONUSLEN - bonuslen) >> SPA_BLKPTRSHIFT);
}
/*
* If we are losing blkptrs or changing the block size this must
@ -166,7 +168,7 @@ dmu_object_free(objset_t *os, uint64_t object, dmu_tx_t *tx)
ASSERT(object != DMU_META_DNODE_OBJECT || dmu_tx_private_ok(tx));
err = dnode_hold_impl(os->os, object, DNODE_MUST_BE_ALLOCATED,
err = dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED,
FTAG, &dn);
if (err)
return (err);
@ -185,7 +187,7 @@ dmu_object_next(objset_t *os, uint64_t *objectp, boolean_t hole, uint64_t txg)
uint64_t offset = (*objectp + 1) << DNODE_SHIFT;
int error;
error = dnode_next_offset(os->os->os_meta_dnode,
error = dnode_next_offset(os->os_meta_dnode,
(hole ? DNODE_FIND_HOLE : 0), &offset, 0, DNODES_PER_BLOCK, txg);
*objectp = offset >> DNODE_SHIFT;

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#include <sys/zfs_context.h>
@ -33,16 +32,10 @@
#include <sys/spa.h>
#include <sys/zio.h>
#include <sys/dmu_impl.h>
#include <sys/sa.h>
#include <sys/sa_impl.h>
#include <sys/callb.h>
#define SET_BOOKMARK(zb, objset, object, level, blkid) \
{ \
(zb)->zb_objset = objset; \
(zb)->zb_object = object; \
(zb)->zb_level = level; \
(zb)->zb_blkid = blkid; \
}
struct prefetch_data {
kmutex_t pd_mtx;
kcondvar_t pd_cv;
@ -68,27 +61,28 @@ static int traverse_dnode(struct traverse_data *td, const dnode_phys_t *dnp,
arc_buf_t *buf, uint64_t objset, uint64_t object);
/* ARGSUSED */
static void
static int
traverse_zil_block(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t claim_txg)
{
struct traverse_data *td = arg;
zbookmark_t zb;
if (bp->blk_birth == 0)
return;
return (0);
if (claim_txg == 0 && bp->blk_birth >= spa_first_txg(td->td_spa))
return;
return (0);
zb.zb_objset = td->td_objset;
zb.zb_object = 0;
zb.zb_level = -1;
zb.zb_blkid = bp->blk_cksum.zc_word[ZIL_ZC_SEQ];
VERIFY(0 == td->td_func(td->td_spa, bp, &zb, NULL, td->td_arg));
SET_BOOKMARK(&zb, td->td_objset, ZB_ZIL_OBJECT, ZB_ZIL_LEVEL,
bp->blk_cksum.zc_word[ZIL_ZC_SEQ]);
(void) td->td_func(td->td_spa, zilog, bp, NULL, &zb, NULL, td->td_arg);
return (0);
}
/* ARGSUSED */
static void
static int
traverse_zil_record(zilog_t *zilog, lr_t *lrc, void *arg, uint64_t claim_txg)
{
struct traverse_data *td = arg;
@ -99,17 +93,18 @@ traverse_zil_record(zilog_t *zilog, lr_t *lrc, void *arg, uint64_t claim_txg)
zbookmark_t zb;
if (bp->blk_birth == 0)
return;
return (0);
if (claim_txg == 0 || bp->blk_birth < claim_txg)
return;
return (0);
zb.zb_objset = td->td_objset;
zb.zb_object = lr->lr_foid;
zb.zb_level = BP_GET_LEVEL(bp);
zb.zb_blkid = lr->lr_offset / BP_GET_LSIZE(bp);
VERIFY(0 == td->td_func(td->td_spa, bp, &zb, NULL, td->td_arg));
SET_BOOKMARK(&zb, td->td_objset, lr->lr_foid, ZB_ZIL_LEVEL,
lr->lr_offset / BP_GET_LSIZE(bp));
(void) td->td_func(td->td_spa, zilog, bp, NULL, &zb, NULL,
td->td_arg);
}
return (0);
}
static void
@ -120,7 +115,7 @@ traverse_zil(struct traverse_data *td, zil_header_t *zh)
/*
* We only want to visit blocks that have been claimed but not yet
* replayed (or, in read-only mode, blocks that *would* be claimed).
* replayed; plus, in read-only mode, blocks that are already stable.
*/
if (claim_txg == 0 && spa_writeable(td->td_spa))
return;
@ -138,12 +133,14 @@ traverse_visitbp(struct traverse_data *td, const dnode_phys_t *dnp,
arc_buf_t *pbuf, blkptr_t *bp, const zbookmark_t *zb)
{
zbookmark_t czb;
int err = 0;
int err = 0, lasterr = 0;
arc_buf_t *buf = NULL;
struct prefetch_data *pd = td->td_pfd;
boolean_t hard = td->td_flags & TRAVERSE_HARD;
if (bp->blk_birth == 0) {
err = td->td_func(td->td_spa, NULL, zb, dnp, td->td_arg);
err = td->td_func(td->td_spa, NULL, NULL, pbuf, zb, dnp,
td->td_arg);
return (err);
}
@ -163,7 +160,8 @@ traverse_visitbp(struct traverse_data *td, const dnode_phys_t *dnp,
}
if (td->td_flags & TRAVERSE_PRE) {
err = td->td_func(td->td_spa, bp, zb, dnp, td->td_arg);
err = td->td_func(td->td_spa, NULL, bp, pbuf, zb, dnp,
td->td_arg);
if (err)
return (err);
}
@ -174,7 +172,7 @@ traverse_visitbp(struct traverse_data *td, const dnode_phys_t *dnp,
blkptr_t *cbp;
int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT;
err = arc_read(NULL, td->td_spa, bp, pbuf,
err = dsl_read(NULL, td->td_spa, bp, pbuf,
arc_getbuf_func, &buf,
ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
if (err)
@ -187,15 +185,18 @@ traverse_visitbp(struct traverse_data *td, const dnode_phys_t *dnp,
zb->zb_level - 1,
zb->zb_blkid * epb + i);
err = traverse_visitbp(td, dnp, buf, cbp, &czb);
if (err)
break;
if (err) {
if (!hard)
break;
lasterr = err;
}
}
} else if (BP_GET_TYPE(bp) == DMU_OT_DNODE) {
uint32_t flags = ARC_WAIT;
int i;
int epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT;
err = arc_read(NULL, td->td_spa, bp, pbuf,
err = dsl_read(NULL, td->td_spa, bp, pbuf,
arc_getbuf_func, &buf,
ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
if (err)
@ -203,18 +204,21 @@ traverse_visitbp(struct traverse_data *td, const dnode_phys_t *dnp,
/* recursively visitbp() blocks below this */
dnp = buf->b_data;
for (i = 0; i < epb && err == 0; i++, dnp++) {
for (i = 0; i < epb; i++, dnp++) {
err = traverse_dnode(td, dnp, buf, zb->zb_objset,
zb->zb_blkid * epb + i);
if (err)
break;
if (err) {
if (!hard)
break;
lasterr = err;
}
}
} else if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) {
uint32_t flags = ARC_WAIT;
objset_phys_t *osp;
dnode_phys_t *dnp;
err = arc_read_nolock(NULL, td->td_spa, bp,
err = dsl_read_nolock(NULL, td->td_spa, bp,
arc_getbuf_func, &buf,
ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
if (err)
@ -224,12 +228,21 @@ traverse_visitbp(struct traverse_data *td, const dnode_phys_t *dnp,
traverse_zil(td, &osp->os_zil_header);
dnp = &osp->os_meta_dnode;
err = traverse_dnode(td, dnp, buf, zb->zb_objset, 0);
err = traverse_dnode(td, dnp, buf, zb->zb_objset,
DMU_META_DNODE_OBJECT);
if (err && hard) {
lasterr = err;
err = 0;
}
if (err == 0 && arc_buf_size(buf) >= sizeof (objset_phys_t)) {
dnp = &osp->os_userused_dnode;
err = traverse_dnode(td, dnp, buf, zb->zb_objset,
DMU_USERUSED_OBJECT);
}
if (err && hard) {
lasterr = err;
err = 0;
}
if (err == 0 && arc_buf_size(buf) >= sizeof (objset_phys_t)) {
dnp = &osp->os_groupused_dnode;
err = traverse_dnode(td, dnp, buf, zb->zb_objset,
@ -240,33 +253,52 @@ traverse_visitbp(struct traverse_data *td, const dnode_phys_t *dnp,
if (buf)
(void) arc_buf_remove_ref(buf, &buf);
if (err == 0 && (td->td_flags & TRAVERSE_POST))
err = td->td_func(td->td_spa, bp, zb, dnp, td->td_arg);
if (err == 0 && lasterr == 0 && (td->td_flags & TRAVERSE_POST)) {
err = td->td_func(td->td_spa, NULL, bp, pbuf, zb, dnp,
td->td_arg);
}
return (err);
return (err != 0 ? err : lasterr);
}
static int
traverse_dnode(struct traverse_data *td, const dnode_phys_t *dnp,
arc_buf_t *buf, uint64_t objset, uint64_t object)
{
int j, err = 0;
int j, err = 0, lasterr = 0;
zbookmark_t czb;
boolean_t hard = (td->td_flags & TRAVERSE_HARD);
for (j = 0; j < dnp->dn_nblkptr; j++) {
SET_BOOKMARK(&czb, objset, object, dnp->dn_nlevels - 1, j);
err = traverse_visitbp(td, dnp, buf,
(blkptr_t *)&dnp->dn_blkptr[j], &czb);
if (err)
break;
if (err) {
if (!hard)
break;
lasterr = err;
}
}
return (err);
if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) {
SET_BOOKMARK(&czb, objset,
object, 0, DMU_SPILL_BLKID);
err = traverse_visitbp(td, dnp, buf,
(blkptr_t *)&dnp->dn_spill, &czb);
if (err) {
if (!hard)
return (err);
lasterr = err;
}
}
return (err != 0 ? err : lasterr);
}
/* ARGSUSED */
static int
traverse_prefetcher(spa_t *spa, blkptr_t *bp, const zbookmark_t *zb,
const dnode_phys_t *dnp, void *arg)
traverse_prefetcher(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
arc_buf_t *pbuf, const zbookmark_t *zb, const dnode_phys_t *dnp,
void *arg)
{
struct prefetch_data *pfd = arg;
uint32_t aflags = ARC_NOWAIT | ARC_PREFETCH;
@ -276,7 +308,8 @@ traverse_prefetcher(spa_t *spa, blkptr_t *bp, const zbookmark_t *zb,
return (EINTR);
if (bp == NULL || !((pfd->pd_flags & TRAVERSE_PREFETCH_DATA) ||
BP_GET_TYPE(bp) == DMU_OT_DNODE || BP_GET_LEVEL(bp) > 0))
BP_GET_TYPE(bp) == DMU_OT_DNODE || BP_GET_LEVEL(bp) > 0) ||
BP_GET_TYPE(bp) == DMU_OT_INTENT_LOG)
return (0);
mutex_enter(&pfd->pd_mtx);
@ -286,7 +319,7 @@ traverse_prefetcher(spa_t *spa, blkptr_t *bp, const zbookmark_t *zb,
cv_broadcast(&pfd->pd_cv);
mutex_exit(&pfd->pd_mtx);
(void) arc_read_nolock(NULL, spa, bp, NULL, NULL,
(void) dsl_read(NULL, spa, bp, pbuf, NULL, NULL,
ZIO_PRIORITY_ASYNC_READ,
ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE,
&aflags, zb);
@ -305,7 +338,8 @@ traverse_prefetch_thread(void *arg)
td.td_arg = td_main->td_pfd;
td.td_pfd = NULL;
SET_BOOKMARK(&czb, td.td_objset, 0, -1, 0);
SET_BOOKMARK(&czb, td.td_objset,
ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
(void) traverse_visitbp(&td, NULL, NULL, td.td_rootbp, &czb);
mutex_enter(&td_main->td_pfd->pd_mtx);
@ -346,7 +380,8 @@ traverse_impl(spa_t *spa, uint64_t objset, blkptr_t *rootbp,
&td, TQ_NOQUEUE))
pd.pd_exited = B_TRUE;
SET_BOOKMARK(&czb, objset, 0, -1, 0);
SET_BOOKMARK(&czb, objset,
ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
err = traverse_visitbp(&td, NULL, NULL, rootbp, &czb);
mutex_enter(&pd.pd_mtx);
@ -378,43 +413,59 @@ traverse_dataset(dsl_dataset_t *ds, uint64_t txg_start, int flags,
* NB: pool must not be changing on-disk (eg, from zdb or sync context).
*/
int
traverse_pool(spa_t *spa, blkptr_cb_t func, void *arg)
traverse_pool(spa_t *spa, uint64_t txg_start, int flags,
blkptr_cb_t func, void *arg)
{
int err;
int err, lasterr = 0;
uint64_t obj;
dsl_pool_t *dp = spa_get_dsl(spa);
objset_t *mos = dp->dp_meta_objset;
boolean_t hard = (flags & TRAVERSE_HARD);
/* visit the MOS */
err = traverse_impl(spa, 0, spa_get_rootblkptr(spa),
0, TRAVERSE_PRE, func, arg);
txg_start, flags, func, arg);
if (err)
return (err);
/* visit each dataset */
for (obj = 1; err == 0; err = dmu_object_next(mos, &obj, FALSE, 0)) {
for (obj = 1; err == 0 || (err != ESRCH && hard);
err = dmu_object_next(mos, &obj, FALSE, txg_start)) {
dmu_object_info_t doi;
err = dmu_object_info(mos, obj, &doi);
if (err)
return (err);
if (err) {
if (!hard)
return (err);
lasterr = err;
continue;
}
if (doi.doi_type == DMU_OT_DSL_DATASET) {
dsl_dataset_t *ds;
uint64_t txg = txg_start;
rw_enter(&dp->dp_config_rwlock, RW_READER);
err = dsl_dataset_hold_obj(dp, obj, FTAG, &ds);
rw_exit(&dp->dp_config_rwlock);
if (err)
return (err);
err = traverse_dataset(ds,
ds->ds_phys->ds_prev_snap_txg, TRAVERSE_PRE,
func, arg);
if (err) {
if (!hard)
return (err);
lasterr = err;
continue;
}
if (ds->ds_phys->ds_prev_snap_txg > txg)
txg = ds->ds_phys->ds_prev_snap_txg;
err = traverse_dataset(ds, txg, flags, func, arg);
dsl_dataset_rele(ds, FTAG);
if (err)
return (err);
if (err) {
if (!hard)
return (err);
lasterr = err;
}
}
}
if (err == ESRCH)
err = 0;
return (err);
return (err != 0 ? err : lasterr);
}

View File

@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#include <sys/dmu.h>
@ -33,7 +32,10 @@
#include <sys/dsl_pool.h>
#include <sys/zap_impl.h> /* for fzap_default_block_shift */
#include <sys/spa.h>
#include <sys/sa.h>
#include <sys/sa_impl.h>
#include <sys/zfs_context.h>
#include <sys/varargs.h>
typedef void (*dmu_tx_hold_func_t)(dmu_tx_t *tx, struct dnode *dn,
uint64_t arg1, uint64_t arg2);
@ -48,6 +50,8 @@ dmu_tx_create_dd(dsl_dir_t *dd)
tx->tx_pool = dd->dd_pool;
list_create(&tx->tx_holds, sizeof (dmu_tx_hold_t),
offsetof(dmu_tx_hold_t, txh_node));
list_create(&tx->tx_callbacks, sizeof (dmu_tx_callback_t),
offsetof(dmu_tx_callback_t, dcb_node));
#ifdef ZFS_DEBUG
refcount_create(&tx->tx_space_written);
refcount_create(&tx->tx_space_freed);
@ -58,9 +62,9 @@ dmu_tx_create_dd(dsl_dir_t *dd)
dmu_tx_t *
dmu_tx_create(objset_t *os)
{
dmu_tx_t *tx = dmu_tx_create_dd(os->os->os_dsl_dataset->ds_dir);
dmu_tx_t *tx = dmu_tx_create_dd(os->os_dsl_dataset->ds_dir);
tx->tx_objset = os;
tx->tx_lastsnap_txg = dsl_dataset_prev_snap_txg(os->os->os_dsl_dataset);
tx->tx_lastsnap_txg = dsl_dataset_prev_snap_txg(os->os_dsl_dataset);
return (tx);
}
@ -98,7 +102,7 @@ dmu_tx_hold_object_impl(dmu_tx_t *tx, objset_t *os, uint64_t object,
int err;
if (object != DMU_NEW_OBJECT) {
err = dnode_hold(os->os, object, tx, &dn);
err = dnode_hold(os, object, tx, &dn);
if (err) {
tx->tx_err = err;
return (NULL);
@ -161,38 +165,47 @@ dmu_tx_check_ioerr(zio_t *zio, dnode_t *dn, int level, uint64_t blkid)
}
static void
dmu_tx_count_indirects(dmu_tx_hold_t *txh, dmu_buf_impl_t *db,
boolean_t freeable, dmu_buf_impl_t **history)
dmu_tx_count_twig(dmu_tx_hold_t *txh, dnode_t *dn, dmu_buf_impl_t *db,
int level, uint64_t blkid, boolean_t freeable, uint64_t *history)
{
int i = db->db_level + 1;
dnode_t *dn = db->db_dnode;
objset_t *os = dn->dn_objset;
dsl_dataset_t *ds = os->os_dsl_dataset;
int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
dmu_buf_impl_t *parent = NULL;
blkptr_t *bp = NULL;
uint64_t space;
if (i >= dn->dn_nlevels)
if (level >= dn->dn_nlevels || history[level] == blkid)
return;
db = db->db_parent;
if (db == NULL) {
uint64_t lvls = dn->dn_nlevels - i;
history[level] = blkid;
txh->txh_space_towrite += lvls << dn->dn_indblkshift;
return;
space = (level == 0) ? dn->dn_datablksz : (1ULL << dn->dn_indblkshift);
if (db == NULL || db == dn->dn_dbuf) {
ASSERT(level != 0);
db = NULL;
} else {
ASSERT(db->db_dnode == dn);
ASSERT(db->db_level == level);
ASSERT(db->db.db_size == space);
ASSERT(db->db_blkid == blkid);
bp = db->db_blkptr;
parent = db->db_parent;
}
if (db != history[i]) {
dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset;
uint64_t space = 1ULL << dn->dn_indblkshift;
freeable = (bp && (freeable ||
dsl_dataset_block_freeable(ds, bp, bp->blk_birth)));
freeable = (db->db_blkptr && (freeable ||
dsl_dataset_block_freeable(ds, db->db_blkptr->blk_birth)));
if (freeable)
txh->txh_space_tooverwrite += space;
else
txh->txh_space_towrite += space;
if (db->db_blkptr)
txh->txh_space_tounref += space;
history[i] = db;
dmu_tx_count_indirects(txh, db, freeable, history);
}
if (freeable)
txh->txh_space_tooverwrite += space;
else
txh->txh_space_towrite += space;
if (bp)
txh->txh_space_tounref += bp_get_dsize(os->os_spa, bp);
dmu_tx_count_twig(txh, dn, parent, level + 1,
blkid >> epbs, freeable, history);
}
/* ARGSUSED */
@ -213,7 +226,7 @@ dmu_tx_count_write(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
max_ibs = DN_MAX_INDBLKSHIFT;
if (dn) {
dmu_buf_impl_t *last[DN_MAX_LEVELS];
uint64_t history[DN_MAX_LEVELS];
int nlvls = dn->dn_nlevels;
int delta;
@ -289,29 +302,24 @@ dmu_tx_count_write(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
* If this write is not off the end of the file
* we need to account for overwrites/unref.
*/
if (start <= dn->dn_maxblkid)
bzero(last, sizeof (dmu_buf_impl_t *) * DN_MAX_LEVELS);
if (start <= dn->dn_maxblkid) {
for (int l = 0; l < DN_MAX_LEVELS; l++)
history[l] = -1ULL;
}
while (start <= dn->dn_maxblkid) {
spa_t *spa = txh->txh_tx->tx_pool->dp_spa;
dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset;
dmu_buf_impl_t *db;
rw_enter(&dn->dn_struct_rwlock, RW_READER);
db = dbuf_hold_level(dn, 0, start, FTAG);
err = dbuf_hold_impl(dn, 0, start, FALSE, FTAG, &db);
rw_exit(&dn->dn_struct_rwlock);
if (db->db_blkptr && dsl_dataset_block_freeable(ds,
db->db_blkptr->blk_birth)) {
dprintf_bp(db->db_blkptr, "can free old%s", "");
txh->txh_space_tooverwrite += dn->dn_datablksz;
txh->txh_space_tounref += dn->dn_datablksz;
dmu_tx_count_indirects(txh, db, TRUE, last);
} else {
txh->txh_space_towrite += dn->dn_datablksz;
if (db->db_blkptr)
txh->txh_space_tounref +=
bp_get_dasize(spa, db->db_blkptr);
dmu_tx_count_indirects(txh, db, FALSE, last);
if (err) {
txh->txh_tx->tx_err = err;
return;
}
dmu_tx_count_twig(txh, dn, db, 0, start, B_FALSE,
history);
dbuf_rele(db, FTAG);
if (++start > end) {
/*
@ -376,13 +384,13 @@ static void
dmu_tx_count_dnode(dmu_tx_hold_t *txh)
{
dnode_t *dn = txh->txh_dnode;
dnode_t *mdn = txh->txh_tx->tx_objset->os->os_meta_dnode;
dnode_t *mdn = txh->txh_tx->tx_objset->os_meta_dnode;
uint64_t space = mdn->dn_datablksz +
((mdn->dn_nlevels-1) << mdn->dn_indblkshift);
if (dn && dn->dn_dbuf->db_blkptr &&
dsl_dataset_block_freeable(dn->dn_objset->os_dsl_dataset,
dn->dn_dbuf->db_blkptr->blk_birth)) {
dn->dn_dbuf->db_blkptr, dn->dn_dbuf->db_blkptr->blk_birth)) {
txh->txh_space_tooverwrite += space;
txh->txh_space_tounref += space;
} else {
@ -427,7 +435,7 @@ dmu_tx_count_free(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
* The struct_rwlock protects us against dn_nlevels
* changing, in case (against all odds) we manage to dirty &
* sync out the changes after we check for being dirty.
* Also, dbuf_hold_level() wants us to have the struct_rwlock.
* Also, dbuf_hold_impl() wants us to have the struct_rwlock.
*/
rw_enter(&dn->dn_struct_rwlock, RW_READER);
epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
@ -457,9 +465,9 @@ dmu_tx_count_free(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
blkptr_t *bp = dn->dn_phys->dn_blkptr;
ASSERT3U(blkid + i, <, dn->dn_nblkptr);
bp += blkid + i;
if (dsl_dataset_block_freeable(ds, bp->blk_birth)) {
if (dsl_dataset_block_freeable(ds, bp, bp->blk_birth)) {
dprintf_bp(bp, "can free old%s", "");
space += bp_get_dasize(spa, bp);
space += bp_get_dsize(spa, bp);
}
unref += BP_GET_ASIZE(bp);
}
@ -515,14 +523,22 @@ dmu_tx_count_free(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
blkoff = P2PHASE(blkid, epb);
tochk = MIN(epb - blkoff, nblks);
dbuf = dbuf_hold_level(dn, 1, blkid >> epbs, FTAG);
txh->txh_memory_tohold += dbuf->db.db_size;
if (txh->txh_memory_tohold > DMU_MAX_ACCESS) {
txh->txh_tx->tx_err = E2BIG;
dbuf_rele(dbuf, FTAG);
err = dbuf_hold_impl(dn, 1, blkid >> epbs, FALSE, FTAG, &dbuf);
if (err) {
txh->txh_tx->tx_err = err;
break;
}
txh->txh_memory_tohold += dbuf->db.db_size;
/*
* We don't check memory_tohold against DMU_MAX_ACCESS because
* memory_tohold is an over-estimation (especially the >L1
* indirect blocks), so it could fail. Callers should have
* already verified that they will not be holding too much
* memory.
*/
err = dbuf_read(dbuf, NULL, DB_RF_HAVESTRUCT | DB_RF_CANFAIL);
if (err != 0) {
txh->txh_tx->tx_err = err;
@ -534,9 +550,10 @@ dmu_tx_count_free(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
bp += blkoff;
for (i = 0; i < tochk; i++) {
if (dsl_dataset_block_freeable(ds, bp[i].blk_birth)) {
if (dsl_dataset_block_freeable(ds, &bp[i],
bp[i].blk_birth)) {
dprintf_bp(&bp[i], "can free old%s", "");
space += bp_get_dasize(spa, &bp[i]);
space += bp_get_dsize(spa, &bp[i]);
}
unref += BP_GET_ASIZE(bp);
}
@ -581,6 +598,8 @@ dmu_tx_hold_free(dmu_tx_t *tx, uint64_t object, uint64_t off, uint64_t len)
if (len != DMU_OBJECT_END)
dmu_tx_count_write(txh, off+len, 1);
dmu_tx_count_dnode(txh);
if (off >= (dn->dn_maxblkid+1) * dn->dn_datablksz)
return;
if (len == DMU_OBJECT_END)
@ -623,7 +642,6 @@ dmu_tx_hold_free(dmu_tx_t *tx, uint64_t object, uint64_t off, uint64_t len)
}
}
dmu_tx_count_dnode(txh);
dmu_tx_count_free(txh, off, len);
}
@ -673,6 +691,7 @@ dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, const char *name)
* the size will change between now and the dbuf dirty call.
*/
if (dsl_dataset_block_freeable(dn->dn_objset->os_dsl_dataset,
&dn->dn_phys->dn_blkptr[0],
dn->dn_phys->dn_blkptr[0].blk_birth)) {
txh->txh_space_tooverwrite += SPA_MAXBLOCKSIZE;
} else {
@ -688,7 +707,7 @@ dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, const char *name)
* access the name in this fat-zap so that we'll check
* for i/o errors to the leaf blocks, etc.
*/
err = zap_lookup(&dn->dn_objset->os, dn->dn_object, name,
err = zap_lookup(dn->dn_objset, dn->dn_object, name,
8, 0, NULL);
if (err == EIO) {
tx->tx_err = err;
@ -696,7 +715,7 @@ dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, const char *name)
}
}
err = zap_count_write(&dn->dn_objset->os, dn->dn_object, name, add,
err = zap_count_write(dn->dn_objset, dn->dn_object, name, add,
&txh->txh_space_towrite, &txh->txh_space_tooverwrite);
/*
@ -771,7 +790,7 @@ dmu_tx_dirty_buf(dmu_tx_t *tx, dmu_buf_impl_t *db)
dnode_t *dn = db->db_dnode;
ASSERT(tx->tx_txg != 0);
ASSERT(tx->tx_objset == NULL || dn->dn_objset == tx->tx_objset->os);
ASSERT(tx->tx_objset == NULL || dn->dn_objset == tx->tx_objset);
ASSERT3U(dn->dn_object, ==, db->db.db_object);
if (tx->tx_anyobj)
@ -808,10 +827,11 @@ dmu_tx_dirty_buf(dmu_tx_t *tx, dmu_buf_impl_t *db)
match_offset = TRUE;
/*
* We will let this hold work for the bonus
* buffer so that we don't need to hold it
* when creating a new object.
* or spill buffer so that we don't need to
* hold it when creating a new object.
*/
if (blkid == DB_BONUS_BLKID)
if (blkid == DMU_BONUS_BLKID ||
blkid == DMU_SPILL_BLKID)
match_offset = TRUE;
/*
* They might have to increase nlevels,
@ -832,8 +852,12 @@ dmu_tx_dirty_buf(dmu_tx_t *tx, dmu_buf_impl_t *db)
txh->txh_arg2 == DMU_OBJECT_END))
match_offset = TRUE;
break;
case THT_SPILL:
if (blkid == DMU_SPILL_BLKID)
match_offset = TRUE;
break;
case THT_BONUS:
if (blkid == DB_BONUS_BLKID)
if (blkid == DMU_BONUS_BLKID)
match_offset = TRUE;
break;
case THT_ZAP:
@ -931,7 +955,7 @@ dmu_tx_try_assign(dmu_tx_t *tx, uint64_t txg_how)
* assume that we won't be able to free or overwrite anything.
*/
if (tx->tx_objset &&
dsl_dataset_prev_snap_txg(tx->tx_objset->os->os_dsl_dataset) >
dsl_dataset_prev_snap_txg(tx->tx_objset->os_dsl_dataset) >
tx->tx_lastsnap_txg) {
towrite += tooverwrite;
tooverwrite = tofree = 0;
@ -1112,8 +1136,13 @@ dmu_tx_commit(dmu_tx_t *tx)
if (tx->tx_tempreserve_cookie)
dsl_dir_tempreserve_clear(tx->tx_tempreserve_cookie, tx);
if (!list_is_empty(&tx->tx_callbacks))
txg_register_callbacks(&tx->tx_txgh, &tx->tx_callbacks);
if (tx->tx_anyobj == FALSE)
txg_rele_to_sync(&tx->tx_txgh);
list_destroy(&tx->tx_callbacks);
list_destroy(&tx->tx_holds);
#ifdef ZFS_DEBUG
dprintf("towrite=%llu written=%llu tofree=%llu freed=%llu\n",
@ -1142,6 +1171,14 @@ dmu_tx_abort(dmu_tx_t *tx)
if (dn != NULL)
dnode_rele(dn, tx);
}
/*
* Call any registered callbacks with an error code.
*/
if (!list_is_empty(&tx->tx_callbacks))
dmu_tx_do_callbacks(&tx->tx_callbacks, ECANCELED);
list_destroy(&tx->tx_callbacks);
list_destroy(&tx->tx_holds);
#ifdef ZFS_DEBUG
refcount_destroy_many(&tx->tx_space_written,
@ -1158,3 +1195,169 @@ dmu_tx_get_txg(dmu_tx_t *tx)
ASSERT(tx->tx_txg != 0);
return (tx->tx_txg);
}
void
dmu_tx_callback_register(dmu_tx_t *tx, dmu_tx_callback_func_t *func, void *data)
{
dmu_tx_callback_t *dcb;
dcb = kmem_alloc(sizeof (dmu_tx_callback_t), KM_SLEEP);
dcb->dcb_func = func;
dcb->dcb_data = data;
list_insert_tail(&tx->tx_callbacks, dcb);
}
/*
* Call all the commit callbacks on a list, with a given error code.
*/
void
dmu_tx_do_callbacks(list_t *cb_list, int error)
{
dmu_tx_callback_t *dcb;
while (dcb = list_head(cb_list)) {
list_remove(cb_list, dcb);
dcb->dcb_func(dcb->dcb_data, error);
kmem_free(dcb, sizeof (dmu_tx_callback_t));
}
}
/*
* Interface to hold a bunch of attributes.
* used for creating new files.
* attrsize is the total size of all attributes
* to be added during object creation
*
* For updating/adding a single attribute dmu_tx_hold_sa() should be used.
*/
/*
* hold necessary attribute name for attribute registration.
* should be a very rare case where this is needed. If it does
* happen it would only happen on the first write to the file system.
*/
static void
dmu_tx_sa_registration_hold(sa_os_t *sa, dmu_tx_t *tx)
{
int i;
if (!sa->sa_need_attr_registration)
return;
for (i = 0; i != sa->sa_num_attrs; i++) {
if (!sa->sa_attr_table[i].sa_registered) {
if (sa->sa_reg_attr_obj)
dmu_tx_hold_zap(tx, sa->sa_reg_attr_obj,
B_TRUE, sa->sa_attr_table[i].sa_name);
else
dmu_tx_hold_zap(tx, DMU_NEW_OBJECT,
B_TRUE, sa->sa_attr_table[i].sa_name);
}
}
}
void
dmu_tx_hold_spill(dmu_tx_t *tx, uint64_t object)
{
dnode_t *dn;
dmu_tx_hold_t *txh;
blkptr_t *bp;
txh = dmu_tx_hold_object_impl(tx, tx->tx_objset, object,
THT_SPILL, 0, 0);
dn = txh->txh_dnode;
if (dn == NULL)
return;
/* If blkptr doesn't exist then add space to towrite */
bp = &dn->dn_phys->dn_spill;
if (BP_IS_HOLE(bp)) {
txh->txh_space_towrite += SPA_MAXBLOCKSIZE;
txh->txh_space_tounref = 0;
} else {
if (dsl_dataset_block_freeable(dn->dn_objset->os_dsl_dataset,
bp, bp->blk_birth))
txh->txh_space_tooverwrite += SPA_MAXBLOCKSIZE;
else
txh->txh_space_towrite += SPA_MAXBLOCKSIZE;
if (bp->blk_birth)
txh->txh_space_tounref += SPA_MAXBLOCKSIZE;
}
}
void
dmu_tx_hold_sa_create(dmu_tx_t *tx, int attrsize)
{
sa_os_t *sa = tx->tx_objset->os_sa;
dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
if (tx->tx_objset->os_sa->sa_master_obj == 0)
return;
if (tx->tx_objset->os_sa->sa_layout_attr_obj)
dmu_tx_hold_zap(tx, sa->sa_layout_attr_obj, B_TRUE, NULL);
else {
dmu_tx_hold_zap(tx, sa->sa_master_obj, B_TRUE, SA_LAYOUTS);
dmu_tx_hold_zap(tx, sa->sa_master_obj, B_TRUE, SA_REGISTRY);
dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, B_TRUE, NULL);
dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, B_TRUE, NULL);
}
dmu_tx_sa_registration_hold(sa, tx);
if (attrsize <= DN_MAX_BONUSLEN && !sa->sa_force_spill)
return;
(void) dmu_tx_hold_object_impl(tx, tx->tx_objset, DMU_NEW_OBJECT,
THT_SPILL, 0, 0);
}
/*
* Hold SA attribute
*
* dmu_tx_hold_sa(dmu_tx_t *tx, sa_handle_t *, attribute, add, size)
*
* variable_size is the total size of all variable sized attributes
* passed to this function. It is not the total size of all
* variable size attributes that *may* exist on this object.
*/
void
dmu_tx_hold_sa(dmu_tx_t *tx, sa_handle_t *hdl, boolean_t may_grow)
{
uint64_t object;
sa_os_t *sa = tx->tx_objset->os_sa;
ASSERT(hdl != NULL);
object = sa_handle_object(hdl);
dmu_tx_hold_bonus(tx, object);
if (tx->tx_objset->os_sa->sa_master_obj == 0)
return;
if (tx->tx_objset->os_sa->sa_reg_attr_obj == 0 ||
tx->tx_objset->os_sa->sa_layout_attr_obj == 0) {
dmu_tx_hold_zap(tx, sa->sa_master_obj, B_TRUE, SA_LAYOUTS);
dmu_tx_hold_zap(tx, sa->sa_master_obj, B_TRUE, SA_REGISTRY);
dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, B_TRUE, NULL);
dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, B_TRUE, NULL);
}
dmu_tx_sa_registration_hold(sa, tx);
if (may_grow && tx->tx_objset->os_sa->sa_layout_attr_obj)
dmu_tx_hold_zap(tx, sa->sa_layout_attr_obj, B_TRUE, NULL);
if (sa->sa_force_spill || may_grow || hdl->sa_spill ||
((dmu_buf_impl_t *)hdl->sa_bonus)->db_dnode->dn_have_spill) {
ASSERT(tx->tx_txg == 0);
dmu_tx_hold_spill(tx, object);
}
}

View File

@ -19,18 +19,17 @@
* CDDL HEADER END
*/
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/zfs_context.h>
#include <sys/dnode.h>
#include <sys/dmu_objset.h>
#include <sys/dmu_zfetch.h>
#include <sys/dmu.h>
#include <sys/dbuf.h>
#include <sys/kstat.h>
/*
* I'm against tune-ables, but these should probably exist as tweakable globals
@ -59,6 +58,41 @@ static zstream_t *dmu_zfetch_stream_reclaim(zfetch_t *);
static void dmu_zfetch_stream_remove(zfetch_t *, zstream_t *);
static int dmu_zfetch_streams_equal(zstream_t *, zstream_t *);
typedef struct zfetch_stats {
kstat_named_t zfetchstat_hits;
kstat_named_t zfetchstat_misses;
kstat_named_t zfetchstat_colinear_hits;
kstat_named_t zfetchstat_colinear_misses;
kstat_named_t zfetchstat_stride_hits;
kstat_named_t zfetchstat_stride_misses;
kstat_named_t zfetchstat_reclaim_successes;
kstat_named_t zfetchstat_reclaim_failures;
kstat_named_t zfetchstat_stream_resets;
kstat_named_t zfetchstat_stream_noresets;
kstat_named_t zfetchstat_bogus_streams;
} zfetch_stats_t;
static zfetch_stats_t zfetch_stats = {
{ "hits", KSTAT_DATA_UINT64 },
{ "misses", KSTAT_DATA_UINT64 },
{ "colinear_hits", KSTAT_DATA_UINT64 },
{ "colinear_misses", KSTAT_DATA_UINT64 },
{ "stride_hits", KSTAT_DATA_UINT64 },
{ "stride_misses", KSTAT_DATA_UINT64 },
{ "reclaim_successes", KSTAT_DATA_UINT64 },
{ "reclaim_failures", KSTAT_DATA_UINT64 },
{ "streams_resets", KSTAT_DATA_UINT64 },
{ "streams_noresets", KSTAT_DATA_UINT64 },
{ "bogus_streams", KSTAT_DATA_UINT64 },
};
#define ZFETCHSTAT_INCR(stat, val) \
atomic_add_64(&zfetch_stats.stat.value.ui64, (val));
#define ZFETCHSTAT_BUMP(stat) ZFETCHSTAT_INCR(stat, 1);
kstat_t *zfetch_ksp;
/*
* Given a zfetch structure and a zstream structure, determine whether the
* blocks to be read are part of a co-linear pair of existing prefetch
@ -192,7 +226,30 @@ dmu_zfetch_dofetch(zfetch_t *zf, zstream_t *zs)
break;
}
zs->zst_ph_offset = prefetch_tail;
zs->zst_last = lbolt;
zs->zst_last = ddi_get_lbolt();
}
void
zfetch_init(void)
{
zfetch_ksp = kstat_create("zfs", 0, "zfetchstats", "misc",
KSTAT_TYPE_NAMED, sizeof (zfetch_stats) / sizeof (kstat_named_t),
KSTAT_FLAG_VIRTUAL);
if (zfetch_ksp != NULL) {
zfetch_ksp->ks_data = &zfetch_stats;
kstat_install(zfetch_ksp);
}
}
void
zfetch_fini(void)
{
if (zfetch_ksp != NULL) {
kstat_delete(zfetch_ksp);
zfetch_ksp = NULL;
}
}
/*
@ -265,7 +322,7 @@ dmu_zfetch_fetchsz(dnode_t *dn, uint64_t blkid, uint64_t nblks)
}
/*
* given a zfetch and a zsearch structure, see if there is an associated zstream
* given a zfetch and a zstream structure, see if there is an associated zstream
* for this block read. If so, it starts a prefetch for the stream it
* located and returns true, otherwise it returns false
*/
@ -297,6 +354,7 @@ top:
*/
if (zs->zst_len == 0) {
/* bogus stream */
ZFETCHSTAT_BUMP(zfetchstat_bogus_streams);
continue;
}
@ -306,9 +364,14 @@ top:
*/
if (zh->zst_offset >= zs->zst_offset &&
zh->zst_offset < zs->zst_offset + zs->zst_len) {
/* already fetched */
rc = 1;
goto out;
if (prefetched) {
/* already fetched */
ZFETCHSTAT_BUMP(zfetchstat_stride_hits);
rc = 1;
goto out;
} else {
ZFETCHSTAT_BUMP(zfetchstat_stride_misses);
}
}
/*
@ -413,6 +476,7 @@ top:
if (reset) {
zstream_t *remove = zs;
ZFETCHSTAT_BUMP(zfetchstat_stream_resets);
rc = 0;
mutex_exit(&zs->zst_lock);
rw_exit(&zf->zf_rwlock);
@ -431,6 +495,7 @@ top:
}
}
} else {
ZFETCHSTAT_BUMP(zfetchstat_stream_noresets);
rc = 1;
dmu_zfetch_dofetch(zf, zs);
mutex_exit(&zs->zst_lock);
@ -487,13 +552,12 @@ dmu_zfetch_stream_insert(zfetch_t *zf, zstream_t *zs)
zs_next = list_next(&zf->zf_stream, zs_walk);
if (dmu_zfetch_streams_equal(zs_walk, zs)) {
return (0);
return (0);
}
}
list_insert_head(&zf->zf_stream, zs);
zf->zf_stream_cnt++;
return (1);
}
@ -513,7 +577,7 @@ dmu_zfetch_stream_reclaim(zfetch_t *zf)
for (zs = list_head(&zf->zf_stream); zs;
zs = list_next(&zf->zf_stream, zs)) {
if (((lbolt - zs->zst_last) / hz) > zfetch_min_sec_reap)
if (((ddi_get_lbolt() - zs->zst_last)/hz) > zfetch_min_sec_reap)
break;
}
@ -597,8 +661,15 @@ dmu_zfetch(zfetch_t *zf, uint64_t offset, uint64_t size, int prefetched)
P2ALIGN(offset, blksz)) >> blkshft;
fetched = dmu_zfetch_find(zf, &zst, prefetched);
if (!fetched) {
fetched = dmu_zfetch_colinear(zf, &zst);
if (fetched) {
ZFETCHSTAT_BUMP(zfetchstat_hits);
} else {
ZFETCHSTAT_BUMP(zfetchstat_misses);
if (fetched = dmu_zfetch_colinear(zf, &zst)) {
ZFETCHSTAT_BUMP(zfetchstat_colinear_hits);
} else {
ZFETCHSTAT_BUMP(zfetchstat_colinear_misses);
}
}
if (!fetched) {
@ -608,11 +679,14 @@ dmu_zfetch(zfetch_t *zf, uint64_t offset, uint64_t size, int prefetched)
* we still couldn't find a stream, drop the lock, and allocate
* one if possible. Otherwise, give up and go home.
*/
if (newstream == NULL) {
if (newstream) {
ZFETCHSTAT_BUMP(zfetchstat_reclaim_successes);
} else {
uint64_t maxblocks;
uint32_t max_streams;
uint32_t cur_streams;
ZFETCHSTAT_BUMP(zfetchstat_reclaim_failures);
cur_streams = zf->zf_stream_cnt;
maxblocks = zf->zf_dnode->dn_maxblkid;
@ -625,7 +699,6 @@ dmu_zfetch(zfetch_t *zf, uint64_t offset, uint64_t size, int prefetched)
if (cur_streams >= max_streams) {
return;
}
newstream = kmem_zalloc(sizeof (zstream_t), KM_SLEEP);
}
@ -635,7 +708,7 @@ dmu_zfetch(zfetch_t *zf, uint64_t offset, uint64_t size, int prefetched)
newstream->zst_ph_offset = zst.zst_len + zst.zst_offset;
newstream->zst_cap = zst.zst_len;
newstream->zst_direction = ZFETCH_FORWARD;
newstream->zst_last = lbolt;
newstream->zst_last = ddi_get_lbolt();
mutex_init(&newstream->zst_lock, NULL, MUTEX_DEFAULT, NULL);

View File

@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#include <sys/zfs_context.h>
@ -210,6 +209,11 @@ dnode_byteswap(dnode_phys_t *dnp)
ASSERT3U(dnp->dn_bonustype, <, DMU_OT_NUMTYPES);
dmu_ot[dnp->dn_bonustype].ot_byteswap(dnp->dn_bonus + off, len);
}
/* Swap SPILL block if we have one */
if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR)
byteswap_uint64_array(&dnp->dn_spill, sizeof (blkptr_t));
}
void
@ -258,6 +262,27 @@ dnode_setbonuslen(dnode_t *dn, int newsize, dmu_tx_t *tx)
rw_exit(&dn->dn_struct_rwlock);
}
void
dnode_setbonus_type(dnode_t *dn, dmu_object_type_t newtype, dmu_tx_t *tx)
{
ASSERT3U(refcount_count(&dn->dn_holds), >=, 1);
dnode_setdirty(dn, tx);
rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
dn->dn_bonustype = newtype;
dn->dn_next_bonustype[tx->tx_txg & TXG_MASK] = dn->dn_bonustype;
rw_exit(&dn->dn_struct_rwlock);
}
void
dnode_rm_spill(dnode_t *dn, dmu_tx_t *tx)
{
ASSERT3U(refcount_count(&dn->dn_holds), >=, 1);
ASSERT(RW_WRITE_HELD(&dn->dn_struct_rwlock));
dnode_setdirty(dn, tx);
dn->dn_rm_spillblk[tx->tx_txg&TXG_MASK] = DN_KILL_SPILLBLK;
dn->dn_have_spill = B_FALSE;
}
static void
dnode_setdblksz(dnode_t *dn, int size)
{
@ -272,7 +297,7 @@ dnode_setdblksz(dnode_t *dn, int size)
}
static dnode_t *
dnode_create(objset_impl_t *os, dnode_phys_t *dnp, dmu_buf_impl_t *db,
dnode_create(objset_t *os, dnode_phys_t *dnp, dmu_buf_impl_t *db,
uint64_t object)
{
dnode_t *dn = kmem_cache_alloc(dnode_cache, KM_SLEEP);
@ -294,6 +319,8 @@ dnode_create(objset_impl_t *os, dnode_phys_t *dnp, dmu_buf_impl_t *db,
dn->dn_bonustype = dnp->dn_bonustype;
dn->dn_bonuslen = dnp->dn_bonuslen;
dn->dn_maxblkid = dnp->dn_maxblkid;
dn->dn_have_spill = ((dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) != 0);
dn->dn_id_flags = 0;
dmu_zfetch_init(&dn->dn_zfetch, dn);
@ -309,7 +336,7 @@ dnode_create(objset_impl_t *os, dnode_phys_t *dnp, dmu_buf_impl_t *db,
static void
dnode_destroy(dnode_t *dn)
{
objset_impl_t *os = dn->dn_objset;
objset_t *os = dn->dn_objset;
#ifdef ZFS_DEBUG
int i;
@ -321,7 +348,7 @@ dnode_destroy(dnode_t *dn)
}
ASSERT(NULL == list_head(&dn->dn_dbufs));
#endif
ASSERT(dn->dn_oldphys == NULL);
ASSERT((dn->dn_id_flags & DN_ID_NEW_EXIST) == 0);
mutex_enter(&os->os_lock);
list_remove(&os->os_dnodes, dn);
@ -368,6 +395,7 @@ dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
ASSERT(ot != DMU_OT_NONE);
ASSERT3U(ot, <, DMU_OT_NUMTYPES);
ASSERT((bonustype == DMU_OT_NONE && bonuslen == 0) ||
(bonustype == DMU_OT_SA && bonuslen == 0) ||
(bonustype != DMU_OT_NONE && bonuslen != 0));
ASSERT3U(bonustype, <, DMU_OT_NUMTYPES);
ASSERT3U(bonuslen, <=, DN_MAX_BONUSLEN);
@ -383,6 +411,8 @@ dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
ASSERT3U(dn->dn_next_nlevels[i], ==, 0);
ASSERT3U(dn->dn_next_indblkshift[i], ==, 0);
ASSERT3U(dn->dn_next_bonuslen[i], ==, 0);
ASSERT3U(dn->dn_next_bonustype[i], ==, 0);
ASSERT3U(dn->dn_rm_spillblk[i], ==, 0);
ASSERT3U(dn->dn_next_blksz[i], ==, 0);
ASSERT(!list_link_active(&dn->dn_dirty_link[i]));
ASSERT3P(list_head(&dn->dn_dirty_records[i]), ==, NULL);
@ -393,7 +423,11 @@ dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
dnode_setdblksz(dn, blocksize);
dn->dn_indblkshift = ibs;
dn->dn_nlevels = 1;
dn->dn_nblkptr = 1 + ((DN_MAX_BONUSLEN - bonuslen) >> SPA_BLKPTRSHIFT);
if (bonustype == DMU_OT_SA) /* Maximize bonus space for SA */
dn->dn_nblkptr = 1;
else
dn->dn_nblkptr = 1 +
((DN_MAX_BONUSLEN - bonuslen) >> SPA_BLKPTRSHIFT);
dn->dn_bonustype = bonustype;
dn->dn_bonuslen = bonuslen;
dn->dn_checksum = ZIO_CHECKSUM_INHERIT;
@ -407,10 +441,12 @@ dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
}
dn->dn_allocated_txg = tx->tx_txg;
dn->dn_id_flags = 0;
dnode_setdirty(dn, tx);
dn->dn_next_indblkshift[tx->tx_txg & TXG_MASK] = ibs;
dn->dn_next_bonuslen[tx->tx_txg & TXG_MASK] = dn->dn_bonuslen;
dn->dn_next_bonustype[tx->tx_txg & TXG_MASK] = dn->dn_bonustype;
dn->dn_next_blksz[tx->tx_txg & TXG_MASK] = dn->dn_datablksz;
}
@ -426,13 +462,16 @@ dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize,
ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT || dmu_tx_private_ok(tx));
ASSERT(tx->tx_txg != 0);
ASSERT((bonustype == DMU_OT_NONE && bonuslen == 0) ||
(bonustype != DMU_OT_NONE && bonuslen != 0));
(bonustype != DMU_OT_NONE && bonuslen != 0) ||
(bonustype == DMU_OT_SA && bonuslen == 0));
ASSERT3U(bonustype, <, DMU_OT_NUMTYPES);
ASSERT3U(bonuslen, <=, DN_MAX_BONUSLEN);
/* clean up any unreferenced dbufs */
dnode_evict_dbufs(dn);
dn->dn_id_flags = 0;
rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
dnode_setdirty(dn, tx);
if (dn->dn_datablksz != blocksize) {
@ -445,9 +484,19 @@ dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize,
}
if (dn->dn_bonuslen != bonuslen)
dn->dn_next_bonuslen[tx->tx_txg&TXG_MASK] = bonuslen;
nblkptr = 1 + ((DN_MAX_BONUSLEN - bonuslen) >> SPA_BLKPTRSHIFT);
if (bonustype == DMU_OT_SA) /* Maximize bonus space for SA */
nblkptr = 1;
else
nblkptr = 1 + ((DN_MAX_BONUSLEN - bonuslen) >> SPA_BLKPTRSHIFT);
if (dn->dn_bonustype != bonustype)
dn->dn_next_bonustype[tx->tx_txg&TXG_MASK] = bonustype;
if (dn->dn_nblkptr != nblkptr)
dn->dn_next_nblkptr[tx->tx_txg&TXG_MASK] = nblkptr;
if (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR) {
dbuf_rm_spill(dn, tx);
dnode_rm_spill(dn, tx);
}
rw_exit(&dn->dn_struct_rwlock);
/* change type */
@ -488,7 +537,7 @@ dnode_special_close(dnode_t *dn)
}
dnode_t *
dnode_special_open(objset_impl_t *os, dnode_phys_t *dnp, uint64_t object)
dnode_special_open(objset_t *os, dnode_phys_t *dnp, uint64_t object)
{
dnode_t *dn = dnode_create(os, dnp, NULL, object);
DNODE_VERIFY(dn);
@ -535,7 +584,7 @@ dnode_buf_pageout(dmu_buf_t *db, void *arg)
* succeeds even for free dnodes.
*/
int
dnode_hold_impl(objset_impl_t *os, uint64_t object, int flag,
dnode_hold_impl(objset_t *os, uint64_t object, int flag,
void *tag, dnode_t **dnp)
{
int epb, idx, err;
@ -548,9 +597,14 @@ dnode_hold_impl(objset_impl_t *os, uint64_t object, int flag,
/*
* If you are holding the spa config lock as writer, you shouldn't
* be asking the DMU to do *anything*.
* be asking the DMU to do *anything* unless it's the root pool
* which may require us to read from the root filesystem while
* holding some (not all) of the locks as writer.
*/
ASSERT(spa_config_held(os->os_spa, SCL_ALL, RW_WRITER) == 0);
ASSERT(spa_config_held(os->os_spa, SCL_ALL, RW_WRITER) == 0 ||
(spa_is_root(os->os_spa) &&
spa_config_held(os->os_spa, SCL_STATE, RW_WRITER) &&
!spa_config_held(os->os_spa, SCL_ZIO, RW_WRITER)));
if (object == DMU_USERUSED_OBJECT || object == DMU_GROUPUSED_OBJECT) {
dn = (object == DMU_USERUSED_OBJECT) ?
@ -627,7 +681,7 @@ dnode_hold_impl(objset_impl_t *os, uint64_t object, int flag,
if (dn->dn_free_txg ||
((flag & DNODE_MUST_BE_ALLOCATED) && type == DMU_OT_NONE) ||
((flag & DNODE_MUST_BE_FREE) &&
(type != DMU_OT_NONE || dn->dn_oldphys))) {
(type != DMU_OT_NONE || !refcount_is_zero(&dn->dn_holds)))) {
mutex_exit(&dn->dn_mtx);
dbuf_rele(db, FTAG);
return (type == DMU_OT_NONE ? ENOENT : EEXIST);
@ -650,7 +704,7 @@ dnode_hold_impl(objset_impl_t *os, uint64_t object, int flag,
* Return held dnode if the object is allocated, NULL if not.
*/
int
dnode_hold(objset_impl_t *os, uint64_t object, void *tag, dnode_t **dnp)
dnode_hold(objset_t *os, uint64_t object, void *tag, dnode_t **dnp)
{
return (dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED, tag, dnp));
}
@ -689,7 +743,7 @@ dnode_rele(dnode_t *dn, void *tag)
void
dnode_setdirty(dnode_t *dn, dmu_tx_t *tx)
{
objset_impl_t *os = dn->dn_objset;
objset_t *os = dn->dn_objset;
uint64_t txg = tx->tx_txg;
if (DMU_OBJECT_IS_SPECIAL(dn->dn_object)) {
@ -706,6 +760,11 @@ dnode_setdirty(dnode_t *dn, dmu_tx_t *tx)
mutex_exit(&dn->dn_mtx);
#endif
/*
* Determine old uid/gid when necessary
*/
dmu_objset_userquota_get_ids(dn, B_TRUE, tx);
mutex_enter(&os->os_lock);
/*
@ -720,6 +779,7 @@ dnode_setdirty(dnode_t *dn, dmu_tx_t *tx)
ASSERT(dn->dn_datablksz != 0);
ASSERT3U(dn->dn_next_bonuslen[txg&TXG_MASK], ==, 0);
ASSERT3U(dn->dn_next_blksz[txg&TXG_MASK], ==, 0);
ASSERT3U(dn->dn_next_bonustype[txg&TXG_MASK], ==, 0);
dprintf_ds(os->os_dsl_dataset, "obj=%llu txg=%llu\n",
dn->dn_object, txg);
@ -814,7 +874,8 @@ dnode_set_blksz(dnode_t *dn, uint64_t size, int ibs, dmu_tx_t *tx)
for (db = list_head(&dn->dn_dbufs); db; db = db_next) {
db_next = list_next(&dn->dn_dbufs, db);
if (db->db_blkid != 0 && db->db_blkid != DB_BONUS_BLKID) {
if (db->db_blkid != 0 && db->db_blkid != DMU_BONUS_BLKID &&
db->db_blkid != DMU_SPILL_BLKID) {
mutex_exit(&dn->dn_dbufs_mtx);
goto fail;
}
@ -858,7 +919,7 @@ dnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx, boolean_t have_read)
int epbs, new_nlevels;
uint64_t sz;
ASSERT(blkid != DB_BONUS_BLKID);
ASSERT(blkid != DMU_BONUS_BLKID);
ASSERT(have_read ?
RW_READ_HELD(&dn->dn_struct_rwlock) :
@ -905,6 +966,7 @@ dnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx, boolean_t have_read)
/* dirty the left indirects */
db = dbuf_hold_level(dn, old_nlevels, 0, FTAG);
ASSERT(db != NULL);
new = dbuf_dirty(db, tx);
dbuf_rele(db, FTAG);
@ -915,7 +977,8 @@ dnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx, boolean_t have_read)
for (dr = list_head(list); dr; dr = dr_next) {
dr_next = list_next(&dn->dn_dirty_records[txgoff], dr);
if (dr->dr_dbuf->db_level != new_nlevels-1 &&
dr->dr_dbuf->db_blkid != DB_BONUS_BLKID) {
dr->dr_dbuf->db_blkid != DMU_BONUS_BLKID &&
dr->dr_dbuf->db_blkid != DMU_SPILL_BLKID) {
ASSERT(dr->dr_dbuf->db_level == old_nlevels-1);
list_remove(&dn->dn_dirty_records[txgoff], dr);
list_insert_tail(&new->dt.di.dr_children, dr);
@ -1170,6 +1233,20 @@ out:
rw_exit(&dn->dn_struct_rwlock);
}
static boolean_t
dnode_spill_freed(dnode_t *dn)
{
int i;
mutex_enter(&dn->dn_mtx);
for (i = 0; i < TXG_SIZE; i++) {
if (dn->dn_rm_spillblk[i] == DN_KILL_SPILLBLK)
break;
}
mutex_exit(&dn->dn_mtx);
return (i < TXG_SIZE);
}
/* return TRUE if this blkid was freed in a recent txg, or FALSE if it wasn't */
uint64_t
dnode_block_freed(dnode_t *dn, uint64_t blkid)
@ -1178,7 +1255,7 @@ dnode_block_freed(dnode_t *dn, uint64_t blkid)
void *dp = spa_get_dsl(dn->dn_objset->os_spa);
int i;
if (blkid == DB_BONUS_BLKID)
if (blkid == DMU_BONUS_BLKID)
return (FALSE);
/*
@ -1191,6 +1268,9 @@ dnode_block_freed(dnode_t *dn, uint64_t blkid)
if (dn->dn_free_txg)
return (TRUE);
if (blkid == DMU_SPILL_BLKID)
return (dnode_spill_freed(dn));
range_tofind.fr_blkid = blkid;
mutex_enter(&dn->dn_mtx);
for (i = 0; i < TXG_SIZE; i++) {
@ -1248,7 +1328,7 @@ dnode_diduse_space(dnode_t *dn, int64_t delta)
void
dnode_willuse_space(dnode_t *dn, int64_t space, dmu_tx_t *tx)
{
objset_impl_t *os = dn->dn_objset;
objset_t *os = dn->dn_objset;
dsl_dataset_t *ds = os->os_dsl_dataset;
if (space > 0)

View File

@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#include <sys/zfs_context.h>
@ -120,7 +119,7 @@ free_blocks(dnode_t *dn, blkptr_t *bp, int num, dmu_tx_t *tx)
if (BP_IS_HOLE(bp))
continue;
bytesfreed += dsl_dataset_block_kill(ds, bp, dn->dn_zio, tx);
bytesfreed += dsl_dataset_block_kill(ds, bp, tx, B_FALSE);
ASSERT3U(bytesfreed, <=, DN_USED_BYTES(dn->dn_phys));
bzero(bp, sizeof (blkptr_t));
blocks_freed += 1;
@ -228,7 +227,7 @@ free_children(dmu_buf_impl_t *db, uint64_t blkid, uint64_t nblks, int trunc,
if (db->db_state != DB_CACHED)
(void) dbuf_read(db, NULL, DB_RF_MUST_SUCCEED);
arc_release(db->db_buf, db);
dbuf_release_bp(db);
bp = (blkptr_t *)db->db.db_data;
epbs = db->db_dnode->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT;
@ -424,6 +423,9 @@ dnode_undirty_dbufs(list_t *list)
dmu_buf_impl_t *db = dr->dr_dbuf;
uint64_t txg = dr->dr_txg;
if (db->db_level != 0)
dnode_undirty_dbufs(&dr->dt.di.dr_children);
mutex_enter(&db->db_mtx);
/* XXX - use dbuf_undirty()? */
list_remove(list, dr);
@ -431,16 +433,12 @@ dnode_undirty_dbufs(list_t *list)
db->db_last_dirty = NULL;
db->db_dirtycnt -= 1;
if (db->db_level == 0) {
ASSERT(db->db_blkid == DB_BONUS_BLKID ||
ASSERT(db->db_blkid == DMU_BONUS_BLKID ||
dr->dt.dl.dr_data == db->db_buf);
dbuf_unoverride(dr);
mutex_exit(&db->db_mtx);
} else {
mutex_exit(&db->db_mtx);
dnode_undirty_dbufs(&dr->dt.di.dr_children);
}
kmem_free(dr, sizeof (dbuf_dirty_record_t));
dbuf_rele(db, (void *)(uintptr_t)txg);
dbuf_rele_and_unlock(db, (void *)(uintptr_t)txg);
}
}
@ -491,6 +489,7 @@ dnode_sync_free(dnode_t *dn, dmu_tx_t *tx)
dn->dn_maxblkid = 0;
dn->dn_allocated_txg = 0;
dn->dn_free_txg = 0;
dn->dn_have_spill = B_FALSE;
mutex_exit(&dn->dn_mtx);
ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT);
@ -513,6 +512,7 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx)
int txgoff = tx->tx_txg & TXG_MASK;
list_t *list = &dn->dn_dirty_records[txgoff];
static const dnode_phys_t zerodn = { 0 };
boolean_t kill_spill = B_FALSE;
ASSERT(dmu_tx_is_syncing(tx));
ASSERT(dnp->dn_type != DMU_OT_NONE || dn->dn_allocated_txg);
@ -524,10 +524,12 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx)
if (dmu_objset_userused_enabled(dn->dn_objset) &&
!DMU_OBJECT_IS_SPECIAL(dn->dn_object)) {
ASSERT(dn->dn_oldphys == NULL);
dn->dn_oldphys = zio_buf_alloc(sizeof (dnode_phys_t));
*dn->dn_oldphys = *dn->dn_phys; /* struct assignment */
mutex_enter(&dn->dn_mtx);
dn->dn_oldused = DN_USED_BYTES(dn->dn_phys);
dn->dn_oldflags = dn->dn_phys->dn_flags;
dn->dn_phys->dn_flags |= DNODE_FLAG_USERUSED_ACCOUNTED;
mutex_exit(&dn->dn_mtx);
dmu_objset_userquota_get_ids(dn, B_FALSE, tx);
} else {
/* Once we account for it, we should always account for it. */
ASSERT(!(dn->dn_phys->dn_flags &
@ -558,6 +560,7 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx)
SPA_MINBLOCKSIZE) == 0);
ASSERT(BP_IS_HOLE(&dnp->dn_blkptr[0]) ||
dn->dn_maxblkid == 0 || list_head(list) != NULL ||
avl_last(&dn->dn_ranges[txgoff]) ||
dn->dn_next_blksz[txgoff] >> SPA_MINBLOCKSHIFT ==
dnp->dn_datablkszsec);
dnp->dn_datablkszsec =
@ -574,6 +577,24 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx)
dn->dn_next_bonuslen[txgoff] = 0;
}
if (dn->dn_next_bonustype[txgoff]) {
ASSERT(dn->dn_next_bonustype[txgoff] < DMU_OT_NUMTYPES);
dnp->dn_bonustype = dn->dn_next_bonustype[txgoff];
dn->dn_next_bonustype[txgoff] = 0;
}
/*
* We will either remove a spill block when a file is being removed
* or we have been asked to remove it.
*/
if (dn->dn_rm_spillblk[txgoff] ||
((dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) &&
dn->dn_free_txg > 0 && dn->dn_free_txg <= tx->tx_txg)) {
if ((dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR))
kill_spill = B_TRUE;
dn->dn_rm_spillblk[txgoff] = 0;
}
if (dn->dn_next_indblkshift[txgoff]) {
ASSERT(dnp->dn_nlevels == 1);
dnp->dn_indblkshift = dn->dn_next_indblkshift[txgoff];
@ -590,6 +611,13 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx)
mutex_exit(&dn->dn_mtx);
if (kill_spill) {
(void) free_blocks(dn, &dn->dn_phys->dn_spill, 1, tx);
mutex_enter(&dn->dn_mtx);
dnp->dn_flags &= ~DNODE_FLAG_SPILL_BLKPTR;
mutex_exit(&dn->dn_mtx);
}
/* process all the "freed" ranges in the file */
while (rp = avl_last(&dn->dn_ranges[txgoff])) {
dnode_sync_free_range(dn, rp->fr_blkid, rp->fr_nblks, tx);

File diff suppressed because it is too large Load Diff

474
module/zfs/dsl_deadlist.c Normal file
View File

@ -0,0 +1,474 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
*/
#include <sys/dsl_dataset.h>
#include <sys/dmu.h>
#include <sys/refcount.h>
#include <sys/zap.h>
#include <sys/zfs_context.h>
#include <sys/dsl_pool.h>
static int
dsl_deadlist_compare(const void *arg1, const void *arg2)
{
const dsl_deadlist_entry_t *dle1 = arg1;
const dsl_deadlist_entry_t *dle2 = arg2;
if (dle1->dle_mintxg < dle2->dle_mintxg)
return (-1);
else if (dle1->dle_mintxg > dle2->dle_mintxg)
return (+1);
else
return (0);
}
static void
dsl_deadlist_load_tree(dsl_deadlist_t *dl)
{
zap_cursor_t zc;
zap_attribute_t za;
ASSERT(!dl->dl_oldfmt);
if (dl->dl_havetree)
return;
avl_create(&dl->dl_tree, dsl_deadlist_compare,
sizeof (dsl_deadlist_entry_t),
offsetof(dsl_deadlist_entry_t, dle_node));
for (zap_cursor_init(&zc, dl->dl_os, dl->dl_object);
zap_cursor_retrieve(&zc, &za) == 0;
zap_cursor_advance(&zc)) {
dsl_deadlist_entry_t *dle = kmem_alloc(sizeof (*dle), KM_SLEEP);
dle->dle_mintxg = strtonum(za.za_name, NULL);
VERIFY3U(0, ==, bpobj_open(&dle->dle_bpobj, dl->dl_os,
za.za_first_integer));
avl_add(&dl->dl_tree, dle);
}
zap_cursor_fini(&zc);
dl->dl_havetree = B_TRUE;
}
void
dsl_deadlist_open(dsl_deadlist_t *dl, objset_t *os, uint64_t object)
{
dmu_object_info_t doi;
mutex_init(&dl->dl_lock, NULL, MUTEX_DEFAULT, NULL);
dl->dl_os = os;
dl->dl_object = object;
VERIFY3U(0, ==, dmu_bonus_hold(os, object, dl, &dl->dl_dbuf));
dmu_object_info_from_db(dl->dl_dbuf, &doi);
if (doi.doi_type == DMU_OT_BPOBJ) {
dmu_buf_rele(dl->dl_dbuf, dl);
dl->dl_dbuf = NULL;
dl->dl_oldfmt = B_TRUE;
VERIFY3U(0, ==, bpobj_open(&dl->dl_bpobj, os, object));
return;
}
dl->dl_oldfmt = B_FALSE;
dl->dl_phys = dl->dl_dbuf->db_data;
dl->dl_havetree = B_FALSE;
}
void
dsl_deadlist_close(dsl_deadlist_t *dl)
{
void *cookie = NULL;
dsl_deadlist_entry_t *dle;
if (dl->dl_oldfmt) {
dl->dl_oldfmt = B_FALSE;
bpobj_close(&dl->dl_bpobj);
return;
}
if (dl->dl_havetree) {
while ((dle = avl_destroy_nodes(&dl->dl_tree, &cookie))
!= NULL) {
bpobj_close(&dle->dle_bpobj);
kmem_free(dle, sizeof (*dle));
}
avl_destroy(&dl->dl_tree);
}
dmu_buf_rele(dl->dl_dbuf, dl);
mutex_destroy(&dl->dl_lock);
dl->dl_dbuf = NULL;
dl->dl_phys = NULL;
}
uint64_t
dsl_deadlist_alloc(objset_t *os, dmu_tx_t *tx)
{
if (spa_version(dmu_objset_spa(os)) < SPA_VERSION_DEADLISTS)
return (bpobj_alloc(os, SPA_MAXBLOCKSIZE, tx));
return (zap_create(os, DMU_OT_DEADLIST, DMU_OT_DEADLIST_HDR,
sizeof (dsl_deadlist_phys_t), tx));
}
void
dsl_deadlist_free(objset_t *os, uint64_t dlobj, dmu_tx_t *tx)
{
dmu_object_info_t doi;
zap_cursor_t zc;
zap_attribute_t za;
VERIFY3U(0, ==, dmu_object_info(os, dlobj, &doi));
if (doi.doi_type == DMU_OT_BPOBJ) {
bpobj_free(os, dlobj, tx);
return;
}
for (zap_cursor_init(&zc, os, dlobj);
zap_cursor_retrieve(&zc, &za) == 0;
zap_cursor_advance(&zc))
bpobj_free(os, za.za_first_integer, tx);
zap_cursor_fini(&zc);
VERIFY3U(0, ==, dmu_object_free(os, dlobj, tx));
}
void
dsl_deadlist_insert(dsl_deadlist_t *dl, const blkptr_t *bp, dmu_tx_t *tx)
{
dsl_deadlist_entry_t dle_tofind;
dsl_deadlist_entry_t *dle;
avl_index_t where;
if (dl->dl_oldfmt) {
bpobj_enqueue(&dl->dl_bpobj, bp, tx);
return;
}
dsl_deadlist_load_tree(dl);
dmu_buf_will_dirty(dl->dl_dbuf, tx);
mutex_enter(&dl->dl_lock);
dl->dl_phys->dl_used +=
bp_get_dsize_sync(dmu_objset_spa(dl->dl_os), bp);
dl->dl_phys->dl_comp += BP_GET_PSIZE(bp);
dl->dl_phys->dl_uncomp += BP_GET_UCSIZE(bp);
mutex_exit(&dl->dl_lock);
dle_tofind.dle_mintxg = bp->blk_birth;
dle = avl_find(&dl->dl_tree, &dle_tofind, &where);
if (dle == NULL)
dle = avl_nearest(&dl->dl_tree, where, AVL_BEFORE);
else
dle = AVL_PREV(&dl->dl_tree, dle);
bpobj_enqueue(&dle->dle_bpobj, bp, tx);
}
/*
* Insert new key in deadlist, which must be > all current entries.
* mintxg is not inclusive.
*/
void
dsl_deadlist_add_key(dsl_deadlist_t *dl, uint64_t mintxg, dmu_tx_t *tx)
{
uint64_t obj;
dsl_deadlist_entry_t *dle;
if (dl->dl_oldfmt)
return;
dsl_deadlist_load_tree(dl);
dle = kmem_alloc(sizeof (*dle), KM_SLEEP);
dle->dle_mintxg = mintxg;
obj = bpobj_alloc(dl->dl_os, SPA_MAXBLOCKSIZE, tx);
VERIFY3U(0, ==, bpobj_open(&dle->dle_bpobj, dl->dl_os, obj));
avl_add(&dl->dl_tree, dle);
VERIFY3U(0, ==, zap_add_int_key(dl->dl_os, dl->dl_object,
mintxg, obj, tx));
}
/*
* Remove this key, merging its entries into the previous key.
*/
void
dsl_deadlist_remove_key(dsl_deadlist_t *dl, uint64_t mintxg, dmu_tx_t *tx)
{
dsl_deadlist_entry_t dle_tofind;
dsl_deadlist_entry_t *dle, *dle_prev;
if (dl->dl_oldfmt)
return;
dsl_deadlist_load_tree(dl);
dle_tofind.dle_mintxg = mintxg;
dle = avl_find(&dl->dl_tree, &dle_tofind, NULL);
dle_prev = AVL_PREV(&dl->dl_tree, dle);
bpobj_enqueue_subobj(&dle_prev->dle_bpobj,
dle->dle_bpobj.bpo_object, tx);
avl_remove(&dl->dl_tree, dle);
bpobj_close(&dle->dle_bpobj);
kmem_free(dle, sizeof (*dle));
VERIFY3U(0, ==, zap_remove_int(dl->dl_os, dl->dl_object, mintxg, tx));
}
/*
* Walk ds's snapshots to regenerate generate ZAP & AVL.
*/
static void
dsl_deadlist_regenerate(objset_t *os, uint64_t dlobj,
uint64_t mrs_obj, dmu_tx_t *tx)
{
dsl_deadlist_t dl;
dsl_pool_t *dp = dmu_objset_pool(os);
dsl_deadlist_open(&dl, os, dlobj);
if (dl.dl_oldfmt) {
dsl_deadlist_close(&dl);
return;
}
while (mrs_obj != 0) {
dsl_dataset_t *ds;
VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, mrs_obj, FTAG, &ds));
dsl_deadlist_add_key(&dl, ds->ds_phys->ds_prev_snap_txg, tx);
mrs_obj = ds->ds_phys->ds_prev_snap_obj;
dsl_dataset_rele(ds, FTAG);
}
dsl_deadlist_close(&dl);
}
uint64_t
dsl_deadlist_clone(dsl_deadlist_t *dl, uint64_t maxtxg,
uint64_t mrs_obj, dmu_tx_t *tx)
{
dsl_deadlist_entry_t *dle;
uint64_t newobj;
newobj = dsl_deadlist_alloc(dl->dl_os, tx);
if (dl->dl_oldfmt) {
dsl_deadlist_regenerate(dl->dl_os, newobj, mrs_obj, tx);
return (newobj);
}
dsl_deadlist_load_tree(dl);
for (dle = avl_first(&dl->dl_tree); dle;
dle = AVL_NEXT(&dl->dl_tree, dle)) {
uint64_t obj;
if (dle->dle_mintxg >= maxtxg)
break;
obj = bpobj_alloc(dl->dl_os, SPA_MAXBLOCKSIZE, tx);
VERIFY3U(0, ==, zap_add_int_key(dl->dl_os, newobj,
dle->dle_mintxg, obj, tx));
}
return (newobj);
}
void
dsl_deadlist_space(dsl_deadlist_t *dl,
uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
{
if (dl->dl_oldfmt) {
VERIFY3U(0, ==, bpobj_space(&dl->dl_bpobj,
usedp, compp, uncompp));
return;
}
mutex_enter(&dl->dl_lock);
*usedp = dl->dl_phys->dl_used;
*compp = dl->dl_phys->dl_comp;
*uncompp = dl->dl_phys->dl_uncomp;
mutex_exit(&dl->dl_lock);
}
/*
* return space used in the range (mintxg, maxtxg].
* Includes maxtxg, does not include mintxg.
* mintxg and maxtxg must both be keys in the deadlist (unless maxtxg is
* UINT64_MAX).
*/
void
dsl_deadlist_space_range(dsl_deadlist_t *dl, uint64_t mintxg, uint64_t maxtxg,
uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
{
dsl_deadlist_entry_t dle_tofind;
dsl_deadlist_entry_t *dle;
avl_index_t where;
if (dl->dl_oldfmt) {
VERIFY3U(0, ==, bpobj_space_range(&dl->dl_bpobj,
mintxg, maxtxg, usedp, compp, uncompp));
return;
}
dsl_deadlist_load_tree(dl);
*usedp = *compp = *uncompp = 0;
dle_tofind.dle_mintxg = mintxg;
dle = avl_find(&dl->dl_tree, &dle_tofind, &where);
/*
* If we don't find this mintxg, there shouldn't be anything
* after it either.
*/
ASSERT(dle != NULL ||
avl_nearest(&dl->dl_tree, where, AVL_AFTER) == NULL);
for (; dle && dle->dle_mintxg < maxtxg;
dle = AVL_NEXT(&dl->dl_tree, dle)) {
uint64_t used, comp, uncomp;
VERIFY3U(0, ==, bpobj_space(&dle->dle_bpobj,
&used, &comp, &uncomp));
*usedp += used;
*compp += comp;
*uncompp += uncomp;
}
}
static void
dsl_deadlist_insert_bpobj(dsl_deadlist_t *dl, uint64_t obj, uint64_t birth,
dmu_tx_t *tx)
{
dsl_deadlist_entry_t dle_tofind;
dsl_deadlist_entry_t *dle;
avl_index_t where;
uint64_t used, comp, uncomp;
bpobj_t bpo;
VERIFY3U(0, ==, bpobj_open(&bpo, dl->dl_os, obj));
VERIFY3U(0, ==, bpobj_space(&bpo, &used, &comp, &uncomp));
bpobj_close(&bpo);
dsl_deadlist_load_tree(dl);
dmu_buf_will_dirty(dl->dl_dbuf, tx);
mutex_enter(&dl->dl_lock);
dl->dl_phys->dl_used += used;
dl->dl_phys->dl_comp += comp;
dl->dl_phys->dl_uncomp += uncomp;
mutex_exit(&dl->dl_lock);
dle_tofind.dle_mintxg = birth;
dle = avl_find(&dl->dl_tree, &dle_tofind, &where);
if (dle == NULL)
dle = avl_nearest(&dl->dl_tree, where, AVL_BEFORE);
bpobj_enqueue_subobj(&dle->dle_bpobj, obj, tx);
}
static int
dsl_deadlist_insert_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
{
dsl_deadlist_t *dl = arg;
dsl_deadlist_insert(dl, bp, tx);
return (0);
}
/*
* Merge the deadlist pointed to by 'obj' into dl. obj will be left as
* an empty deadlist.
*/
void
dsl_deadlist_merge(dsl_deadlist_t *dl, uint64_t obj, dmu_tx_t *tx)
{
zap_cursor_t zc;
zap_attribute_t za;
dmu_buf_t *bonus;
dsl_deadlist_phys_t *dlp;
dmu_object_info_t doi;
VERIFY3U(0, ==, dmu_object_info(dl->dl_os, obj, &doi));
if (doi.doi_type == DMU_OT_BPOBJ) {
bpobj_t bpo;
VERIFY3U(0, ==, bpobj_open(&bpo, dl->dl_os, obj));
VERIFY3U(0, ==, bpobj_iterate(&bpo,
dsl_deadlist_insert_cb, dl, tx));
bpobj_close(&bpo);
return;
}
for (zap_cursor_init(&zc, dl->dl_os, obj);
zap_cursor_retrieve(&zc, &za) == 0;
zap_cursor_advance(&zc)) {
uint64_t mintxg = strtonum(za.za_name, NULL);
dsl_deadlist_insert_bpobj(dl, za.za_first_integer, mintxg, tx);
VERIFY3U(0, ==, zap_remove_int(dl->dl_os, obj, mintxg, tx));
}
zap_cursor_fini(&zc);
VERIFY3U(0, ==, dmu_bonus_hold(dl->dl_os, obj, FTAG, &bonus));
dlp = bonus->db_data;
dmu_buf_will_dirty(bonus, tx);
bzero(dlp, sizeof (*dlp));
dmu_buf_rele(bonus, FTAG);
}
/*
* Remove entries on dl that are >= mintxg, and put them on the bpobj.
*/
void
dsl_deadlist_move_bpobj(dsl_deadlist_t *dl, bpobj_t *bpo, uint64_t mintxg,
dmu_tx_t *tx)
{
dsl_deadlist_entry_t dle_tofind;
dsl_deadlist_entry_t *dle;
avl_index_t where;
ASSERT(!dl->dl_oldfmt);
dmu_buf_will_dirty(dl->dl_dbuf, tx);
dsl_deadlist_load_tree(dl);
dle_tofind.dle_mintxg = mintxg;
dle = avl_find(&dl->dl_tree, &dle_tofind, &where);
if (dle == NULL)
dle = avl_nearest(&dl->dl_tree, where, AVL_AFTER);
while (dle) {
uint64_t used, comp, uncomp;
dsl_deadlist_entry_t *dle_next;
bpobj_enqueue_subobj(bpo, dle->dle_bpobj.bpo_object, tx);
VERIFY3U(0, ==, bpobj_space(&dle->dle_bpobj,
&used, &comp, &uncomp));
mutex_enter(&dl->dl_lock);
ASSERT3U(dl->dl_phys->dl_used, >=, used);
ASSERT3U(dl->dl_phys->dl_comp, >=, comp);
ASSERT3U(dl->dl_phys->dl_uncomp, >=, uncomp);
dl->dl_phys->dl_used -= used;
dl->dl_phys->dl_comp -= comp;
dl->dl_phys->dl_uncomp -= uncomp;
mutex_exit(&dl->dl_lock);
VERIFY3U(0, ==, zap_remove_int(dl->dl_os, dl->dl_object,
dle->dle_mintxg, tx));
dle_next = AVL_NEXT(&dl->dl_tree, dle);
avl_remove(&dl->dl_tree, dle);
bpobj_close(&dle->dle_bpobj);
kmem_free(dle, sizeof (*dle));
dle = dle_next;
}
}

View File

@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/*
@ -75,8 +74,6 @@
#include <sys/dsl_synctask.h>
#include <sys/dsl_deleg.h>
#include <sys/spa.h>
#include <sys/spa_impl.h>
#include <sys/zio_checksum.h> /* for the default checksum value */
#include <sys/zap.h>
#include <sys/fs/zfs.h>
#include <sys/cred.h>
@ -150,7 +147,7 @@ dsl_deleg_can_unallow(char *ddname, nvlist_t *nvp, cred_t *cr)
}
static void
dsl_deleg_set_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
dsl_deleg_set_sync(void *arg1, void *arg2, dmu_tx_t *tx)
{
dsl_dir_t *dd = arg1;
nvlist_t *nvp = arg2;
@ -185,8 +182,8 @@ dsl_deleg_set_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
VERIFY(zap_update(mos, jumpobj,
perm, 8, 1, &n, tx) == 0);
spa_history_internal_log(LOG_DS_PERM_UPDATE,
dd->dd_pool->dp_spa, tx, cr,
spa_history_log_internal(LOG_DS_PERM_UPDATE,
dd->dd_pool->dp_spa, tx,
"%s %s dataset = %llu", whokey, perm,
dd->dd_phys->dd_head_dataset_obj);
}
@ -194,7 +191,7 @@ dsl_deleg_set_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
}
static void
dsl_deleg_unset_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
dsl_deleg_unset_sync(void *arg1, void *arg2, dmu_tx_t *tx)
{
dsl_dir_t *dd = arg1;
nvlist_t *nvp = arg2;
@ -217,8 +214,8 @@ dsl_deleg_unset_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
(void) zap_remove(mos, zapobj, whokey, tx);
VERIFY(0 == zap_destroy(mos, jumpobj, tx));
}
spa_history_internal_log(LOG_DS_PERM_WHO_REMOVE,
dd->dd_pool->dp_spa, tx, cr,
spa_history_log_internal(LOG_DS_PERM_WHO_REMOVE,
dd->dd_pool->dp_spa, tx,
"%s dataset = %llu", whokey,
dd->dd_phys->dd_head_dataset_obj);
continue;
@ -238,8 +235,8 @@ dsl_deleg_unset_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
VERIFY(0 == zap_destroy(mos,
jumpobj, tx));
}
spa_history_internal_log(LOG_DS_PERM_REMOVE,
dd->dd_pool->dp_spa, tx, cr,
spa_history_log_internal(LOG_DS_PERM_REMOVE,
dd->dd_pool->dp_spa, tx,
"%s %s dataset = %llu", whokey, perm,
dd->dd_phys->dd_head_dataset_obj);
}
@ -589,7 +586,7 @@ dsl_deleg_access(const char *dsname, const char *perm, cred_t *cr)
if (dsl_prop_get_dd(dd,
zfs_prop_to_name(ZFS_PROP_ZONED),
8, 1, &zoned, NULL) != 0)
8, 1, &zoned, NULL, B_FALSE) != 0)
break;
if (!zoned)
break;
@ -739,5 +736,5 @@ dsl_deleg_destroy(objset_t *mos, uint64_t zapobj, dmu_tx_t *tx)
boolean_t
dsl_delegation_on(objset_t *os)
{
return (os->os->os_spa->spa_delegation);
return (!!spa_delegation(os->os_spa));
}

View File

@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#include <sys/dmu.h>
@ -32,6 +31,7 @@
#include <sys/dsl_synctask.h>
#include <sys/dsl_deleg.h>
#include <sys/spa.h>
#include <sys/metaslab.h>
#include <sys/zap.h>
#include <sys/zio.h>
#include <sys/arc.h>
@ -39,8 +39,7 @@
#include "zfs_namecheck.h"
static uint64_t dsl_dir_space_towrite(dsl_dir_t *dd);
static void dsl_dir_set_reservation_sync(void *arg1, void *arg2,
cred_t *cr, dmu_tx_t *tx);
static void dsl_dir_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx);
/* ARGSUSED */
@ -63,8 +62,8 @@ dsl_dir_evict(dmu_buf_t *db, void *arg)
spa_close(dd->dd_pool->dp_spa, dd);
/*
* The props callback list should be empty since they hold the
* dir open.
* The props callback list should have been cleaned up by
* objset_evict().
*/
list_destroy(&dd->dd_prop_cbs);
mutex_destroy(&dd->dd_lock);
@ -107,6 +106,8 @@ dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj,
list_create(&dd->dd_prop_cbs, sizeof (dsl_prop_cb_record_t),
offsetof(dsl_prop_cb_record_t, cbr_node));
dsl_dir_snap_cmtime_update(dd);
if (dd->dd_phys->dd_parent_obj) {
err = dsl_dir_open_obj(dp, dd->dd_phys->dd_parent_obj,
NULL, dd, &dd->dd_parent);
@ -133,6 +134,25 @@ dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj,
(void) strcpy(dd->dd_myname, spa_name(dp->dp_spa));
}
if (dsl_dir_is_clone(dd)) {
dmu_buf_t *origin_bonus;
dsl_dataset_phys_t *origin_phys;
/*
* We can't open the origin dataset, because
* that would require opening this dsl_dir.
* Just look at its phys directly instead.
*/
err = dmu_bonus_hold(dp->dp_meta_objset,
dd->dd_phys->dd_origin_obj, FTAG, &origin_bonus);
if (err)
goto errout;
origin_phys = origin_bonus->db_data;
dd->dd_origin_txg =
origin_phys->ds_creation_txg;
dmu_buf_rele(origin_bonus, FTAG);
}
winner = dmu_buf_set_user_ie(dbuf, dd, &dd->dd_phys,
dsl_dir_evict);
if (winner) {
@ -392,7 +412,7 @@ dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds, const char *name,
{
objset_t *mos = dp->dp_meta_objset;
uint64_t ddobj;
dsl_dir_phys_t *dsphys;
dsl_dir_phys_t *ddphys;
dmu_buf_t *dbuf;
ddobj = dmu_object_alloc(mos, DMU_OT_DSL_DIR, 0,
@ -407,17 +427,17 @@ dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds, const char *name,
}
VERIFY(0 == dmu_bonus_hold(mos, ddobj, FTAG, &dbuf));
dmu_buf_will_dirty(dbuf, tx);
dsphys = dbuf->db_data;
ddphys = dbuf->db_data;
dsphys->dd_creation_time = gethrestime_sec();
ddphys->dd_creation_time = gethrestime_sec();
if (pds)
dsphys->dd_parent_obj = pds->dd_object;
dsphys->dd_props_zapobj = zap_create(mos,
ddphys->dd_parent_obj = pds->dd_object;
ddphys->dd_props_zapobj = zap_create(mos,
DMU_OT_DSL_PROPS, DMU_OT_NONE, 0, tx);
dsphys->dd_child_dir_zapobj = zap_create(mos,
ddphys->dd_child_dir_zapobj = zap_create(mos,
DMU_OT_DSL_DIR_CHILD_MAP, DMU_OT_NONE, 0, tx);
if (spa_version(dp->dp_spa) >= SPA_VERSION_USED_BREAKDOWN)
dsphys->dd_flags |= DD_FLAG_USED_BREAKDOWN;
ddphys->dd_flags |= DD_FLAG_USED_BREAKDOWN;
dmu_buf_rele(dbuf, FTAG);
return (ddobj);
@ -427,7 +447,8 @@ dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds, const char *name,
int
dsl_dir_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
dsl_dir_t *dd = arg1;
dsl_dataset_t *ds = arg1;
dsl_dir_t *dd = ds->ds_dir;
dsl_pool_t *dp = dd->dd_pool;
objset_t *mos = dp->dp_meta_objset;
int err;
@ -454,19 +475,27 @@ dsl_dir_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx)
}
void
dsl_dir_destroy_sync(void *arg1, void *tag, cred_t *cr, dmu_tx_t *tx)
dsl_dir_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
{
dsl_dir_t *dd = arg1;
dsl_dataset_t *ds = arg1;
dsl_dir_t *dd = ds->ds_dir;
objset_t *mos = dd->dd_pool->dp_meta_objset;
uint64_t val, obj;
dsl_prop_setarg_t psa;
uint64_t value = 0;
uint64_t obj;
dd_used_t t;
ASSERT(RW_WRITE_HELD(&dd->dd_pool->dp_config_rwlock));
ASSERT(dd->dd_phys->dd_head_dataset_obj == 0);
/* Remove our reservation. */
val = 0;
dsl_dir_set_reservation_sync(dd, &val, cr, tx);
dsl_prop_setarg_init_uint64(&psa, "reservation",
(ZPROP_SRC_NONE | ZPROP_SRC_LOCAL | ZPROP_SRC_RECEIVED),
&value);
psa.psa_effective_value = 0; /* predict default value */
dsl_dir_set_reservation_sync(ds, &psa, tx);
ASSERT3U(dd->dd_phys->dd_used_bytes, ==, 0);
ASSERT3U(dd->dd_phys->dd_reserved, ==, 0);
for (t = 0; t < DD_USED_NUM; t++)
@ -640,15 +669,6 @@ dsl_dir_space_available(dsl_dir_t *dd,
if (used > quota) {
/* over quota */
myspace = 0;
/*
* While it's OK to be a little over quota, if
* we think we are using more space than there
* is in the pool (which is already 1.6% more than
* dsl_pool_adjustedsize()), something is very
* wrong.
*/
ASSERT3U(used, <=, spa_get_space(dd->dd_pool->dp_spa));
} else {
/*
* the lesser of the space provided by our parent and
@ -676,8 +696,9 @@ dsl_dir_tempreserve_impl(dsl_dir_t *dd, uint64_t asize, boolean_t netfree,
{
uint64_t txg = tx->tx_txg;
uint64_t est_inflight, used_on_disk, quota, parent_rsrv;
uint64_t deferred = 0;
struct tempreserve *tr;
int enospc = EDQUOT;
int retval = EDQUOT;
int txgidx = txg & TXG_MASK;
int i;
uint64_t ref_rsrv = 0;
@ -703,7 +724,7 @@ dsl_dir_tempreserve_impl(dsl_dir_t *dd, uint64_t asize, boolean_t netfree,
*/
if (first && tx->tx_objset) {
int error;
dsl_dataset_t *ds = tx->tx_objset->os->os_dsl_dataset;
dsl_dataset_t *ds = tx->tx_objset->os_dsl_dataset;
error = dsl_dataset_check_quota(ds, checkrefquota,
asize, est_inflight, &used_on_disk, &ref_rsrv);
@ -723,7 +744,8 @@ dsl_dir_tempreserve_impl(dsl_dir_t *dd, uint64_t asize, boolean_t netfree,
quota = dd->dd_phys->dd_quota;
/*
* Adjust the quota against the actual pool size at the root.
* Adjust the quota against the actual pool size at the root
* minus any outstanding deferred frees.
* To ensure that it's possible to remove files from a full
* pool without inducing transient overcommits, we throttle
* netfree transactions against a quota that is slightly larger,
@ -732,10 +754,12 @@ dsl_dir_tempreserve_impl(dsl_dir_t *dd, uint64_t asize, boolean_t netfree,
* removes to get through.
*/
if (dd->dd_parent == NULL) {
spa_t *spa = dd->dd_pool->dp_spa;
uint64_t poolsize = dsl_pool_adjustedsize(dd->dd_pool, netfree);
if (poolsize < quota) {
quota = poolsize;
enospc = ENOSPC;
deferred = metaslab_class_get_deferred(spa_normal_class(spa));
if (poolsize - deferred < quota) {
quota = poolsize - deferred;
retval = ENOSPC;
}
}
@ -745,15 +769,16 @@ dsl_dir_tempreserve_impl(dsl_dir_t *dd, uint64_t asize, boolean_t netfree,
* on-disk is over quota and there are no pending changes (which
* may free up space for us).
*/
if (used_on_disk + est_inflight > quota) {
if (est_inflight > 0 || used_on_disk < quota)
enospc = ERESTART;
if (used_on_disk + est_inflight >= quota) {
if (est_inflight > 0 || used_on_disk < quota ||
(retval == ENOSPC && used_on_disk < quota + deferred))
retval = ERESTART;
dprintf_dd(dd, "failing: used=%lluK inflight = %lluK "
"quota=%lluK tr=%lluK err=%d\n",
used_on_disk>>10, est_inflight>>10,
quota>>10, asize>>10, enospc);
quota>>10, asize>>10, retval);
mutex_exit(&dd->dd_lock);
return (enospc);
return (retval);
}
/* We need to up our estimated delta before dropping dd_lock */
@ -987,13 +1012,16 @@ dsl_dir_transfer_space(dsl_dir_t *dd, int64_t delta,
static int
dsl_dir_set_quota_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
dsl_dir_t *dd = arg1;
uint64_t *quotap = arg2;
uint64_t new_quota = *quotap;
int err = 0;
dsl_dataset_t *ds = arg1;
dsl_dir_t *dd = ds->ds_dir;
dsl_prop_setarg_t *psa = arg2;
int err;
uint64_t towrite;
if (new_quota == 0)
if ((err = dsl_prop_predict_sync(ds->ds_dir, psa)) != 0)
return (err);
if (psa->psa_effective_value == 0)
return (0);
mutex_enter(&dd->dd_lock);
@ -1005,64 +1033,88 @@ dsl_dir_set_quota_check(void *arg1, void *arg2, dmu_tx_t *tx)
*/
towrite = dsl_dir_space_towrite(dd);
if ((dmu_tx_is_syncing(tx) || towrite == 0) &&
(new_quota < dd->dd_phys->dd_reserved ||
new_quota < dd->dd_phys->dd_used_bytes + towrite)) {
(psa->psa_effective_value < dd->dd_phys->dd_reserved ||
psa->psa_effective_value < dd->dd_phys->dd_used_bytes + towrite)) {
err = ENOSPC;
}
mutex_exit(&dd->dd_lock);
return (err);
}
/* ARGSUSED */
extern dsl_syncfunc_t dsl_prop_set_sync;
static void
dsl_dir_set_quota_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
dsl_dir_set_quota_sync(void *arg1, void *arg2, dmu_tx_t *tx)
{
dsl_dir_t *dd = arg1;
uint64_t *quotap = arg2;
uint64_t new_quota = *quotap;
dsl_dataset_t *ds = arg1;
dsl_dir_t *dd = ds->ds_dir;
dsl_prop_setarg_t *psa = arg2;
uint64_t effective_value = psa->psa_effective_value;
dsl_prop_set_sync(ds, psa, tx);
DSL_PROP_CHECK_PREDICTION(dd, psa);
dmu_buf_will_dirty(dd->dd_dbuf, tx);
mutex_enter(&dd->dd_lock);
dd->dd_phys->dd_quota = new_quota;
dd->dd_phys->dd_quota = effective_value;
mutex_exit(&dd->dd_lock);
spa_history_internal_log(LOG_DS_QUOTA, dd->dd_pool->dp_spa,
tx, cr, "%lld dataset = %llu ",
(longlong_t)new_quota, dd->dd_phys->dd_head_dataset_obj);
spa_history_log_internal(LOG_DS_QUOTA, dd->dd_pool->dp_spa,
tx, "%lld dataset = %llu ",
(longlong_t)effective_value, dd->dd_phys->dd_head_dataset_obj);
}
int
dsl_dir_set_quota(const char *ddname, uint64_t quota)
dsl_dir_set_quota(const char *ddname, zprop_source_t source, uint64_t quota)
{
dsl_dir_t *dd;
dsl_dataset_t *ds;
dsl_prop_setarg_t psa;
int err;
err = dsl_dir_open(ddname, FTAG, &dd, NULL);
dsl_prop_setarg_init_uint64(&psa, "quota", source, &quota);
err = dsl_dataset_hold(ddname, FTAG, &ds);
if (err)
return (err);
if (quota != dd->dd_phys->dd_quota) {
/*
* If someone removes a file, then tries to set the quota, we
* want to make sure the file freeing takes effect.
*/
txg_wait_open(dd->dd_pool, 0);
err = dsl_sync_task_do(dd->dd_pool, dsl_dir_set_quota_check,
dsl_dir_set_quota_sync, dd, &quota, 0);
err = dsl_dir_open(ddname, FTAG, &dd, NULL);
if (err) {
dsl_dataset_rele(ds, FTAG);
return (err);
}
ASSERT(ds->ds_dir == dd);
/*
* If someone removes a file, then tries to set the quota, we want to
* make sure the file freeing takes effect.
*/
txg_wait_open(dd->dd_pool, 0);
err = dsl_sync_task_do(dd->dd_pool, dsl_dir_set_quota_check,
dsl_dir_set_quota_sync, ds, &psa, 0);
dsl_dir_close(dd, FTAG);
dsl_dataset_rele(ds, FTAG);
return (err);
}
int
dsl_dir_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
dsl_dir_t *dd = arg1;
uint64_t *reservationp = arg2;
uint64_t new_reservation = *reservationp;
dsl_dataset_t *ds = arg1;
dsl_dir_t *dd = ds->ds_dir;
dsl_prop_setarg_t *psa = arg2;
uint64_t effective_value;
uint64_t used, avail;
int err;
if ((err = dsl_prop_predict_sync(ds->ds_dir, psa)) != 0)
return (err);
effective_value = psa->psa_effective_value;
/*
* If we are doing the preliminary check in open context, the
@ -1082,37 +1134,40 @@ dsl_dir_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx)
avail = dsl_pool_adjustedsize(dd->dd_pool, B_FALSE) - used;
}
if (MAX(used, new_reservation) > MAX(used, dd->dd_phys->dd_reserved)) {
uint64_t delta = MAX(used, new_reservation) -
if (MAX(used, effective_value) > MAX(used, dd->dd_phys->dd_reserved)) {
uint64_t delta = MAX(used, effective_value) -
MAX(used, dd->dd_phys->dd_reserved);
if (delta > avail)
return (ENOSPC);
if (dd->dd_phys->dd_quota > 0 &&
new_reservation > dd->dd_phys->dd_quota)
effective_value > dd->dd_phys->dd_quota)
return (ENOSPC);
}
return (0);
}
/* ARGSUSED */
static void
dsl_dir_set_reservation_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
dsl_dir_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx)
{
dsl_dir_t *dd = arg1;
uint64_t *reservationp = arg2;
uint64_t new_reservation = *reservationp;
dsl_dataset_t *ds = arg1;
dsl_dir_t *dd = ds->ds_dir;
dsl_prop_setarg_t *psa = arg2;
uint64_t effective_value = psa->psa_effective_value;
uint64_t used;
int64_t delta;
dsl_prop_set_sync(ds, psa, tx);
DSL_PROP_CHECK_PREDICTION(dd, psa);
dmu_buf_will_dirty(dd->dd_dbuf, tx);
mutex_enter(&dd->dd_lock);
used = dd->dd_phys->dd_used_bytes;
delta = MAX(used, new_reservation) -
delta = MAX(used, effective_value) -
MAX(used, dd->dd_phys->dd_reserved);
dd->dd_phys->dd_reserved = new_reservation;
dd->dd_phys->dd_reserved = effective_value;
if (dd->dd_parent != NULL) {
/* Roll up this additional usage into our ancestors */
@ -1121,23 +1176,39 @@ dsl_dir_set_reservation_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
}
mutex_exit(&dd->dd_lock);
spa_history_internal_log(LOG_DS_RESERVATION, dd->dd_pool->dp_spa,
tx, cr, "%lld dataset = %llu",
(longlong_t)new_reservation, dd->dd_phys->dd_head_dataset_obj);
spa_history_log_internal(LOG_DS_RESERVATION, dd->dd_pool->dp_spa,
tx, "%lld dataset = %llu",
(longlong_t)effective_value, dd->dd_phys->dd_head_dataset_obj);
}
int
dsl_dir_set_reservation(const char *ddname, uint64_t reservation)
dsl_dir_set_reservation(const char *ddname, zprop_source_t source,
uint64_t reservation)
{
dsl_dir_t *dd;
dsl_dataset_t *ds;
dsl_prop_setarg_t psa;
int err;
err = dsl_dir_open(ddname, FTAG, &dd, NULL);
dsl_prop_setarg_init_uint64(&psa, "reservation", source, &reservation);
err = dsl_dataset_hold(ddname, FTAG, &ds);
if (err)
return (err);
err = dsl_dir_open(ddname, FTAG, &dd, NULL);
if (err) {
dsl_dataset_rele(ds, FTAG);
return (err);
}
ASSERT(ds->ds_dir == dd);
err = dsl_sync_task_do(dd->dd_pool, dsl_dir_set_reservation_check,
dsl_dir_set_reservation_sync, dd, &reservation, 0);
dsl_dir_set_reservation_sync, ds, &psa, 0);
dsl_dir_close(dd, FTAG);
dsl_dataset_rele(ds, FTAG);
return (err);
}
@ -1175,7 +1246,6 @@ struct renamearg {
const char *mynewname;
};
/*ARGSUSED*/
static int
dsl_dir_rename_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
@ -1186,8 +1256,14 @@ dsl_dir_rename_check(void *arg1, void *arg2, dmu_tx_t *tx)
int err;
uint64_t val;
/* There should be 2 references: the open and the dirty */
if (dmu_buf_refcount(dd->dd_dbuf) > 2)
/*
* There should only be one reference, from dmu_objset_rename().
* Fleeting holds are also possible (eg, from "zfs list" getting
* stats), but any that are present in open context will likely
* be gone by syncing context, so only fail from syncing
* context.
*/
if (dmu_tx_is_syncing(tx) && dmu_buf_refcount(dd->dd_dbuf) > 1)
return (EBUSY);
/* check for existing name */
@ -1216,7 +1292,7 @@ dsl_dir_rename_check(void *arg1, void *arg2, dmu_tx_t *tx)
}
static void
dsl_dir_rename_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
dsl_dir_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx)
{
dsl_dir_t *dd = arg1;
struct renamearg *ra = arg2;
@ -1265,8 +1341,8 @@ dsl_dir_rename_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
dd->dd_myname, 8, 1, &dd->dd_object, tx);
ASSERT3U(err, ==, 0);
spa_history_internal_log(LOG_DS_RENAME, dd->dd_pool->dp_spa,
tx, cr, "dataset = %llu", dd->dd_phys->dd_head_dataset_obj);
spa_history_log_internal(LOG_DS_RENAME, dd->dd_pool->dp_spa,
tx, "dataset = %llu", dd->dd_phys->dd_head_dataset_obj);
}
int
@ -1315,3 +1391,26 @@ dsl_dir_transfer_possible(dsl_dir_t *sdd, dsl_dir_t *tdd, uint64_t space)
return (0);
}
timestruc_t
dsl_dir_snap_cmtime(dsl_dir_t *dd)
{
timestruc_t t;
mutex_enter(&dd->dd_lock);
t = dd->dd_snap_cmtime;
mutex_exit(&dd->dd_lock);
return (t);
}
void
dsl_dir_snap_cmtime_update(dsl_dir_t *dd)
{
timestruc_t t;
gethrestime(&t);
mutex_enter(&dd->dd_lock);
dd->dd_snap_cmtime = t;
mutex_exit(&dd->dd_lock);
}

View File

@ -19,14 +19,16 @@
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#include <sys/dsl_pool.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_prop.h>
#include <sys/dsl_dir.h>
#include <sys/dsl_synctask.h>
#include <sys/dsl_scan.h>
#include <sys/dnode.h>
#include <sys/dmu_tx.h>
#include <sys/dmu_objset.h>
#include <sys/arc.h>
@ -36,10 +38,11 @@
#include <sys/fs/zfs.h>
#include <sys/zfs_znode.h>
#include <sys/spa_impl.h>
#include <sys/dsl_deadlist.h>
int zfs_no_write_throttle = 0;
int zfs_write_limit_shift = 3; /* 1/8th of physical memory */
int zfs_txg_synctime = 5; /* target secs to sync a txg */
int zfs_txg_synctime_ms = 5000; /* target millisecs to sync a txg */
uint64_t zfs_write_limit_min = 32 << 20; /* min write limit is 32MB */
uint64_t zfs_write_limit_max = 0; /* max data payload per txg */
@ -50,7 +53,7 @@ kmutex_t zfs_write_limit_lock;
static pgcnt_t old_physmem = 0;
static int
int
dsl_pool_open_special_dir(dsl_pool_t *dp, const char *name, dsl_dir_t **ddp)
{
uint64_t obj;
@ -88,7 +91,6 @@ dsl_pool_open_impl(spa_t *spa, uint64_t txg)
offsetof(dsl_dataset_t, ds_synced_link));
mutex_init(&dp->dp_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&dp->dp_scrub_cancel_lock, NULL, MUTEX_DEFAULT, NULL);
dp->dp_vnrele_taskq = taskq_create("zfs_vn_rele_taskq", 1, minclsyspri,
1, 4, 0);
@ -103,13 +105,13 @@ dsl_pool_open(spa_t *spa, uint64_t txg, dsl_pool_t **dpp)
dsl_pool_t *dp = dsl_pool_open_impl(spa, txg);
dsl_dir_t *dd;
dsl_dataset_t *ds;
objset_impl_t *osi;
uint64_t obj;
rw_enter(&dp->dp_config_rwlock, RW_WRITER);
err = dmu_objset_open_impl(spa, NULL, &dp->dp_meta_rootbp, &osi);
err = dmu_objset_open_impl(spa, NULL, &dp->dp_meta_rootbp,
&dp->dp_meta_objset);
if (err)
goto out;
dp->dp_meta_objset = &osi->os;
err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
DMU_POOL_ROOT_DATASET, sizeof (uint64_t), 1,
@ -143,53 +145,30 @@ dsl_pool_open(spa_t *spa, uint64_t txg, dsl_pool_t **dpp)
goto out;
}
/* get scrub status */
err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
DMU_POOL_SCRUB_FUNC, sizeof (uint32_t), 1,
&dp->dp_scrub_func);
if (err == 0) {
err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
DMU_POOL_SCRUB_QUEUE, sizeof (uint64_t), 1,
&dp->dp_scrub_queue_obj);
if (spa_version(spa) >= SPA_VERSION_DEADLISTS) {
err = dsl_pool_open_special_dir(dp, FREE_DIR_NAME,
&dp->dp_free_dir);
if (err)
goto out;
err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
DMU_POOL_SCRUB_MIN_TXG, sizeof (uint64_t), 1,
&dp->dp_scrub_min_txg);
DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj);
if (err)
goto out;
err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
DMU_POOL_SCRUB_MAX_TXG, sizeof (uint64_t), 1,
&dp->dp_scrub_max_txg);
if (err)
goto out;
err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
DMU_POOL_SCRUB_BOOKMARK, sizeof (uint64_t), 4,
&dp->dp_scrub_bookmark);
if (err)
goto out;
err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
DMU_POOL_SCRUB_ERRORS, sizeof (uint64_t), 1,
&spa->spa_scrub_errors);
if (err)
goto out;
if (spa_version(spa) < SPA_VERSION_DSL_SCRUB) {
/*
* A new-type scrub was in progress on an old
* pool. Restart from the beginning, since the
* old software may have changed the pool in the
* meantime.
*/
dsl_pool_scrub_restart(dp);
}
} else {
/*
* It's OK if there is no scrub in progress (and if
* there was an I/O error, ignore it).
*/
err = 0;
VERIFY3U(0, ==, bpobj_open(&dp->dp_free_bpobj,
dp->dp_meta_objset, obj));
}
err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
DMU_POOL_TMP_USERREFS, sizeof (uint64_t), 1,
&dp->dp_tmp_userrefs_obj);
if (err == ENOENT)
err = 0;
if (err)
goto out;
err = dsl_scan_init(dp, txg);
out:
rw_exit(&dp->dp_config_rwlock);
if (err)
@ -214,22 +193,27 @@ dsl_pool_close(dsl_pool_t *dp)
dsl_dataset_drop_ref(dp->dp_origin_snap, dp);
if (dp->dp_mos_dir)
dsl_dir_close(dp->dp_mos_dir, dp);
if (dp->dp_free_dir)
dsl_dir_close(dp->dp_free_dir, dp);
if (dp->dp_root_dir)
dsl_dir_close(dp->dp_root_dir, dp);
bpobj_close(&dp->dp_free_bpobj);
/* undo the dmu_objset_open_impl(mos) from dsl_pool_open() */
if (dp->dp_meta_objset)
dmu_objset_evict(NULL, dp->dp_meta_objset->os);
dmu_objset_evict(dp->dp_meta_objset);
txg_list_destroy(&dp->dp_dirty_datasets);
txg_list_destroy(&dp->dp_sync_tasks);
txg_list_destroy(&dp->dp_dirty_dirs);
list_destroy(&dp->dp_synced_datasets);
arc_flush(dp->dp_spa);
txg_fini(dp);
dsl_scan_fini(dp);
rw_destroy(&dp->dp_config_rwlock);
mutex_destroy(&dp->dp_lock);
mutex_destroy(&dp->dp_scrub_cancel_lock);
taskq_destroy(dp->dp_vnrele_taskq);
if (dp->dp_blkstats)
kmem_free(dp->dp_blkstats, sizeof (zfs_all_blkstats_t));
@ -242,19 +226,22 @@ dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg)
int err;
dsl_pool_t *dp = dsl_pool_open_impl(spa, txg);
dmu_tx_t *tx = dmu_tx_create_assigned(dp, txg);
objset_impl_t *osip;
objset_t *os;
dsl_dataset_t *ds;
uint64_t dsobj;
uint64_t obj;
/* create and open the MOS (meta-objset) */
dp->dp_meta_objset = &dmu_objset_create_impl(spa,
NULL, &dp->dp_meta_rootbp, DMU_OST_META, tx)->os;
dp->dp_meta_objset = dmu_objset_create_impl(spa,
NULL, &dp->dp_meta_rootbp, DMU_OST_META, tx);
/* create the pool directory */
err = zap_create_claim(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
DMU_OT_OBJECT_DIRECTORY, DMU_OT_NONE, 0, tx);
ASSERT3U(err, ==, 0);
/* Initialize scan structures */
VERIFY3U(0, ==, dsl_scan_init(dp, txg));
/* create and open the root dir */
dp->dp_root_dir_obj = dsl_dir_create_sync(dp, NULL, NULL, tx);
VERIFY(0 == dsl_dir_open_obj(dp, dp->dp_root_dir_obj,
@ -265,18 +252,33 @@ dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg)
VERIFY(0 == dsl_pool_open_special_dir(dp,
MOS_DIR_NAME, &dp->dp_mos_dir));
if (spa_version(spa) >= SPA_VERSION_DEADLISTS) {
/* create and open the free dir */
(void) dsl_dir_create_sync(dp, dp->dp_root_dir,
FREE_DIR_NAME, tx);
VERIFY(0 == dsl_pool_open_special_dir(dp,
FREE_DIR_NAME, &dp->dp_free_dir));
/* create and open the free_bplist */
obj = bpobj_alloc(dp->dp_meta_objset, SPA_MAXBLOCKSIZE, tx);
VERIFY(zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj, tx) == 0);
VERIFY3U(0, ==, bpobj_open(&dp->dp_free_bpobj,
dp->dp_meta_objset, obj));
}
if (spa_version(spa) >= SPA_VERSION_DSL_SCRUB)
dsl_pool_create_origin(dp, tx);
/* create the root dataset */
dsobj = dsl_dataset_create_sync_dd(dp->dp_root_dir, NULL, 0, tx);
obj = dsl_dataset_create_sync_dd(dp->dp_root_dir, NULL, 0, tx);
/* create the root objset */
VERIFY(0 == dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds));
osip = dmu_objset_create_impl(dp->dp_spa, ds,
VERIFY(0 == dsl_dataset_hold_obj(dp, obj, FTAG, &ds));
os = dmu_objset_create_impl(dp->dp_spa, ds,
dsl_dataset_get_blkptr(ds), DMU_OST_ZFS, tx);
#ifdef _KERNEL
zfs_create_fs(&osip->os, kcred, zplprops, tx);
zfs_create_fs(os, kcred, zplprops, tx);
#endif
dsl_dataset_rele(ds, FTAG);
@ -285,6 +287,14 @@ dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg)
return (dp);
}
static int
deadlist_enqueue_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
{
dsl_deadlist_t *dl = arg;
dsl_deadlist_insert(dl, bp, tx);
return (0);
}
void
dsl_pool_sync(dsl_pool_t *dp, uint64_t txg)
{
@ -293,11 +303,19 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg)
dsl_dir_t *dd;
dsl_dataset_t *ds;
dsl_sync_task_group_t *dstg;
objset_impl_t *mosi = dp->dp_meta_objset->os;
objset_t *mos = dp->dp_meta_objset;
hrtime_t start, write_time;
uint64_t data_written;
int err;
/*
* We need to copy dp_space_towrite() before doing
* dsl_sync_task_group_sync(), because
* dsl_dataset_snapshot_reserve_space() will increase
* dp_space_towrite but not actually write anything.
*/
data_written = dp->dp_space_towrite[txg & TXG_MASK];
tx = dmu_tx_create_assigned(dp, txg);
dp->dp_read_overhead = 0;
@ -323,11 +341,11 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg)
for (ds = list_head(&dp->dp_synced_datasets); ds;
ds = list_next(&dp->dp_synced_datasets, ds))
dmu_objset_do_userquota_callbacks(ds->ds_user_ptr, tx);
dmu_objset_do_userquota_updates(ds->ds_objset, tx);
/*
* Sync the datasets again to push out the changes due to
* userquota updates. This must be done before we process the
* userspace updates. This must be done before we process the
* sync tasks, because that could cause a snapshot of a dataset
* whose ds_bp will be rewritten when we do this 2nd sync.
*/
@ -339,6 +357,16 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg)
}
err = zio_wait(zio);
/*
* Move dead blocks from the pending deadlist to the on-disk
* deadlist.
*/
for (ds = list_head(&dp->dp_synced_datasets); ds;
ds = list_next(&dp->dp_synced_datasets, ds)) {
bplist_iterate(&ds->ds_pending_deadlist,
deadlist_enqueue_cb, &ds->ds_deadlist, tx);
}
while (dstg = txg_list_remove(&dp->dp_sync_tasks, txg)) {
/*
* No more sync tasks should have been added while we
@ -354,14 +382,11 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg)
dsl_dir_sync(dd, tx);
write_time += gethrtime() - start;
if (spa_sync_pass(dp->dp_spa) == 1)
dsl_pool_scrub_sync(dp, tx);
start = gethrtime();
if (list_head(&mosi->os_dirty_dnodes[txg & TXG_MASK]) != NULL ||
list_head(&mosi->os_free_dnodes[txg & TXG_MASK]) != NULL) {
if (list_head(&mos->os_dirty_dnodes[txg & TXG_MASK]) != NULL ||
list_head(&mos->os_free_dnodes[txg & TXG_MASK]) != NULL) {
zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
dmu_objset_sync(mosi, zio, tx);
dmu_objset_sync(mos, zio, tx);
err = zio_wait(zio);
ASSERT(err == 0);
dprintf_bp(&dp->dp_meta_rootbp, "meta objset rootbp is %s", "");
@ -374,7 +399,6 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg)
dmu_tx_commit(tx);
data_written = dp->dp_space_towrite[txg & TXG_MASK];
dp->dp_space_towrite[txg & TXG_MASK] = 0;
ASSERT(dp->dp_tempreserved[txg & TXG_MASK] == 0);
@ -399,10 +423,14 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg)
* amount of write traffic allowed into each transaction group.
* Weight the throughput calculation towards the current value:
* thru = 3/4 old_thru + 1/4 new_thru
*
* Note: write_time is in nanosecs, so write_time/MICROSEC
* yields millisecs
*/
ASSERT(zfs_write_limit_min > 0);
if (data_written > zfs_write_limit_min / 8 && write_time > 0) {
uint64_t throughput = (data_written * NANOSEC) / write_time;
if (data_written > zfs_write_limit_min / 8 && write_time > MICROSEC) {
uint64_t throughput = data_written / (write_time / MICROSEC);
if (dp->dp_throughput)
dp->dp_throughput = throughput / 4 +
3 * dp->dp_throughput / 4;
@ -410,21 +438,24 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg)
dp->dp_throughput = throughput;
dp->dp_write_limit = MIN(zfs_write_limit_inflated,
MAX(zfs_write_limit_min,
dp->dp_throughput * zfs_txg_synctime));
dp->dp_throughput * zfs_txg_synctime_ms));
}
}
void
dsl_pool_zil_clean(dsl_pool_t *dp)
dsl_pool_sync_done(dsl_pool_t *dp, uint64_t txg)
{
dsl_dataset_t *ds;
objset_t *os;
while (ds = list_head(&dp->dp_synced_datasets)) {
list_remove(&dp->dp_synced_datasets, ds);
ASSERT(ds->ds_user_ptr != NULL);
zil_clean(((objset_impl_t *)ds->ds_user_ptr)->os_zil);
os = ds->ds_objset;
zil_clean(os->os_zil);
ASSERT(!dmu_objset_is_dirty(os, txg));
dmu_buf_rele(ds->ds_dbuf, ds);
}
ASSERT(!dmu_objset_is_dirty(dp->dp_meta_objset, txg));
}
/*
@ -601,6 +632,7 @@ upgrade_clones_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg)
ASSERT(ds->ds_phys->ds_prev_snap_obj == prev->ds_object);
if (prev->ds_phys->ds_next_clones_obj == 0) {
dmu_buf_will_dirty(prev->ds_dbuf, tx);
prev->ds_phys->ds_next_clones_obj =
zap_create(dp->dp_meta_objset,
DMU_OT_NEXT_CLONES, DMU_OT_NONE, 0, tx);
@ -620,8 +652,67 @@ dsl_pool_upgrade_clones(dsl_pool_t *dp, dmu_tx_t *tx)
ASSERT(dmu_tx_is_syncing(tx));
ASSERT(dp->dp_origin_snap != NULL);
(void) dmu_objset_find_spa(dp->dp_spa, NULL, upgrade_clones_cb,
tx, DS_FIND_CHILDREN);
VERIFY3U(0, ==, dmu_objset_find_spa(dp->dp_spa, NULL, upgrade_clones_cb,
tx, DS_FIND_CHILDREN));
}
/* ARGSUSED */
static int
upgrade_dir_clones_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg)
{
dmu_tx_t *tx = arg;
dsl_dataset_t *ds;
dsl_pool_t *dp = spa_get_dsl(spa);
objset_t *mos = dp->dp_meta_objset;
VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds));
if (ds->ds_dir->dd_phys->dd_origin_obj) {
dsl_dataset_t *origin;
VERIFY3U(0, ==, dsl_dataset_hold_obj(dp,
ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &origin));
if (origin->ds_dir->dd_phys->dd_clones == 0) {
dmu_buf_will_dirty(origin->ds_dir->dd_dbuf, tx);
origin->ds_dir->dd_phys->dd_clones = zap_create(mos,
DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx);
}
VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset,
origin->ds_dir->dd_phys->dd_clones, dsobj, tx));
dsl_dataset_rele(origin, FTAG);
}
dsl_dataset_rele(ds, FTAG);
return (0);
}
void
dsl_pool_upgrade_dir_clones(dsl_pool_t *dp, dmu_tx_t *tx)
{
ASSERT(dmu_tx_is_syncing(tx));
uint64_t obj;
(void) dsl_dir_create_sync(dp, dp->dp_root_dir, FREE_DIR_NAME, tx);
VERIFY(0 == dsl_pool_open_special_dir(dp,
FREE_DIR_NAME, &dp->dp_free_dir));
/*
* We can't use bpobj_alloc(), because spa_version() still
* returns the old version, and we need a new-version bpobj with
* subobj support. So call dmu_object_alloc() directly.
*/
obj = dmu_object_alloc(dp->dp_meta_objset, DMU_OT_BPOBJ,
SPA_MAXBLOCKSIZE, DMU_OT_BPOBJ_HDR, sizeof (bpobj_phys_t), tx);
VERIFY3U(0, ==, zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj, tx));
VERIFY3U(0, ==, bpobj_open(&dp->dp_free_bpobj,
dp->dp_meta_objset, obj));
VERIFY3U(0, ==, dmu_objset_find_spa(dp->dp_spa, NULL,
upgrade_dir_clones_cb, tx, DS_FIND_CHILDREN));
}
void
@ -638,7 +729,7 @@ dsl_pool_create_origin(dsl_pool_t *dp, dmu_tx_t *tx)
dsobj = dsl_dataset_create_sync(dp->dp_root_dir, ORIGIN_DIR_NAME,
NULL, 0, kcred, tx);
VERIFY(0 == dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds));
dsl_dataset_snapshot_sync(ds, ORIGIN_DIR_NAME, kcred, tx);
dsl_dataset_snapshot_sync(ds, ORIGIN_DIR_NAME, tx);
VERIFY(0 == dsl_dataset_hold_obj(dp, ds->ds_phys->ds_prev_snap_obj,
dp, &dp->dp_origin_snap));
dsl_dataset_rele(ds, FTAG);
@ -650,3 +741,108 @@ dsl_pool_vnrele_taskq(dsl_pool_t *dp)
{
return (dp->dp_vnrele_taskq);
}
/*
* Walk through the pool-wide zap object of temporary snapshot user holds
* and release them.
*/
void
dsl_pool_clean_tmp_userrefs(dsl_pool_t *dp)
{
zap_attribute_t za;
zap_cursor_t zc;
objset_t *mos = dp->dp_meta_objset;
uint64_t zapobj = dp->dp_tmp_userrefs_obj;
if (zapobj == 0)
return;
ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS);
for (zap_cursor_init(&zc, mos, zapobj);
zap_cursor_retrieve(&zc, &za) == 0;
zap_cursor_advance(&zc)) {
char *htag;
uint64_t dsobj;
htag = strchr(za.za_name, '-');
*htag = '\0';
++htag;
dsobj = strtonum(za.za_name, NULL);
(void) dsl_dataset_user_release_tmp(dp, dsobj, htag);
}
zap_cursor_fini(&zc);
}
/*
* Create the pool-wide zap object for storing temporary snapshot holds.
*/
void
dsl_pool_user_hold_create_obj(dsl_pool_t *dp, dmu_tx_t *tx)
{
objset_t *mos = dp->dp_meta_objset;
ASSERT(dp->dp_tmp_userrefs_obj == 0);
ASSERT(dmu_tx_is_syncing(tx));
dp->dp_tmp_userrefs_obj = zap_create(mos, DMU_OT_USERREFS,
DMU_OT_NONE, 0, tx);
VERIFY(zap_add(mos, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_TMP_USERREFS,
sizeof (uint64_t), 1, &dp->dp_tmp_userrefs_obj, tx) == 0);
}
static int
dsl_pool_user_hold_rele_impl(dsl_pool_t *dp, uint64_t dsobj,
const char *tag, uint64_t *now, dmu_tx_t *tx, boolean_t holding)
{
objset_t *mos = dp->dp_meta_objset;
uint64_t zapobj = dp->dp_tmp_userrefs_obj;
char *name;
int error;
ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS);
ASSERT(dmu_tx_is_syncing(tx));
/*
* If the pool was created prior to SPA_VERSION_USERREFS, the
* zap object for temporary holds might not exist yet.
*/
if (zapobj == 0) {
if (holding) {
dsl_pool_user_hold_create_obj(dp, tx);
zapobj = dp->dp_tmp_userrefs_obj;
} else {
return (ENOENT);
}
}
name = kmem_asprintf("%llx-%s", (u_longlong_t)dsobj, tag);
if (holding)
error = zap_add(mos, zapobj, name, 8, 1, now, tx);
else
error = zap_remove(mos, zapobj, name, tx);
strfree(name);
return (error);
}
/*
* Add a temporary hold for the given dataset object and tag.
*/
int
dsl_pool_user_hold(dsl_pool_t *dp, uint64_t dsobj, const char *tag,
uint64_t *now, dmu_tx_t *tx)
{
return (dsl_pool_user_hold_rele_impl(dp, dsobj, tag, now, tx, B_TRUE));
}
/*
* Release a temporary hold for the given dataset object and tag.
*/
int
dsl_pool_user_release(dsl_pool_t *dp, uint64_t dsobj, const char *tag,
dmu_tx_t *tx)
{
return (dsl_pool_user_hold_rele_impl(dp, dsobj, tag, NULL,
tx, B_FALSE));
}

File diff suppressed because it is too large Load Diff

1739
module/zfs/dsl_scan.c Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -19,18 +19,15 @@
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/dmu.h>
#include <sys/dmu_tx.h>
#include <sys/dsl_pool.h>
#include <sys/dsl_dir.h>
#include <sys/dsl_synctask.h>
#include <sys/cred.h>
#include <sys/metaslab.h>
#define DST_AVG_BLKSHIFT 14
@ -50,7 +47,6 @@ dsl_sync_task_group_create(dsl_pool_t *dp)
list_create(&dstg->dstg_tasks, sizeof (dsl_sync_task_t),
offsetof(dsl_sync_task_t, dst_node));
dstg->dstg_pool = dp;
dstg->dstg_cr = CRED();
return (dstg);
}
@ -112,14 +108,21 @@ top:
return (dstg->dstg_err);
}
VERIFY(0 == txg_list_add(&dstg->dstg_pool->dp_sync_tasks, dstg, txg));
/*
* We don't generally have many sync tasks, so pay the price of
* add_tail to get the tasks executed in the right order.
*/
VERIFY(0 == txg_list_add_tail(&dstg->dstg_pool->dp_sync_tasks,
dstg, txg));
dmu_tx_commit(tx);
txg_wait_synced(dstg->dstg_pool, txg);
if (dstg->dstg_err == EAGAIN)
if (dstg->dstg_err == EAGAIN) {
txg_wait_synced(dstg->dstg_pool, txg + TXG_DEFER_SIZE);
goto top;
}
return (dstg->dstg_err);
}
@ -131,7 +134,12 @@ dsl_sync_task_group_nowait(dsl_sync_task_group_t *dstg, dmu_tx_t *tx)
dstg->dstg_nowaiter = B_TRUE;
txg = dmu_tx_get_txg(tx);
VERIFY(0 == txg_list_add(&dstg->dstg_pool->dp_sync_tasks, dstg, txg));
/*
* We don't generally have many sync tasks, so pay the price of
* add_tail to get the tasks executed in the right order.
*/
VERIFY(0 == txg_list_add_tail(&dstg->dstg_pool->dp_sync_tasks,
dstg, txg));
}
void
@ -150,25 +158,30 @@ void
dsl_sync_task_group_sync(dsl_sync_task_group_t *dstg, dmu_tx_t *tx)
{
dsl_sync_task_t *dst;
void *tr_cookie;
dsl_pool_t *dp = dstg->dstg_pool;
uint64_t quota, used;
ASSERT3U(dstg->dstg_err, ==, 0);
/*
* Check for sufficient space.
* Check for sufficient space. We just check against what's
* on-disk; we don't want any in-flight accounting to get in our
* way, because open context may have already used up various
* in-core limits (arc_tempreserve, dsl_pool_tempreserve).
*/
dstg->dstg_err = dsl_dir_tempreserve_space(dstg->dstg_pool->dp_mos_dir,
dstg->dstg_space, dstg->dstg_space * 3, 0, 0, &tr_cookie, tx);
/* don't bother trying again */
if (dstg->dstg_err == ERESTART)
dstg->dstg_err = EAGAIN;
if (dstg->dstg_err)
quota = dsl_pool_adjustedsize(dp, B_FALSE) -
metaslab_class_get_deferred(spa_normal_class(dp->dp_spa));
used = dp->dp_root_dir->dd_phys->dd_used_bytes;
/* MOS space is triple-dittoed, so we multiply by 3. */
if (dstg->dstg_space > 0 && used + dstg->dstg_space * 3 > quota) {
dstg->dstg_err = ENOSPC;
return;
}
/*
* Check for errors by calling checkfuncs.
*/
rw_enter(&dstg->dstg_pool->dp_config_rwlock, RW_WRITER);
rw_enter(&dp->dp_config_rwlock, RW_WRITER);
for (dst = list_head(&dstg->dstg_tasks); dst;
dst = list_next(&dstg->dstg_tasks, dst)) {
dst->dst_err =
@ -183,13 +196,10 @@ dsl_sync_task_group_sync(dsl_sync_task_group_t *dstg, dmu_tx_t *tx)
*/
for (dst = list_head(&dstg->dstg_tasks); dst;
dst = list_next(&dstg->dstg_tasks, dst)) {
dst->dst_syncfunc(dst->dst_arg1, dst->dst_arg2,
dstg->dstg_cr, tx);
dst->dst_syncfunc(dst->dst_arg1, dst->dst_arg2, tx);
}
}
rw_exit(&dstg->dstg_pool->dp_config_rwlock);
dsl_dir_tempreserve_clear(tr_cookie, tx);
rw_exit(&dp->dp_config_rwlock);
if (dstg->dstg_nowaiter)
dsl_sync_task_group_destroy(dstg);

View File

@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/*
@ -94,6 +93,8 @@ static ulong_t ereport_qlen = 0;
static size_t ereport_size = 0;
static int ereport_cols = 80;
extern void fastreboot_disable_highpil(void);
/*
* Common fault management kstats to record ereport generation
* failures
@ -374,6 +375,9 @@ fm_panic(const char *format, ...)
va_list ap;
(void) casptr((void *)&fm_panicstr, NULL, (void *)format);
#if defined(__i386) || defined(__amd64)
fastreboot_disable_highpil();
#endif /* __i386 || __amd64 */
va_start(ap, format);
vpanic(format, ap);
va_end(ap);
@ -512,10 +516,10 @@ fm_ereport_post(nvlist_t *ereport, int evc_flag)
if (sysevent_evc_publish(error_chan, EC_FM, ESC_FM_ERROR,
SUNW_VENDOR, FM_PUB, ereport, evc_flag) != 0) {
atomic_add_64(&erpt_kstat_data.erpt_dropped.value.ui64, 1);
sysevent_evc_unbind(error_chan);
(void) sysevent_evc_unbind(error_chan);
return;
}
sysevent_evc_unbind(error_chan);
(void) sysevent_evc_unbind(error_chan);
}
/*
@ -788,6 +792,14 @@ fm_payload_set(nvlist_t *payload, ...)
* detector nvlist_t <detector>
* ereport-payload nvlist_t <var args>
*
* We don't actually add a 'version' member to the payload. Really,
* the version quoted to us by our caller is that of the category 1
* "ereport" event class (and we require FM_EREPORT_VERS0) but
* the payload version of the actual leaf class event under construction
* may be something else. Callers should supply a version in the varargs,
* or (better) we could take two version arguments - one for the
* ereport category 1 classification (expect FM_EREPORT_VERS0) and one
* for the leaf class.
*/
void
fm_ereport_set(nvlist_t *ereport, int version, const char *erpt_class,
@ -920,46 +932,41 @@ fm_fmri_hc_set(nvlist_t *fmri, int version, const nvlist_t *auth,
* version uint8_t 0
* auth nvlist_t <auth>
* devpath string <devpath>
* devid string <devid>
* [devid] string <devid>
* [target-port-l0id] string <target-port-lun0-id>
*
* Note that auth and devid are optional members.
*/
void
fm_fmri_dev_set(nvlist_t *fmri_dev, int version, const nvlist_t *auth,
const char *devpath, const char *devid)
const char *devpath, const char *devid, const char *tpl0)
{
int err = 0;
if (version != DEV_SCHEME_VERSION0) {
atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
return;
}
if (nvlist_add_uint8(fmri_dev, FM_VERSION, version) != 0) {
atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
return;
}
if (nvlist_add_string(fmri_dev, FM_FMRI_SCHEME,
FM_FMRI_SCHEME_DEV) != 0) {
atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
return;
}
err |= nvlist_add_uint8(fmri_dev, FM_VERSION, version);
err |= nvlist_add_string(fmri_dev, FM_FMRI_SCHEME, FM_FMRI_SCHEME_DEV);
if (auth != NULL) {
if (nvlist_add_nvlist(fmri_dev, FM_FMRI_AUTHORITY,
(nvlist_t *)auth) != 0) {
atomic_add_64(
&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
}
err |= nvlist_add_nvlist(fmri_dev, FM_FMRI_AUTHORITY,
(nvlist_t *)auth);
}
if (nvlist_add_string(fmri_dev, FM_FMRI_DEV_PATH, devpath) != 0) {
atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
}
err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_PATH, devpath);
if (devid != NULL)
if (nvlist_add_string(fmri_dev, FM_FMRI_DEV_ID, devid) != 0)
atomic_add_64(
&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_ID, devid);
if (tpl0 != NULL)
err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_TGTPTLUN0, tpl0);
if (err)
atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
}
/*
@ -1264,3 +1271,102 @@ print_msg_hwerr(ctid_t ct_id, proc_t *p)
uprintf("Killed process %d (%s) in contract id %d "
"due to hardware error\n", p->p_pid, p->p_user.u_comm, ct_id);
}
void
fm_fmri_hc_create(nvlist_t *fmri, int version, const nvlist_t *auth,
nvlist_t *snvl, nvlist_t *bboard, int npairs, ...)
{
nv_alloc_t *nva = nvlist_lookup_nv_alloc(fmri);
nvlist_t *pairs[HC_MAXPAIRS];
nvlist_t **hcl;
uint_t n;
int i, j;
va_list ap;
char *hcname, *hcid;
if (!fm_fmri_hc_set_common(fmri, version, auth))
return;
/*
* copy the bboard nvpairs to the pairs array
*/
if (nvlist_lookup_nvlist_array(bboard, FM_FMRI_HC_LIST, &hcl, &n)
!= 0) {
atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
return;
}
for (i = 0; i < n; i++) {
if (nvlist_lookup_string(hcl[i], FM_FMRI_HC_NAME,
&hcname) != 0) {
atomic_add_64(
&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
return;
}
if (nvlist_lookup_string(hcl[i], FM_FMRI_HC_ID, &hcid) != 0) {
atomic_add_64(
&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
return;
}
pairs[i] = fm_nvlist_create(nva);
if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, hcname) != 0 ||
nvlist_add_string(pairs[i], FM_FMRI_HC_ID, hcid) != 0) {
for (j = 0; j <= i; j++) {
if (pairs[j] != NULL)
fm_nvlist_destroy(pairs[j],
FM_NVA_RETAIN);
}
atomic_add_64(
&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
return;
}
}
/*
* create the pairs from passed in pairs
*/
npairs = MIN(npairs, HC_MAXPAIRS);
va_start(ap, npairs);
for (i = n; i < npairs + n; i++) {
const char *name = va_arg(ap, const char *);
uint32_t id = va_arg(ap, uint32_t);
char idstr[11];
(void) snprintf(idstr, sizeof (idstr), "%u", id);
pairs[i] = fm_nvlist_create(nva);
if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, name) != 0 ||
nvlist_add_string(pairs[i], FM_FMRI_HC_ID, idstr) != 0) {
for (j = 0; j <= i; j++) {
if (pairs[j] != NULL)
fm_nvlist_destroy(pairs[j],
FM_NVA_RETAIN);
}
atomic_add_64(
&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
return;
}
}
va_end(ap);
/*
* Create the fmri hc list
*/
if (nvlist_add_nvlist_array(fmri, FM_FMRI_HC_LIST, pairs,
npairs + n) != 0) {
atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
return;
}
for (i = 0; i < npairs + n; i++) {
fm_nvlist_destroy(pairs[i], FM_NVA_RETAIN);
}
if (snvl != NULL) {
if (nvlist_add_nvlist(fmri, FM_FMRI_HC_SPECIFIC, snvl) != 0) {
atomic_add_64(
&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
return;
}
}
}

View File

@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _SYS_ARC_H
@ -48,7 +47,8 @@ arc_done_func_t arc_getbuf_func;
struct arc_buf {
arc_buf_hdr_t *b_hdr;
arc_buf_t *b_next;
krwlock_t b_lock;
kmutex_t b_evict_lock;
krwlock_t b_data_lock;
void *b_data;
arc_evict_func_t *b_efunc;
void *b_private;
@ -87,10 +87,13 @@ arc_buf_t *arc_buf_alloc(spa_t *spa, int size, void *tag,
arc_buf_contents_t type);
arc_buf_t *arc_loan_buf(spa_t *spa, int size);
void arc_return_buf(arc_buf_t *buf, void *tag);
void arc_loan_inuse_buf(arc_buf_t *buf, void *tag);
void arc_buf_add_ref(arc_buf_t *buf, void *tag);
int arc_buf_remove_ref(arc_buf_t *buf, void *tag);
int arc_buf_size(arc_buf_t *buf);
void arc_release(arc_buf_t *buf, void *tag);
int arc_release_bp(arc_buf_t *buf, void *tag, blkptr_t *bp, spa_t *spa,
zbookmark_t *zb);
int arc_released(arc_buf_t *buf);
int arc_has_callback(arc_buf_t *buf);
void arc_buf_freeze(arc_buf_t *buf);
@ -99,28 +102,16 @@ void arc_buf_thaw(arc_buf_t *buf);
int arc_referenced(arc_buf_t *buf);
#endif
typedef struct writeprops {
dmu_object_type_t wp_type;
uint8_t wp_level;
uint8_t wp_copies;
uint8_t wp_dncompress, wp_oscompress;
uint8_t wp_dnchecksum, wp_oschecksum;
} writeprops_t;
void write_policy(spa_t *spa, const writeprops_t *wp, zio_prop_t *zp);
int arc_read(zio_t *pio, spa_t *spa, blkptr_t *bp, arc_buf_t *pbuf,
int arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_buf_t *pbuf,
arc_done_func_t *done, void *private, int priority, int zio_flags,
uint32_t *arc_flags, const zbookmark_t *zb);
int arc_read_nolock(zio_t *pio, spa_t *spa, blkptr_t *bp,
int arc_read_nolock(zio_t *pio, spa_t *spa, const blkptr_t *bp,
arc_done_func_t *done, void *private, int priority, int flags,
uint32_t *arc_flags, const zbookmark_t *zb);
zio_t *arc_write(zio_t *pio, spa_t *spa, const writeprops_t *wp,
boolean_t l2arc, uint64_t txg, blkptr_t *bp, arc_buf_t *buf,
arc_done_func_t *ready, arc_done_func_t *done, void *private, int priority,
int zio_flags, const zbookmark_t *zb);
int arc_free(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
zio_done_func_t *done, void *private, uint32_t arc_flags);
int arc_tryread(spa_t *spa, blkptr_t *bp, void *data);
zio_t *arc_write(zio_t *pio, spa_t *spa, uint64_t txg,
blkptr_t *bp, arc_buf_t *buf, boolean_t l2arc, const zio_prop_t *zp,
arc_done_func_t *ready, arc_done_func_t *done, void *private,
int priority, int zio_flags, const zbookmark_t *zb);
void arc_set_callback(arc_buf_t *buf, arc_evict_func_t *func, void *private);
int arc_buf_evict(arc_buf_t *buf);

View File

@ -19,68 +19,36 @@
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _SYS_BPLIST_H
#define _SYS_BPLIST_H
#include <sys/dmu.h>
#include <sys/spa.h>
#include <sys/txg.h>
#include <sys/zfs_context.h>
#include <sys/spa.h>
#ifdef __cplusplus
extern "C" {
#endif
typedef struct bplist_phys {
/*
* This is the bonus buffer for the dead lists. The object's
* contents is an array of bpl_entries blkptr_t's, representing
* a total of bpl_bytes physical space.
*/
uint64_t bpl_entries;
uint64_t bpl_bytes;
uint64_t bpl_comp;
uint64_t bpl_uncomp;
} bplist_phys_t;
#define BPLIST_SIZE_V0 (2 * sizeof (uint64_t))
typedef struct bplist_q {
blkptr_t bpq_blk;
void *bpq_next;
} bplist_q_t;
typedef struct bplist_entry {
blkptr_t bpe_blk;
list_node_t bpe_node;
} bplist_entry_t;
typedef struct bplist {
kmutex_t bpl_lock;
objset_t *bpl_mos;
uint64_t bpl_object;
uint8_t bpl_blockshift;
uint8_t bpl_bpshift;
uint8_t bpl_havecomp;
bplist_q_t *bpl_queue;
bplist_phys_t *bpl_phys;
dmu_buf_t *bpl_dbuf;
dmu_buf_t *bpl_cached_dbuf;
list_t bpl_list;
} bplist_t;
extern uint64_t bplist_create(objset_t *mos, int blocksize, dmu_tx_t *tx);
extern void bplist_destroy(objset_t *mos, uint64_t object, dmu_tx_t *tx);
extern int bplist_open(bplist_t *bpl, objset_t *mos, uint64_t object);
extern void bplist_close(bplist_t *bpl);
extern boolean_t bplist_empty(bplist_t *bpl);
extern int bplist_iterate(bplist_t *bpl, uint64_t *itorp, blkptr_t *bp);
extern int bplist_enqueue(bplist_t *bpl, const blkptr_t *bp, dmu_tx_t *tx);
extern void bplist_enqueue_deferred(bplist_t *bpl, const blkptr_t *bp);
extern void bplist_sync(bplist_t *bpl, dmu_tx_t *tx);
extern void bplist_vacate(bplist_t *bpl, dmu_tx_t *tx);
extern int bplist_space(bplist_t *bpl,
uint64_t *usedp, uint64_t *compp, uint64_t *uncompp);
extern int bplist_space_birthrange(bplist_t *bpl,
uint64_t mintxg, uint64_t maxtxg, uint64_t *dasizep);
typedef int bplist_itor_t(void *arg, const blkptr_t *bp, dmu_tx_t *tx);
void bplist_create(bplist_t *bpl);
void bplist_destroy(bplist_t *bpl);
void bplist_append(bplist_t *bpl, const blkptr_t *bp);
void bplist_iterate(bplist_t *bpl, bplist_itor_t *func,
void *arg, dmu_tx_t *tx);
#ifdef __cplusplus
}

View File

@ -0,0 +1,91 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _SYS_BPOBJ_H
#define _SYS_BPOBJ_H
#include <sys/dmu.h>
#include <sys/spa.h>
#include <sys/txg.h>
#include <sys/zio.h>
#include <sys/zfs_context.h>
#ifdef __cplusplus
extern "C" {
#endif
typedef struct bpobj_phys {
/*
* This is the bonus buffer for the dead lists. The object's
* contents is an array of bpo_entries blkptr_t's, representing
* a total of bpo_bytes physical space.
*/
uint64_t bpo_num_blkptrs;
uint64_t bpo_bytes;
uint64_t bpo_comp;
uint64_t bpo_uncomp;
uint64_t bpo_subobjs;
uint64_t bpo_num_subobjs;
} bpobj_phys_t;
#define BPOBJ_SIZE_V0 (2 * sizeof (uint64_t))
#define BPOBJ_SIZE_V1 (4 * sizeof (uint64_t))
typedef struct bpobj {
kmutex_t bpo_lock;
objset_t *bpo_os;
uint64_t bpo_object;
int bpo_epb;
uint8_t bpo_havecomp;
uint8_t bpo_havesubobj;
bpobj_phys_t *bpo_phys;
dmu_buf_t *bpo_dbuf;
dmu_buf_t *bpo_cached_dbuf;
} bpobj_t;
typedef int bpobj_itor_t(void *arg, const blkptr_t *bp, dmu_tx_t *tx);
uint64_t bpobj_alloc(objset_t *mos, int blocksize, dmu_tx_t *tx);
void bpobj_free(objset_t *os, uint64_t obj, dmu_tx_t *tx);
int bpobj_open(bpobj_t *bpo, objset_t *mos, uint64_t object);
void bpobj_close(bpobj_t *bpo);
int bpobj_iterate(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx);
int bpobj_iterate_nofree(bpobj_t *bpo, bpobj_itor_t func, void *, dmu_tx_t *);
int bpobj_iterate_dbg(bpobj_t *bpo, uint64_t *itorp, blkptr_t *bp);
void bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx);
void bpobj_enqueue(bpobj_t *bpo, const blkptr_t *bp, dmu_tx_t *tx);
int bpobj_space(bpobj_t *bpo,
uint64_t *usedp, uint64_t *compp, uint64_t *uncompp);
int bpobj_space_range(bpobj_t *bpo, uint64_t mintxg, uint64_t maxtxg,
uint64_t *usedp, uint64_t *compp, uint64_t *uncompp);
#ifdef __cplusplus
}
#endif
#endif /* _SYS_BPOBJ_H */

View File

@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _SYS_DBUF_H
@ -38,7 +37,6 @@
extern "C" {
#endif
#define DB_BONUS_BLKID (-1ULL)
#define IN_DMU_SYNC 2
/*
@ -75,7 +73,6 @@ typedef enum dbuf_states {
DB_EVICTING
} dbuf_states_t;
struct objset_impl;
struct dnode;
struct dmu_tx;
@ -134,6 +131,7 @@ typedef struct dbuf_dirty_record {
arc_buf_t *dr_data;
blkptr_t dr_overridden_by;
override_states_t dr_override_state;
uint8_t dr_copies;
} dl;
} dt;
} dbuf_dirty_record_t;
@ -148,7 +146,7 @@ typedef struct dmu_buf_impl {
dmu_buf_t db;
/* the objset we belong to */
struct objset_impl *db_objset;
struct objset *db_objset;
/*
* the dnode we belong to (NULL when evicted)
@ -242,6 +240,10 @@ uint64_t dbuf_whichblock(struct dnode *di, uint64_t offset);
dmu_buf_impl_t *dbuf_create_tlib(struct dnode *dn, char *data);
void dbuf_create_bonus(struct dnode *dn);
int dbuf_spill_set_blksz(dmu_buf_t *db, uint64_t blksz, dmu_tx_t *tx);
void dbuf_spill_hold(struct dnode *dn, dmu_buf_impl_t **dbp, void *tag);
void dbuf_rm_spill(struct dnode *dn, dmu_tx_t *tx);
dmu_buf_impl_t *dbuf_hold(struct dnode *dn, uint64_t blkid, void *tag);
dmu_buf_impl_t *dbuf_hold_level(struct dnode *dn, int level, uint64_t blkid,
@ -255,6 +257,7 @@ void dbuf_add_ref(dmu_buf_impl_t *db, void *tag);
uint64_t dbuf_refcount(dmu_buf_impl_t *db);
void dbuf_rele(dmu_buf_impl_t *db, void *tag);
void dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag);
dmu_buf_impl_t *dbuf_find(struct dnode *dn, uint8_t level, uint64_t blkid);
@ -266,6 +269,7 @@ void dmu_buf_will_fill(dmu_buf_t *db, dmu_tx_t *tx);
void dmu_buf_fill_done(dmu_buf_t *db, dmu_tx_t *tx);
void dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx);
dbuf_dirty_record_t *dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
arc_buf_t *dbuf_loan_arcbuf(dmu_buf_impl_t *db);
void dbuf_clear(dmu_buf_impl_t *db);
void dbuf_evict(dmu_buf_impl_t *db);
@ -273,6 +277,7 @@ void dbuf_evict(dmu_buf_impl_t *db);
void dbuf_setdirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
void dbuf_unoverride(dbuf_dirty_record_t *dr);
void dbuf_sync_list(list_t *list, dmu_tx_t *tx);
void dbuf_release_bp(dmu_buf_impl_t *db);
void dbuf_free_range(struct dnode *dn, uint64_t start, uint64_t end,
struct dmu_tx *);
@ -324,7 +329,7 @@ _NOTE(CONSTCOND) } while (0)
#define dprintf_dbuf_bp(db, bp, fmt, ...) do { \
if (zfs_flags & ZFS_DEBUG_DPRINTF) { \
char *__blkbuf = kmem_alloc(BP_SPRINTF_LEN, KM_SLEEP); \
sprintf_blkptr(__blkbuf, BP_SPRINTF_LEN, bp); \
sprintf_blkptr(__blkbuf, bp); \
dprintf_dbuf(db, fmt " %s\n", __VA_ARGS__, __blkbuf); \
kmem_free(__blkbuf, BP_SPRINTF_LEN); \
} \

View File

@ -0,0 +1,246 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _SYS_DDT_H
#define _SYS_DDT_H
#include <sys/sysmacros.h>
#include <sys/types.h>
#include <sys/fs/zfs.h>
#include <sys/zio.h>
#include <sys/dmu.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
* On-disk DDT formats, in the desired search order (newest version first).
*/
enum ddt_type {
DDT_TYPE_ZAP = 0,
DDT_TYPES
};
/*
* DDT classes, in the desired search order (highest replication level first).
*/
enum ddt_class {
DDT_CLASS_DITTO = 0,
DDT_CLASS_DUPLICATE,
DDT_CLASS_UNIQUE,
DDT_CLASSES
};
#define DDT_TYPE_CURRENT 0
#define DDT_COMPRESS_BYTEORDER_MASK 0x80
#define DDT_COMPRESS_FUNCTION_MASK 0x7f
/*
* On-disk ddt entry: key (name) and physical storage (value).
*/
typedef struct ddt_key {
zio_cksum_t ddk_cksum; /* 256-bit block checksum */
uint64_t ddk_prop; /* LSIZE, PSIZE, compression */
} ddt_key_t;
/*
* ddk_prop layout:
*
* +-------+-------+-------+-------+-------+-------+-------+-------+
* | 0 | 0 | 0 | comp | PSIZE | LSIZE |
* +-------+-------+-------+-------+-------+-------+-------+-------+
*/
#define DDK_GET_LSIZE(ddk) \
BF64_GET_SB((ddk)->ddk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1)
#define DDK_SET_LSIZE(ddk, x) \
BF64_SET_SB((ddk)->ddk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1, x)
#define DDK_GET_PSIZE(ddk) \
BF64_GET_SB((ddk)->ddk_prop, 16, 16, SPA_MINBLOCKSHIFT, 1)
#define DDK_SET_PSIZE(ddk, x) \
BF64_SET_SB((ddk)->ddk_prop, 16, 16, SPA_MINBLOCKSHIFT, 1, x)
#define DDK_GET_COMPRESS(ddk) BF64_GET((ddk)->ddk_prop, 32, 8)
#define DDK_SET_COMPRESS(ddk, x) BF64_SET((ddk)->ddk_prop, 32, 8, x)
#define DDT_KEY_WORDS (sizeof (ddt_key_t) / sizeof (uint64_t))
typedef struct ddt_phys {
dva_t ddp_dva[SPA_DVAS_PER_BP];
uint64_t ddp_refcnt;
uint64_t ddp_phys_birth;
} ddt_phys_t;
enum ddt_phys_type {
DDT_PHYS_DITTO = 0,
DDT_PHYS_SINGLE = 1,
DDT_PHYS_DOUBLE = 2,
DDT_PHYS_TRIPLE = 3,
DDT_PHYS_TYPES
};
/*
* In-core ddt entry
*/
struct ddt_entry {
ddt_key_t dde_key;
ddt_phys_t dde_phys[DDT_PHYS_TYPES];
zio_t *dde_lead_zio[DDT_PHYS_TYPES];
void *dde_repair_data;
enum ddt_type dde_type;
enum ddt_class dde_class;
uint8_t dde_loading;
uint8_t dde_loaded;
kcondvar_t dde_cv;
avl_node_t dde_node;
};
/*
* In-core ddt
*/
struct ddt {
kmutex_t ddt_lock;
avl_tree_t ddt_tree;
avl_tree_t ddt_repair_tree;
enum zio_checksum ddt_checksum;
spa_t *ddt_spa;
objset_t *ddt_os;
uint64_t ddt_stat_object;
uint64_t ddt_object[DDT_TYPES][DDT_CLASSES];
ddt_histogram_t ddt_histogram[DDT_TYPES][DDT_CLASSES];
ddt_histogram_t ddt_histogram_cache[DDT_TYPES][DDT_CLASSES];
ddt_object_t ddt_object_stats[DDT_TYPES][DDT_CLASSES];
avl_node_t ddt_node;
};
/*
* In-core and on-disk bookmark for DDT walks
*/
typedef struct ddt_bookmark {
uint64_t ddb_class;
uint64_t ddb_type;
uint64_t ddb_checksum;
uint64_t ddb_cursor;
} ddt_bookmark_t;
/*
* Ops vector to access a specific DDT object type.
*/
typedef struct ddt_ops {
char ddt_op_name[32];
int (*ddt_op_create)(objset_t *os, uint64_t *object, dmu_tx_t *tx,
boolean_t prehash);
int (*ddt_op_destroy)(objset_t *os, uint64_t object, dmu_tx_t *tx);
int (*ddt_op_lookup)(objset_t *os, uint64_t object, ddt_entry_t *dde);
void (*ddt_op_prefetch)(objset_t *os, uint64_t object,
ddt_entry_t *dde);
int (*ddt_op_update)(objset_t *os, uint64_t object, ddt_entry_t *dde,
dmu_tx_t *tx);
int (*ddt_op_remove)(objset_t *os, uint64_t object, ddt_entry_t *dde,
dmu_tx_t *tx);
int (*ddt_op_walk)(objset_t *os, uint64_t object, ddt_entry_t *dde,
uint64_t *walk);
uint64_t (*ddt_op_count)(objset_t *os, uint64_t object);
} ddt_ops_t;
#define DDT_NAMELEN 80
extern void ddt_object_name(ddt_t *ddt, enum ddt_type type,
enum ddt_class class, char *name);
extern int ddt_object_walk(ddt_t *ddt, enum ddt_type type,
enum ddt_class class, uint64_t *walk, ddt_entry_t *dde);
extern uint64_t ddt_object_count(ddt_t *ddt, enum ddt_type type,
enum ddt_class class);
extern int ddt_object_info(ddt_t *ddt, enum ddt_type type,
enum ddt_class class, dmu_object_info_t *);
extern boolean_t ddt_object_exists(ddt_t *ddt, enum ddt_type type,
enum ddt_class class);
extern void ddt_bp_fill(const ddt_phys_t *ddp, blkptr_t *bp,
uint64_t txg);
extern void ddt_bp_create(enum zio_checksum checksum, const ddt_key_t *ddk,
const ddt_phys_t *ddp, blkptr_t *bp);
extern void ddt_key_fill(ddt_key_t *ddk, const blkptr_t *bp);
extern void ddt_phys_fill(ddt_phys_t *ddp, const blkptr_t *bp);
extern void ddt_phys_clear(ddt_phys_t *ddp);
extern void ddt_phys_addref(ddt_phys_t *ddp);
extern void ddt_phys_decref(ddt_phys_t *ddp);
extern void ddt_phys_free(ddt_t *ddt, ddt_key_t *ddk, ddt_phys_t *ddp,
uint64_t txg);
extern ddt_phys_t *ddt_phys_select(const ddt_entry_t *dde, const blkptr_t *bp);
extern uint64_t ddt_phys_total_refcnt(const ddt_entry_t *dde);
extern void ddt_stat_add(ddt_stat_t *dst, const ddt_stat_t *src, uint64_t neg);
extern void ddt_histogram_add(ddt_histogram_t *dst, const ddt_histogram_t *src);
extern void ddt_histogram_stat(ddt_stat_t *dds, const ddt_histogram_t *ddh);
extern boolean_t ddt_histogram_empty(const ddt_histogram_t *ddh);
extern void ddt_get_dedup_object_stats(spa_t *spa, ddt_object_t *ddo);
extern void ddt_get_dedup_histogram(spa_t *spa, ddt_histogram_t *ddh);
extern void ddt_get_dedup_stats(spa_t *spa, ddt_stat_t *dds_total);
extern uint64_t ddt_get_dedup_dspace(spa_t *spa);
extern uint64_t ddt_get_pool_dedup_ratio(spa_t *spa);
extern int ddt_ditto_copies_needed(ddt_t *ddt, ddt_entry_t *dde,
ddt_phys_t *ddp_willref);
extern int ddt_ditto_copies_present(ddt_entry_t *dde);
extern size_t ddt_compress(void *src, uchar_t *dst, size_t s_len, size_t d_len);
extern void ddt_decompress(uchar_t *src, void *dst, size_t s_len, size_t d_len);
extern ddt_t *ddt_select(spa_t *spa, const blkptr_t *bp);
extern void ddt_enter(ddt_t *ddt);
extern void ddt_exit(ddt_t *ddt);
extern ddt_entry_t *ddt_lookup(ddt_t *ddt, const blkptr_t *bp, boolean_t add);
extern void ddt_prefetch(spa_t *spa, const blkptr_t *bp);
extern void ddt_remove(ddt_t *ddt, ddt_entry_t *dde);
extern boolean_t ddt_class_contains(spa_t *spa, enum ddt_class max_class,
const blkptr_t *bp);
extern ddt_entry_t *ddt_repair_start(ddt_t *ddt, const blkptr_t *bp);
extern void ddt_repair_done(ddt_t *ddt, ddt_entry_t *dde);
extern int ddt_entry_compare(const void *x1, const void *x2);
extern void ddt_create(spa_t *spa);
extern int ddt_load(spa_t *spa);
extern void ddt_unload(spa_t *spa);
extern void ddt_sync(spa_t *spa, uint64_t txg);
extern int ddt_walk(spa_t *spa, ddt_bookmark_t *ddb, ddt_entry_t *dde);
extern int ddt_object_update(ddt_t *ddt, enum ddt_type type,
enum ddt_class class, ddt_entry_t *dde, dmu_tx_t *tx);
extern const ddt_ops_t ddt_zap_ops;
#ifdef __cplusplus
}
#endif
#endif /* _SYS_DDT_H */

View File

@ -19,10 +19,11 @@
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/* Portions Copyright 2010 Robert Milkowski */
#ifndef _SYS_DMU_H
#define _SYS_DMU_H
@ -38,12 +39,14 @@
#include <sys/types.h>
#include <sys/param.h>
#include <sys/cred.h>
#include <sys/time.h>
#ifdef __cplusplus
extern "C" {
#endif
struct uio;
struct xuio;
struct page;
struct vnode;
struct spa;
@ -59,8 +62,9 @@ struct drr_end;
struct zbookmark;
struct spa;
struct nvlist;
struct objset_impl;
struct arc_buf;
struct zio_prop;
struct sa_handle;
typedef struct objset objset_t;
typedef struct dmu_tx dmu_tx_t;
@ -73,8 +77,8 @@ typedef enum dmu_object_type {
DMU_OT_OBJECT_ARRAY, /* UINT64 */
DMU_OT_PACKED_NVLIST, /* UINT8 (XDR by nvlist_pack/unpack) */
DMU_OT_PACKED_NVLIST_SIZE, /* UINT64 */
DMU_OT_BPLIST, /* UINT64 */
DMU_OT_BPLIST_HDR, /* UINT64 */
DMU_OT_BPOBJ, /* UINT64 */
DMU_OT_BPOBJ_HDR, /* UINT64 */
/* spa: */
DMU_OT_SPACE_MAP_HEADER, /* UINT64 */
DMU_OT_SPACE_MAP, /* UINT64 */
@ -114,10 +118,22 @@ typedef enum dmu_object_type {
DMU_OT_FUID, /* FUID table (Packed NVLIST UINT8) */
DMU_OT_FUID_SIZE, /* FUID table size UINT64 */
DMU_OT_NEXT_CLONES, /* ZAP */
DMU_OT_SCRUB_QUEUE, /* ZAP */
DMU_OT_SCAN_QUEUE, /* ZAP */
DMU_OT_USERGROUP_USED, /* ZAP */
DMU_OT_USERGROUP_QUOTA, /* ZAP */
DMU_OT_USERREFS, /* ZAP */
DMU_OT_DDT_ZAP, /* ZAP */
DMU_OT_DDT_STATS, /* ZAP */
DMU_OT_SA, /* System attr */
DMU_OT_SA_MASTER_NODE, /* ZAP */
DMU_OT_SA_ATTR_REGISTRATION, /* ZAP */
DMU_OT_SA_ATTR_LAYOUTS, /* ZAP */
DMU_OT_SCAN_XLATE, /* ZAP */
DMU_OT_DEDUP, /* fake dedup BP from ddt_bp_create() */
DMU_OT_DEADLIST, /* ZAP */
DMU_OT_DEADLIST_HDR, /* UINT64 */
DMU_OT_DSL_CLONES, /* ZAP */
DMU_OT_BPOBJ_SUBOBJ, /* UINT64 */
DMU_OT_NUMTYPES
} dmu_object_type_t;
@ -140,16 +156,6 @@ void zfs_oldacl_byteswap(void *buf, size_t size);
void zfs_acl_byteswap(void *buf, size_t size);
void zfs_znode_byteswap(void *buf, size_t size);
#define DS_MODE_NOHOLD 0 /* internal use only */
#define DS_MODE_USER 1 /* simple access, no special needs */
#define DS_MODE_OWNER 2 /* the "main" access, e.g. a mount */
#define DS_MODE_TYPE_MASK 0x3
#define DS_MODE_TYPE(x) ((x) & DS_MODE_TYPE_MASK)
#define DS_MODE_READONLY 0x8
#define DS_MODE_IS_READONLY(x) ((x) & DS_MODE_READONLY)
#define DS_MODE_INCONSISTENT 0x10
#define DS_MODE_IS_INCONSISTENT(x) ((x) & DS_MODE_INCONSISTENT)
#define DS_FIND_SNAPSHOTS (1<<0)
#define DS_FIND_CHILDREN (1<<1)
@ -162,27 +168,35 @@ void zfs_znode_byteswap(void *buf, size_t size);
#define DMU_USERUSED_OBJECT (-1ULL)
#define DMU_GROUPUSED_OBJECT (-2ULL)
#define DMU_DEADLIST_OBJECT (-3ULL)
/*
* artificial blkids for bonus buffer and spill blocks
*/
#define DMU_BONUS_BLKID (-1ULL)
#define DMU_SPILL_BLKID (-2ULL)
/*
* Public routines to create, destroy, open, and close objsets.
*/
int dmu_objset_open(const char *name, dmu_objset_type_t type, int mode,
objset_t **osp);
int dmu_objset_open_ds(struct dsl_dataset *ds, dmu_objset_type_t type,
objset_t **osp);
void dmu_objset_close(objset_t *os);
int dmu_objset_hold(const char *name, void *tag, objset_t **osp);
int dmu_objset_own(const char *name, dmu_objset_type_t type,
boolean_t readonly, void *tag, objset_t **osp);
void dmu_objset_rele(objset_t *os, void *tag);
void dmu_objset_disown(objset_t *os, void *tag);
int dmu_objset_open_ds(struct dsl_dataset *ds, objset_t **osp);
int dmu_objset_evict_dbufs(objset_t *os);
int dmu_objset_create(const char *name, dmu_objset_type_t type,
objset_t *clone_parent, uint64_t flags,
int dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags,
void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg);
int dmu_objset_clone(const char *name, struct dsl_dataset *clone_origin,
uint64_t flags);
int dmu_objset_destroy(const char *name, boolean_t defer);
int dmu_snapshots_destroy(char *fsname, char *snapname, boolean_t defer);
int dmu_objset_rollback(objset_t *os);
int dmu_objset_snapshot(char *fsname, char *snapname, struct nvlist *props,
boolean_t recursive);
int dmu_objset_rename(const char *name, const char *newname,
boolean_t recursive);
int dmu_objset_find(char *name, int func(char *, void *), void *arg,
int dmu_objset_find(char *name, int func(const char *, void *), void *arg,
int flags);
void dmu_objset_byteswap(void *buf, size_t size);
@ -201,7 +215,7 @@ typedef void dmu_buf_evict_func_t(struct dmu_buf *db, void *user_ptr);
#define DMU_POOL_DIRECTORY_OBJECT 1
#define DMU_POOL_CONFIG "config"
#define DMU_POOL_ROOT_DATASET "root_dataset"
#define DMU_POOL_SYNC_BPLIST "sync_bplist"
#define DMU_POOL_SYNC_BPOBJ "sync_bplist"
#define DMU_POOL_ERRLOG_SCRUB "errlog_scrub"
#define DMU_POOL_ERRLOG_LAST "errlog_last"
#define DMU_POOL_SPARES "spares"
@ -209,19 +223,12 @@ typedef void dmu_buf_evict_func_t(struct dmu_buf *db, void *user_ptr);
#define DMU_POOL_HISTORY "history"
#define DMU_POOL_PROPS "pool_props"
#define DMU_POOL_L2CACHE "l2cache"
/* 4x8 zbookmark_t */
#define DMU_POOL_SCRUB_BOOKMARK "scrub_bookmark"
/* 1x8 zap obj DMU_OT_SCRUB_QUEUE */
#define DMU_POOL_SCRUB_QUEUE "scrub_queue"
/* 1x8 txg */
#define DMU_POOL_SCRUB_MIN_TXG "scrub_min_txg"
/* 1x8 txg */
#define DMU_POOL_SCRUB_MAX_TXG "scrub_max_txg"
/* 1x4 enum scrub_func */
#define DMU_POOL_SCRUB_FUNC "scrub_func"
/* 1x8 count */
#define DMU_POOL_SCRUB_ERRORS "scrub_errors"
#define DMU_POOL_TMP_USERREFS "tmp_userrefs"
#define DMU_POOL_DDT "DDT-%s-%s-%s"
#define DMU_POOL_DDT_STATS "DDT-statistics"
#define DMU_POOL_CREATION_VERSION "creation_version"
#define DMU_POOL_SCAN "scan"
#define DMU_POOL_FREE_BPOBJ "free_bpobj"
/*
* Allocate an object from this objset. The range of object numbers
@ -306,11 +313,14 @@ void dmu_object_set_compress(objset_t *os, uint64_t object, uint8_t compress,
dmu_tx_t *tx);
/*
* Decide how many copies of a given block we should make. Can be from
* 1 to SPA_DVAS_PER_BP.
* Decide how to write a block: checksum, compression, number of copies, etc.
*/
int dmu_get_replication_level(struct objset_impl *, struct zbookmark *zb,
dmu_object_type_t ot);
#define WP_NOFILL 0x1
#define WP_DMU_SYNC 0x2
#define WP_SPILL 0x4
void dmu_write_policy(objset_t *os, struct dnode *dn, int level, int wp,
struct zio_prop *zp);
/*
* The bonus data is accessed more or less like a regular buffer.
* You must dmu_bonus_hold() to get the buffer, which will give you a
@ -324,6 +334,17 @@ int dmu_get_replication_level(struct objset_impl *, struct zbookmark *zb,
int dmu_bonus_hold(objset_t *os, uint64_t object, void *tag, dmu_buf_t **);
int dmu_bonus_max(void);
int dmu_set_bonus(dmu_buf_t *, int, dmu_tx_t *);
int dmu_set_bonustype(dmu_buf_t *, dmu_object_type_t, dmu_tx_t *);
int dmu_rm_spill(objset_t *, uint64_t, dmu_tx_t *);
/*
* Special spill buffer support used by "SA" framework
*/
int dmu_spill_hold_by_bonus(dmu_buf_t *bonus, void *tag, dmu_buf_t **dbp);
int dmu_spill_hold_by_dnode(struct dnode *dn, uint32_t flags,
void *tag, dmu_buf_t **dbp);
int dmu_spill_hold_existing(dmu_buf_t *bonus, void *tag, dmu_buf_t **dbp);
/*
* Obtain the DMU buffer from the specified object which contains the
@ -340,7 +361,7 @@ int dmu_set_bonus(dmu_buf_t *, int, dmu_tx_t *);
* The object number must be a valid, allocated object number.
*/
int dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset,
void *tag, dmu_buf_t **);
void *tag, dmu_buf_t **, int flags);
void dmu_buf_add_ref(dmu_buf_t *db, void* tag);
void dmu_buf_rele(dmu_buf_t *db, void *tag);
uint64_t dmu_buf_refcount(dmu_buf_t *db);
@ -437,11 +458,34 @@ void dmu_tx_hold_free(dmu_tx_t *tx, uint64_t object, uint64_t off,
uint64_t len);
void dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, const char *name);
void dmu_tx_hold_bonus(dmu_tx_t *tx, uint64_t object);
void dmu_tx_hold_spill(dmu_tx_t *tx, uint64_t object);
void dmu_tx_hold_sa(dmu_tx_t *tx, struct sa_handle *hdl, boolean_t may_grow);
void dmu_tx_hold_sa_create(dmu_tx_t *tx, int total_size);
void dmu_tx_abort(dmu_tx_t *tx);
int dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how);
void dmu_tx_wait(dmu_tx_t *tx);
void dmu_tx_commit(dmu_tx_t *tx);
/*
* To register a commit callback, dmu_tx_callback_register() must be called.
*
* dcb_data is a pointer to caller private data that is passed on as a
* callback parameter. The caller is responsible for properly allocating and
* freeing it.
*
* When registering a callback, the transaction must be already created, but
* it cannot be committed or aborted. It can be assigned to a txg or not.
*
* The callback will be called after the transaction has been safely written
* to stable storage and will also be called if the dmu_tx is aborted.
* If there is any error which prevents the transaction from being committed to
* disk, the callback will be called with a value of error != 0.
*/
typedef void dmu_tx_callback_func_t(void *dcb_data, int error);
void dmu_tx_callback_register(dmu_tx_t *tx, dmu_tx_callback_func_t *dcb_func,
void *dcb_data);
/*
* Free up the data blocks for a defined range of a file. If size is
* zero, the range from offset to end-of-file is freed.
@ -469,12 +513,23 @@ void dmu_prealloc(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
int dmu_read_uio(objset_t *os, uint64_t object, struct uio *uio, uint64_t size);
int dmu_write_uio(objset_t *os, uint64_t object, struct uio *uio, uint64_t size,
dmu_tx_t *tx);
int dmu_write_uio_dbuf(dmu_buf_t *zdb, struct uio *uio, uint64_t size,
dmu_tx_t *tx);
int dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset,
uint64_t size, struct page *pp, dmu_tx_t *tx);
struct arc_buf *dmu_request_arcbuf(dmu_buf_t *handle, int size);
void dmu_return_arcbuf(struct arc_buf *buf);
void dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, struct arc_buf *buf,
dmu_tx_t *tx);
int dmu_xuio_init(struct xuio *uio, int niov);
void dmu_xuio_fini(struct xuio *uio);
int dmu_xuio_add(struct xuio *uio, struct arc_buf *abuf, offset_t off,
size_t n);
int dmu_xuio_cnt(struct xuio *uio);
struct arc_buf *dmu_xuio_arcbuf(struct xuio *uio, int i);
void dmu_xuio_clear(struct xuio *uio, int i);
void xuio_stat_wbuf_copied();
void xuio_stat_wbuf_nocopy();
extern int zfs_prefetch_disable;
@ -485,19 +540,19 @@ void dmu_prefetch(objset_t *os, uint64_t object, uint64_t offset,
uint64_t len);
typedef struct dmu_object_info {
/* All sizes are in bytes. */
/* All sizes are in bytes unless otherwise indicated. */
uint32_t doi_data_block_size;
uint32_t doi_metadata_block_size;
uint64_t doi_bonus_size;
dmu_object_type_t doi_type;
dmu_object_type_t doi_bonus_type;
uint64_t doi_bonus_size;
uint8_t doi_indirection; /* 2 = dnode->indirect->data */
uint8_t doi_checksum;
uint8_t doi_compress;
uint8_t doi_pad[5];
/* Values below are number of 512-byte blocks. */
uint64_t doi_physical_blks; /* data + metadata */
uint64_t doi_max_block_offset;
uint64_t doi_physical_blocks_512; /* data + metadata, 512b blks */
uint64_t doi_max_offset;
uint64_t doi_fill_count; /* number of non-empty blocks */
} dmu_object_info_t;
typedef void arc_byteswap_func_t(void *buf, size_t size);
@ -566,6 +621,11 @@ void dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp,
*/
uint64_t dmu_objset_fsid_guid(objset_t *os);
/*
* Get the [cm]time for an objset's snapshot dir
*/
timestruc_t dmu_objset_snap_cmtime(objset_t *os);
int dmu_objset_is_snapshot(objset_t *os);
extern struct spa *dmu_objset_spa(objset_t *os);
@ -575,6 +635,8 @@ extern struct dsl_dataset *dmu_objset_ds(objset_t *os);
extern void dmu_objset_name(objset_t *os, char *buf);
extern dmu_objset_type_t dmu_objset_type(objset_t *os);
extern uint64_t dmu_objset_id(objset_t *os);
extern uint64_t dmu_objset_syncprop(objset_t *os);
extern uint64_t dmu_objset_logbias(objset_t *os);
extern int dmu_snapshot_list_next(objset_t *os, int namelen, char *name,
uint64_t *id, uint64_t *offp, boolean_t *case_conflict);
extern int dmu_snapshot_realname(objset_t *os, char *name, char *real,
@ -582,9 +644,8 @@ extern int dmu_snapshot_realname(objset_t *os, char *name, char *real,
extern int dmu_dir_list_next(objset_t *os, int namelen, char *name,
uint64_t *idp, uint64_t *offp);
typedef void objset_used_cb_t(objset_t *os, dmu_object_type_t bonustype,
void *oldbonus, void *newbonus, uint64_t oldused, uint64_t newused,
dmu_tx_t *tx);
typedef int objset_used_cb_t(dmu_object_type_t bonustype,
void *bonus, uint64_t *userp, uint64_t *groupp);
extern void dmu_objset_register_type(dmu_objset_type_t ost,
objset_used_cb_t *cb);
extern void dmu_objset_set_user(objset_t *os, void *user_ptr);
@ -605,9 +666,20 @@ uint64_t dmu_tx_get_txg(dmu_tx_t *tx);
* storage when the write completes this new data does not become a
* permanent part of the file until the associated transaction commits.
*/
typedef void dmu_sync_cb_t(dmu_buf_t *db, void *arg);
int dmu_sync(struct zio *zio, dmu_buf_t *db,
struct blkptr *bp, uint64_t txg, dmu_sync_cb_t *done, void *arg);
/*
* {zfs,zvol,ztest}_get_done() args
*/
typedef struct zgd {
struct zilog *zgd_zilog;
struct blkptr *zgd_bp;
dmu_buf_t *zgd_db;
struct rl *zgd_rl;
void *zgd_private;
} zgd_t;
typedef void dmu_sync_cb_t(zgd_t *arg, int error);
int dmu_sync(struct zio *zio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd);
/*
* Find the next hole or data block in file starting at *off
@ -642,11 +714,12 @@ typedef struct dmu_recv_cookie {
struct dsl_dataset *drc_real_ds;
struct drr_begin *drc_drrb;
char *drc_tosnap;
char *drc_top_ds;
boolean_t drc_newfs;
boolean_t drc_force;
} dmu_recv_cookie_t;
int dmu_recv_begin(char *tofs, char *tosnap, struct drr_begin *,
int dmu_recv_begin(char *tofs, char *tosnap, char *topds, struct drr_begin *,
boolean_t force, objset_t *origin, dmu_recv_cookie_t *);
int dmu_recv_stream(dmu_recv_cookie_t *drc, struct vnode *vp, offset_t *voffp);
int dmu_recv_end(dmu_recv_cookie_t *drc);

View File

@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@ -210,8 +210,7 @@ extern "C" {
*
* ds_lock
* protects:
* ds_user_ptr
* ds_user_evict_func
* ds_objset
* ds_open_refcount
* ds_snapname
* ds_phys accounting
@ -233,6 +232,39 @@ extern "C" {
struct objset;
struct dmu_pool;
typedef struct dmu_xuio {
int next;
int cnt;
struct arc_buf **bufs;
iovec_t *iovp;
} dmu_xuio_t;
typedef struct xuio_stats {
/* loaned yet not returned arc_buf */
kstat_named_t xuiostat_onloan_rbuf;
kstat_named_t xuiostat_onloan_wbuf;
/* whether a copy is made when loaning out a read buffer */
kstat_named_t xuiostat_rbuf_copied;
kstat_named_t xuiostat_rbuf_nocopy;
/* whether a copy is made when assigning a write buffer */
kstat_named_t xuiostat_wbuf_copied;
kstat_named_t xuiostat_wbuf_nocopy;
} xuio_stats_t;
static xuio_stats_t xuio_stats = {
{ "onloan_read_buf", KSTAT_DATA_UINT64 },
{ "onloan_write_buf", KSTAT_DATA_UINT64 },
{ "read_buf_copied", KSTAT_DATA_UINT64 },
{ "read_buf_nocopy", KSTAT_DATA_UINT64 },
{ "write_buf_copied", KSTAT_DATA_UINT64 },
{ "write_buf_nocopy", KSTAT_DATA_UINT64 }
};
#define XUIOSTAT_INCR(stat, val) \
atomic_add_64(&xuio_stats.stat.value.ui64, (val))
#define XUIOSTAT_BUMP(stat) XUIOSTAT_INCR(stat, 1)
#ifdef __cplusplus
}
#endif

View File

@ -19,10 +19,11 @@
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/* Portions Copyright 2010 Robert Milkowski */
#ifndef _SYS_DMU_OBJSET_H
#define _SYS_DMU_OBJSET_H
@ -33,6 +34,7 @@
#include <sys/dnode.h>
#include <sys/zio.h>
#include <sys/zil.h>
#include <sys/sa.h>
#ifdef __cplusplus
extern "C" {
@ -40,11 +42,13 @@ extern "C" {
struct dsl_dataset;
struct dmu_tx;
struct objset_impl;
#define OBJSET_PHYS_SIZE 2048
#define OBJSET_OLD_PHYS_SIZE 1024
#define OBJSET_BUF_HAS_USERUSED(buf) \
(arc_buf_size(buf) > OBJSET_OLD_PHYS_SIZE)
#define OBJSET_FLAG_USERACCOUNTING_COMPLETE (1ULL<<0)
typedef struct objset_phys {
@ -59,11 +63,6 @@ typedef struct objset_phys {
} objset_phys_t;
struct objset {
struct objset_impl *os;
int os_mode;
};
typedef struct objset_impl {
/* Immutable: */
struct dsl_dataset *os_dsl_dataset;
spa_t *os_spa;
@ -73,12 +72,17 @@ typedef struct objset_impl {
dnode_t *os_userused_dnode;
dnode_t *os_groupused_dnode;
zilog_t *os_zil;
objset_t os;
uint8_t os_checksum; /* can change, under dsl_dir's locks */
uint8_t os_compress; /* can change, under dsl_dir's locks */
uint8_t os_copies; /* can change, under dsl_dir's locks */
uint8_t os_primary_cache; /* can change, under dsl_dir's locks */
uint8_t os_secondary_cache; /* can change, under dsl_dir's locks */
/* can change, under dsl_dir's locks: */
uint8_t os_checksum;
uint8_t os_compress;
uint8_t os_copies;
uint8_t os_dedup_checksum;
uint8_t os_dedup_verify;
uint8_t os_logbias;
uint8_t os_primary_cache;
uint8_t os_secondary_cache;
uint8_t os_sync;
/* no lock needed: */
struct dmu_tx *os_synctx; /* XXX sketchy */
@ -101,8 +105,12 @@ typedef struct objset_impl {
/* stuff we store for the user */
kmutex_t os_user_ptr_lock;
void *os_user_ptr;
} objset_impl_t;
/* SA layout/attribute registration */
sa_os_t *os_sa;
};
#define DMU_META_OBJSET 0
#define DMU_META_DNODE_OBJECT 0
#define DMU_OBJECT_IS_SPECIAL(obj) ((int64_t)(obj) <= 0)
@ -111,14 +119,18 @@ typedef struct objset_impl {
(os)->os_secondary_cache == ZFS_CACHE_METADATA)
/* called from zpl */
int dmu_objset_open(const char *name, dmu_objset_type_t type, int mode,
objset_t **osp);
void dmu_objset_close(objset_t *os);
int dmu_objset_create(const char *name, dmu_objset_type_t type,
objset_t *clone_parent, uint64_t flags,
int dmu_objset_hold(const char *name, void *tag, objset_t **osp);
int dmu_objset_own(const char *name, dmu_objset_type_t type,
boolean_t readonly, void *tag, objset_t **osp);
void dmu_objset_rele(objset_t *os, void *tag);
void dmu_objset_disown(objset_t *os, void *tag);
int dmu_objset_from_ds(struct dsl_dataset *ds, objset_t **osp);
int dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags,
void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg);
int dmu_objset_clone(const char *name, struct dsl_dataset *clone_origin,
uint64_t flags);
int dmu_objset_destroy(const char *name, boolean_t defer);
int dmu_objset_rollback(objset_t *os);
int dmu_objset_snapshot(char *fsname, char *snapname, nvlist_t *props,
boolean_t recursive);
void dmu_objset_stats(objset_t *os, nvlist_t *nv);
@ -126,23 +138,26 @@ void dmu_objset_fast_stat(objset_t *os, dmu_objset_stats_t *stat);
void dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp,
uint64_t *usedobjsp, uint64_t *availobjsp);
uint64_t dmu_objset_fsid_guid(objset_t *os);
int dmu_objset_find(char *name, int func(char *, void *), void *arg,
int dmu_objset_find(char *name, int func(const char *, void *), void *arg,
int flags);
int dmu_objset_find_spa(spa_t *spa, const char *name,
int func(spa_t *, uint64_t, const char *, void *), void *arg, int flags);
int dmu_objset_prefetch(char *name, void *arg);
int dmu_objset_prefetch(const char *name, void *arg);
void dmu_objset_byteswap(void *buf, size_t size);
int dmu_objset_evict_dbufs(objset_t *os);
timestruc_t dmu_objset_snap_cmtime(objset_t *os);
/* called from dsl */
void dmu_objset_sync(objset_impl_t *os, zio_t *zio, dmu_tx_t *tx);
objset_impl_t *dmu_objset_create_impl(spa_t *spa, struct dsl_dataset *ds,
void dmu_objset_sync(objset_t *os, zio_t *zio, dmu_tx_t *tx);
boolean_t dmu_objset_is_dirty(objset_t *os, uint64_t txg);
objset_t *dmu_objset_create_impl(spa_t *spa, struct dsl_dataset *ds,
blkptr_t *bp, dmu_objset_type_t type, dmu_tx_t *tx);
int dmu_objset_open_impl(spa_t *spa, struct dsl_dataset *ds, blkptr_t *bp,
objset_impl_t **osip);
void dmu_objset_evict(struct dsl_dataset *ds, void *arg);
void dmu_objset_do_userquota_callbacks(objset_impl_t *os, dmu_tx_t *tx);
boolean_t dmu_objset_userused_enabled(objset_impl_t *os);
objset_t **osp);
void dmu_objset_evict(objset_t *os);
void dmu_objset_do_userquota_updates(objset_t *os, dmu_tx_t *tx);
void dmu_objset_userquota_get_ids(dnode_t *dn, boolean_t before, dmu_tx_t *tx);
boolean_t dmu_objset_userused_enabled(objset_t *os);
int dmu_objset_userspace_upgrade(objset_t *os);
boolean_t dmu_objset_userspace_present(objset_t *os);

View File

@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _SYS_DMU_TRAVERSE_H
@ -36,19 +35,24 @@ extern "C" {
struct dnode_phys;
struct dsl_dataset;
struct zilog;
struct arc_buf;
typedef int (blkptr_cb_t)(spa_t *spa, blkptr_t *bp,
const zbookmark_t *zb, const struct dnode_phys *dnp, void *arg);
typedef int (blkptr_cb_t)(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
struct arc_buf *pbuf, const zbookmark_t *zb, const struct dnode_phys *dnp,
void *arg);
#define TRAVERSE_PRE (1<<0)
#define TRAVERSE_POST (1<<1)
#define TRAVERSE_PREFETCH_METADATA (1<<2)
#define TRAVERSE_PREFETCH_DATA (1<<3)
#define TRAVERSE_PREFETCH (TRAVERSE_PREFETCH_METADATA | TRAVERSE_PREFETCH_DATA)
#define TRAVERSE_HARD (1<<4)
int traverse_dataset(struct dsl_dataset *ds, uint64_t txg_start,
int flags, blkptr_cb_t func, void *arg);
int traverse_pool(spa_t *spa, blkptr_cb_t func, void *arg);
int traverse_dataset(struct dsl_dataset *ds,
uint64_t txg_start, int flags, blkptr_cb_t func, void *arg);
int traverse_pool(spa_t *spa,
uint64_t txg_start, int flags, blkptr_cb_t func, void *arg);
#ifdef __cplusplus
}

View File

@ -19,15 +19,13 @@
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_DMU_TX_H
#define _SYS_DMU_TX_H
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/inttypes.h>
#include <sys/dmu.h>
#include <sys/txg.h>
@ -59,6 +57,7 @@ struct dmu_tx {
txg_handle_t tx_txgh;
void *tx_tempreserve_cookie;
struct dmu_tx_hold *tx_needassign_txh;
list_t tx_callbacks; /* list of dmu_tx_callback_t on this dmu_tx */
uint8_t tx_anyobj;
int tx_err;
#ifdef ZFS_DEBUG
@ -78,6 +77,7 @@ enum dmu_tx_hold_type {
THT_FREE,
THT_ZAP,
THT_SPACE,
THT_SPILL,
THT_NUMTYPES
};
@ -98,6 +98,11 @@ typedef struct dmu_tx_hold {
#endif
} dmu_tx_hold_t;
typedef struct dmu_tx_callback {
list_node_t dcb_node; /* linked to tx_callbacks list */
dmu_tx_callback_func_t *dcb_func; /* caller function pointer */
void *dcb_data; /* caller private data */
} dmu_tx_callback_t;
/*
* These routines are defined in dmu.h, and are called by the user.
@ -109,6 +114,10 @@ void dmu_tx_abort(dmu_tx_t *tx);
uint64_t dmu_tx_get_txg(dmu_tx_t *tx);
void dmu_tx_wait(dmu_tx_t *tx);
void dmu_tx_callback_register(dmu_tx_t *tx, dmu_tx_callback_func_t *dcb_func,
void *dcb_data);
void dmu_tx_do_callbacks(list_t *cb_list, int error);
/*
* These routines are defined in dmu_spa.h, and are called by the SPA.
*/

View File

@ -19,15 +19,13 @@
* CDDL HEADER END
*/
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _DFETCH_H
#define _DFETCH_H
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/zfs_context.h>
#ifdef __cplusplus
@ -63,6 +61,9 @@ typedef struct zfetch {
uint64_t zf_alloc_fail; /* # of failed attempts to alloc strm */
} zfetch_t;
void zfetch_init(void);
void zfetch_fini(void);
void dmu_zfetch_init(zfetch_t *, struct dnode *);
void dmu_zfetch_rele(zfetch_t *);
void dmu_zfetch(zfetch_t *, uint64_t, uint64_t, int);

View File

@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _SYS_DNODE_H
@ -62,6 +61,18 @@ extern "C" {
#define DN_MAX_OBJECT_SHIFT 48 /* 256 trillion (zfs_fid_t limit) */
#define DN_MAX_OFFSET_SHIFT 64 /* 2^64 bytes in a dnode */
/*
* dnode id flags
*
* Note: a file will never ever have its
* ids moved from bonus->spill
* and only in a crypto environment would it be on spill
*/
#define DN_ID_CHKED_BONUS 0x1
#define DN_ID_CHKED_SPILL 0x2
#define DN_ID_OLD_EXIST 0x4
#define DN_ID_NEW_EXIST 0x8
/*
* Derived constants.
*/
@ -70,10 +81,12 @@ extern "C" {
#define DN_MAX_BONUSLEN (DNODE_SIZE - DNODE_CORE_SIZE - (1 << SPA_BLKPTRSHIFT))
#define DN_MAX_OBJECT (1ULL << DN_MAX_OBJECT_SHIFT)
#define DN_ZERO_BONUSLEN (DN_MAX_BONUSLEN + 1)
#define DN_KILL_SPILLBLK (1)
#define DNODES_PER_BLOCK_SHIFT (DNODE_BLOCK_SHIFT - DNODE_SHIFT)
#define DNODES_PER_BLOCK (1ULL << DNODES_PER_BLOCK_SHIFT)
#define DNODES_PER_LEVEL_SHIFT (DN_MAX_INDBLKSHIFT - SPA_BLKPTRSHIFT)
#define DNODES_PER_LEVEL (1ULL << DNODES_PER_LEVEL_SHIFT)
/* The +2 here is a cheesy way to round up */
#define DN_MAX_LEVELS (2 + ((DN_MAX_OFFSET_SHIFT - SPA_MINBLOCKSHIFT) / \
@ -88,7 +101,7 @@ extern "C" {
#define EPB(blkshift, typeshift) (1 << (blkshift - typeshift))
struct dmu_buf_impl;
struct objset_impl;
struct objset;
struct zio;
enum dnode_dirtycontext {
@ -101,6 +114,9 @@ enum dnode_dirtycontext {
#define DNODE_FLAG_USED_BYTES (1<<0)
#define DNODE_FLAG_USERUSED_ACCOUNTED (1<<1)
/* Does dnode have a SA spill blkptr in bonus? */
#define DNODE_FLAG_SPILL_BLKPTR (1<<2)
typedef struct dnode_phys {
uint8_t dn_type; /* dmu_object_type_t */
uint8_t dn_indblkshift; /* ln2(indirect block size) */
@ -121,7 +137,8 @@ typedef struct dnode_phys {
uint64_t dn_pad3[4];
blkptr_t dn_blkptr[1];
uint8_t dn_bonus[DN_MAX_BONUSLEN];
uint8_t dn_bonus[DN_MAX_BONUSLEN - sizeof (blkptr_t)];
blkptr_t dn_spill;
} dnode_phys_t;
typedef struct dnode {
@ -136,7 +153,7 @@ typedef struct dnode {
list_node_t dn_link;
/* immutable: */
struct objset_impl *dn_objset;
struct objset *dn_objset;
uint64_t dn_object;
struct dmu_buf_impl *dn_dbuf;
dnode_phys_t *dn_phys; /* pointer into dn->dn_dbuf->db.db_data */
@ -161,6 +178,8 @@ typedef struct dnode {
uint8_t dn_next_nblkptr[TXG_SIZE];
uint8_t dn_next_nlevels[TXG_SIZE];
uint8_t dn_next_indblkshift[TXG_SIZE];
uint8_t dn_next_bonustype[TXG_SIZE];
uint8_t dn_rm_spillblk[TXG_SIZE]; /* for removing spill blk */
uint16_t dn_next_bonuslen[TXG_SIZE];
uint32_t dn_next_blksz[TXG_SIZE]; /* next block size in bytes */
@ -185,12 +204,17 @@ typedef struct dnode {
kmutex_t dn_dbufs_mtx;
list_t dn_dbufs; /* linked list of descendent dbuf_t's */
struct dmu_buf_impl *dn_bonus; /* bonus buffer dbuf */
boolean_t dn_have_spill; /* have spill or are spilling */
/* parent IO for current sync write */
zio_t *dn_zio;
/* used in syncing context */
dnode_phys_t *dn_oldphys;
uint64_t dn_oldused; /* old phys used bytes */
uint64_t dn_oldflags; /* old phys dn_flags */
uint64_t dn_olduid, dn_oldgid;
uint64_t dn_newuid, dn_newgid;
int dn_id_flags;
/* holds prefetch structure */
struct zfetch dn_zfetch;
@ -202,14 +226,17 @@ typedef struct free_range {
uint64_t fr_nblks;
} free_range_t;
dnode_t *dnode_special_open(struct objset_impl *dd, dnode_phys_t *dnp,
dnode_t *dnode_special_open(struct objset *dd, dnode_phys_t *dnp,
uint64_t object);
void dnode_special_close(dnode_t *dn);
void dnode_setbonuslen(dnode_t *dn, int newsize, dmu_tx_t *tx);
int dnode_hold(struct objset_impl *dd, uint64_t object,
void dnode_setbonus_type(dnode_t *dn, dmu_object_type_t, dmu_tx_t *tx);
void dnode_rm_spill(dnode_t *dn, dmu_tx_t *tx);
int dnode_hold(struct objset *dd, uint64_t object,
void *ref, dnode_t **dnp);
int dnode_hold_impl(struct objset_impl *dd, uint64_t object, int flag,
int dnode_hold_impl(struct objset *dd, uint64_t object, int flag,
void *ref, dnode_t **dnp);
boolean_t dnode_add_ref(dnode_t *dn, void *ref);
void dnode_rele(dnode_t *dn, void *ref);

View File

@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _SYS_DSL_DATASET_H
@ -33,6 +32,7 @@
#include <sys/bplist.h>
#include <sys/dsl_synctask.h>
#include <sys/zfs_context.h>
#include <sys/dsl_deadlist.h>
#ifdef __cplusplus
extern "C" {
@ -42,8 +42,6 @@ struct dsl_dataset;
struct dsl_dir;
struct dsl_pool;
typedef void dsl_dataset_evict_func_t(struct dsl_dataset *, void *);
#define DS_FLAG_INCONSISTENT (1ULL<<0)
#define DS_IS_INCONSISTENT(ds) \
((ds)->ds_phys->ds_flags & DS_FLAG_INCONSISTENT)
@ -85,7 +83,7 @@ typedef struct dsl_dataset_phys {
uint64_t ds_num_children; /* clone/snap children; ==0 for head */
uint64_t ds_creation_time; /* seconds since 1970 */
uint64_t ds_creation_txg;
uint64_t ds_deadlist_obj; /* DMU_OT_BPLIST */
uint64_t ds_deadlist_obj; /* DMU_OT_DEADLIST */
uint64_t ds_used_bytes;
uint64_t ds_compressed_bytes;
uint64_t ds_uncompressed_bytes;
@ -115,10 +113,10 @@ typedef struct dsl_dataset {
/* only used in syncing context, only valid for non-snapshots: */
struct dsl_dataset *ds_prev;
uint64_t ds_origin_txg;
/* has internal locking: */
bplist_t ds_deadlist;
dsl_deadlist_t ds_deadlist;
bplist_t ds_pending_deadlist;
/* to protect against multiple concurrent incremental recv */
kmutex_t ds_recvlock;
@ -132,8 +130,7 @@ typedef struct dsl_dataset {
* Protected by ds_lock:
*/
kmutex_t ds_lock;
void *ds_user_ptr;
dsl_dataset_evict_func_t *ds_user_evict_func;
objset_t *ds_objset;
uint64_t ds_userrefs;
/*
@ -165,7 +162,7 @@ struct dsl_ds_destroyarg {
boolean_t need_prep; /* do we need to retry due to EBUSY? */
};
#define dsl_dataset_is_snapshot(ds) \
#define dsl_dataset_is_snapshot(ds) \
((ds)->ds_phys->ds_num_children != 0)
#define DS_UNIQUE_IS_ACCURATE(ds) \
@ -174,17 +171,17 @@ struct dsl_ds_destroyarg {
int dsl_dataset_hold(const char *name, void *tag, dsl_dataset_t **dsp);
int dsl_dataset_hold_obj(struct dsl_pool *dp, uint64_t dsobj,
void *tag, dsl_dataset_t **);
int dsl_dataset_own(const char *name, int flags, void *owner,
dsl_dataset_t **dsp);
int dsl_dataset_own(const char *name, boolean_t inconsistentok,
void *tag, dsl_dataset_t **dsp);
int dsl_dataset_own_obj(struct dsl_pool *dp, uint64_t dsobj,
int flags, void *owner, dsl_dataset_t **);
boolean_t inconsistentok, void *tag, dsl_dataset_t **dsp);
void dsl_dataset_name(dsl_dataset_t *ds, char *name);
void dsl_dataset_rele(dsl_dataset_t *ds, void *tag);
void dsl_dataset_disown(dsl_dataset_t *ds, void *owner);
void dsl_dataset_disown(dsl_dataset_t *ds, void *tag);
void dsl_dataset_drop_ref(dsl_dataset_t *ds, void *tag);
boolean_t dsl_dataset_tryown(dsl_dataset_t *ds, boolean_t inconsistentok,
void *owner);
void dsl_dataset_make_exclusive(dsl_dataset_t *ds, void *owner);
void *tag);
void dsl_dataset_make_exclusive(dsl_dataset_t *ds, void *tag);
uint64_t dsl_dataset_create_sync(dsl_dir_t *pds, const char *lastname,
dsl_dataset_t *origin, uint64_t flags, cred_t *, dmu_tx_t *);
uint64_t dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
@ -195,21 +192,18 @@ dsl_checkfunc_t dsl_dataset_destroy_check;
dsl_syncfunc_t dsl_dataset_destroy_sync;
dsl_checkfunc_t dsl_dataset_snapshot_check;
dsl_syncfunc_t dsl_dataset_snapshot_sync;
int dsl_dataset_rollback(dsl_dataset_t *ds, dmu_objset_type_t ost);
int dsl_dataset_rename(char *name, const char *newname, boolean_t recursive);
int dsl_dataset_promote(const char *name);
int dsl_dataset_promote(const char *name, char *conflsnap);
int dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head,
boolean_t force);
int dsl_dataset_user_hold(char *dsname, char *snapname, char *htag,
boolean_t recursive);
boolean_t recursive, boolean_t temphold);
int dsl_dataset_user_release(char *dsname, char *snapname, char *htag,
boolean_t recursive);
int dsl_dataset_user_release_tmp(struct dsl_pool *dp, uint64_t dsobj,
char *htag);
int dsl_dataset_get_holds(const char *dsname, nvlist_t **nvp);
void *dsl_dataset_set_user_ptr(dsl_dataset_t *ds,
void *p, dsl_dataset_evict_func_t func);
void *dsl_dataset_get_user_ptr(dsl_dataset_t *ds);
blkptr_t *dsl_dataset_get_blkptr(dsl_dataset_t *ds);
void dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx);
@ -219,10 +213,12 @@ boolean_t dsl_dataset_modified_since_lastsnap(dsl_dataset_t *ds);
void dsl_dataset_sync(dsl_dataset_t *os, zio_t *zio, dmu_tx_t *tx);
void dsl_dataset_block_born(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx);
int dsl_dataset_block_kill(dsl_dataset_t *ds, blkptr_t *bp, zio_t *pio,
void dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp,
dmu_tx_t *tx);
boolean_t dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth);
int dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp,
dmu_tx_t *tx, boolean_t async);
boolean_t dsl_dataset_block_freeable(dsl_dataset_t *ds, const blkptr_t *bp,
uint64_t blk_birth);
uint64_t dsl_dataset_prev_snap_txg(dsl_dataset_t *ds);
void dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx);
@ -238,13 +234,13 @@ int dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf);
int dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota,
uint64_t asize, uint64_t inflight, uint64_t *used,
uint64_t *ref_rsrv);
int dsl_dataset_set_quota(const char *dsname, uint64_t quota);
void dsl_dataset_set_quota_sync(void *arg1, void *arg2, cred_t *cr,
dmu_tx_t *tx);
int dsl_dataset_set_reservation(const char *dsname, uint64_t reservation);
void dsl_dataset_set_flags(dsl_dataset_t *ds, uint64_t flags);
int64_t dsl_dataset_new_refreservation(dsl_dataset_t *ds, uint64_t reservation,
dmu_tx_t *tx);
int dsl_dataset_set_quota(const char *dsname, zprop_source_t source,
uint64_t quota);
dsl_syncfunc_t dsl_dataset_set_quota_sync;
int dsl_dataset_set_reservation(const char *dsname, zprop_source_t source,
uint64_t reservation);
int dsl_destroy_inconsistent(const char *dsname, void *arg);
#ifdef ZFS_DEBUG
#define dprintf_ds(ds, fmt, ...) do { \

View File

@ -0,0 +1,87 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _SYS_DSL_DEADLIST_H
#define _SYS_DSL_DEADLIST_H
#include <sys/bpobj.h>
#include <sys/zfs_context.h>
#ifdef __cplusplus
extern "C" {
#endif
struct dmu_buf;
struct dsl_dataset;
typedef struct dsl_deadlist_phys {
uint64_t dl_used;
uint64_t dl_comp;
uint64_t dl_uncomp;
uint64_t dl_pad[37]; /* pad out to 320b for future expansion */
} dsl_deadlist_phys_t;
typedef struct dsl_deadlist {
objset_t *dl_os;
uint64_t dl_object;
avl_tree_t dl_tree;
boolean_t dl_havetree;
struct dmu_buf *dl_dbuf;
dsl_deadlist_phys_t *dl_phys;
kmutex_t dl_lock;
/* if it's the old on-disk format: */
bpobj_t dl_bpobj;
boolean_t dl_oldfmt;
} dsl_deadlist_t;
typedef struct dsl_deadlist_entry {
avl_node_t dle_node;
uint64_t dle_mintxg;
bpobj_t dle_bpobj;
} dsl_deadlist_entry_t;
void dsl_deadlist_open(dsl_deadlist_t *dl, objset_t *os, uint64_t object);
void dsl_deadlist_close(dsl_deadlist_t *dl);
uint64_t dsl_deadlist_alloc(objset_t *os, dmu_tx_t *tx);
void dsl_deadlist_free(objset_t *os, uint64_t dlobj, dmu_tx_t *tx);
void dsl_deadlist_insert(dsl_deadlist_t *dl, const blkptr_t *bp, dmu_tx_t *tx);
void dsl_deadlist_add_key(dsl_deadlist_t *dl, uint64_t mintxg, dmu_tx_t *tx);
void dsl_deadlist_remove_key(dsl_deadlist_t *dl, uint64_t mintxg, dmu_tx_t *tx);
uint64_t dsl_deadlist_clone(dsl_deadlist_t *dl, uint64_t maxtxg,
uint64_t mrs_obj, dmu_tx_t *tx);
void dsl_deadlist_space(dsl_deadlist_t *dl,
uint64_t *usedp, uint64_t *compp, uint64_t *uncompp);
void dsl_deadlist_space_range(dsl_deadlist_t *dl,
uint64_t mintxg, uint64_t maxtxg,
uint64_t *usedp, uint64_t *compp, uint64_t *uncompp);
void dsl_deadlist_merge(dsl_deadlist_t *dl, uint64_t obj, dmu_tx_t *tx);
void dsl_deadlist_move_bpobj(dsl_deadlist_t *dl, bpobj_t *bpo, uint64_t mintxg,
dmu_tx_t *tx);
#ifdef __cplusplus
}
#endif
#endif /* _SYS_DSL_DEADLIST_H */

View File

@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _SYS_DSL_DIR_H
@ -70,7 +69,8 @@ typedef struct dsl_dir_phys {
uint64_t dd_deleg_zapobj; /* dataset delegation permissions */
uint64_t dd_flags;
uint64_t dd_used_breakdown[DD_USED_NUM];
uint64_t dd_pad[14]; /* pad out to 256 bytes for good measure */
uint64_t dd_clones; /* dsl_dir objects */
uint64_t dd_pad[13]; /* pad out to 256 bytes for good measure */
} dsl_dir_phys_t;
struct dsl_dir {
@ -89,6 +89,8 @@ struct dsl_dir {
/* Protected by dd_lock */
kmutex_t dd_lock;
list_t dd_prop_cbs; /* list of dsl_prop_cb_record_t's */
timestruc_t dd_snap_cmtime; /* last time snapshot namespace changed */
uint64_t dd_origin_txg;
/* gross estimate of space used by in-flight tx's */
uint64_t dd_tempreserved[TXG_SIZE];
@ -125,18 +127,24 @@ void dsl_dir_diduse_space(dsl_dir_t *dd, dd_used_t type,
int64_t used, int64_t compressed, int64_t uncompressed, dmu_tx_t *tx);
void dsl_dir_transfer_space(dsl_dir_t *dd, int64_t delta,
dd_used_t oldtype, dd_used_t newtype, dmu_tx_t *tx);
int dsl_dir_set_quota(const char *ddname, uint64_t quota);
int dsl_dir_set_reservation(const char *ddname, uint64_t reservation);
int dsl_dir_set_quota(const char *ddname, zprop_source_t source,
uint64_t quota);
int dsl_dir_set_reservation(const char *ddname, zprop_source_t source,
uint64_t reservation);
int dsl_dir_rename(dsl_dir_t *dd, const char *newname);
int dsl_dir_transfer_possible(dsl_dir_t *sdd, dsl_dir_t *tdd, uint64_t space);
int dsl_dir_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx);
boolean_t dsl_dir_is_clone(dsl_dir_t *dd);
void dsl_dir_new_refreservation(dsl_dir_t *dd, struct dsl_dataset *ds,
uint64_t reservation, cred_t *cr, dmu_tx_t *tx);
void dsl_dir_snap_cmtime_update(dsl_dir_t *dd);
timestruc_t dsl_dir_snap_cmtime(dsl_dir_t *dd);
/* internal reserved dir name */
#define MOS_DIR_NAME "$MOS"
#define ORIGIN_DIR_NAME "$ORIGIN"
#define XLATION_DIR_NAME "$XLATION"
#define FREE_DIR_NAME "$FREE"
#ifdef ZFS_DEBUG
#define dprintf_dd(dd, fmt, ...) do { \

View File

@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _SYS_DSL_POOL_H
@ -32,6 +31,9 @@
#include <sys/zfs_context.h>
#include <sys/zio.h>
#include <sys/dnode.h>
#include <sys/ddt.h>
#include <sys/arc.h>
#include <sys/bpobj.h>
#ifdef __cplusplus
extern "C" {
@ -42,12 +44,7 @@ struct dsl_dir;
struct dsl_dataset;
struct dsl_pool;
struct dmu_tx;
enum scrub_func {
SCRUB_FUNC_NONE,
SCRUB_FUNC_CLEAN,
SCRUB_FUNC_NUMFUNCS
};
struct dsl_scan;
/* These macros are for indexing into the zfs_all_blkstats_t. */
#define DMU_OT_DEFERRED DMU_OT_NONE
@ -75,6 +72,7 @@ typedef struct dsl_pool {
struct objset *dp_meta_objset;
struct dsl_dir *dp_root_dir;
struct dsl_dir *dp_mos_dir;
struct dsl_dir *dp_free_dir;
struct dsl_dataset *dp_origin_snap;
uint64_t dp_root_dir_obj;
struct taskq *dp_vnrele_taskq;
@ -83,25 +81,18 @@ typedef struct dsl_pool {
blkptr_t dp_meta_rootbp;
list_t dp_synced_datasets;
hrtime_t dp_read_overhead;
uint64_t dp_throughput;
uint64_t dp_throughput; /* bytes per millisec */
uint64_t dp_write_limit;
uint64_t dp_tmp_userrefs_obj;
bpobj_t dp_free_bpobj;
struct dsl_scan *dp_scan;
/* Uses dp_lock */
kmutex_t dp_lock;
uint64_t dp_space_towrite[TXG_SIZE];
uint64_t dp_tempreserved[TXG_SIZE];
enum scrub_func dp_scrub_func;
uint64_t dp_scrub_queue_obj;
uint64_t dp_scrub_min_txg;
uint64_t dp_scrub_max_txg;
zbookmark_t dp_scrub_bookmark;
boolean_t dp_scrub_pausing;
boolean_t dp_scrub_isresilver;
uint64_t dp_scrub_start_time;
kmutex_t dp_scrub_cancel_lock; /* protects dp_scrub_restart */
boolean_t dp_scrub_restart;
/* Has its own locking */
tx_state_t dp_tx;
txg_list_t dp_dirty_datasets;
@ -123,29 +114,36 @@ int dsl_pool_open(spa_t *spa, uint64_t txg, dsl_pool_t **dpp);
void dsl_pool_close(dsl_pool_t *dp);
dsl_pool_t *dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg);
void dsl_pool_sync(dsl_pool_t *dp, uint64_t txg);
void dsl_pool_zil_clean(dsl_pool_t *dp);
void dsl_pool_sync_done(dsl_pool_t *dp, uint64_t txg);
int dsl_pool_sync_context(dsl_pool_t *dp);
uint64_t dsl_pool_adjustedsize(dsl_pool_t *dp, boolean_t netfree);
uint64_t dsl_pool_adjustedfree(dsl_pool_t *dp, boolean_t netfree);
int dsl_pool_tempreserve_space(dsl_pool_t *dp, uint64_t space, dmu_tx_t *tx);
void dsl_pool_tempreserve_clear(dsl_pool_t *dp, int64_t space, dmu_tx_t *tx);
void dsl_pool_memory_pressure(dsl_pool_t *dp);
void dsl_pool_willuse_space(dsl_pool_t *dp, int64_t space, dmu_tx_t *tx);
int dsl_free(zio_t *pio, dsl_pool_t *dp, uint64_t txg, const blkptr_t *bpp,
zio_done_func_t *done, void *private, uint32_t arc_flags);
void dsl_pool_ds_destroyed(struct dsl_dataset *ds, struct dmu_tx *tx);
void dsl_pool_ds_snapshotted(struct dsl_dataset *ds, struct dmu_tx *tx);
void dsl_pool_ds_clone_swapped(struct dsl_dataset *ds1, struct dsl_dataset *ds2,
struct dmu_tx *tx);
void dsl_free(dsl_pool_t *dp, uint64_t txg, const blkptr_t *bpp);
void dsl_free_sync(zio_t *pio, dsl_pool_t *dp, uint64_t txg,
const blkptr_t *bpp);
int dsl_read(zio_t *pio, spa_t *spa, const blkptr_t *bpp, arc_buf_t *pbuf,
arc_done_func_t *done, void *private, int priority, int zio_flags,
uint32_t *arc_flags, const zbookmark_t *zb);
int dsl_read_nolock(zio_t *pio, spa_t *spa, const blkptr_t *bpp,
arc_done_func_t *done, void *private, int priority, int zio_flags,
uint32_t *arc_flags, const zbookmark_t *zb);
void dsl_pool_create_origin(dsl_pool_t *dp, dmu_tx_t *tx);
void dsl_pool_upgrade_clones(dsl_pool_t *dp, dmu_tx_t *tx);
int dsl_pool_scrub_cancel(dsl_pool_t *dp);
int dsl_pool_scrub_clean(dsl_pool_t *dp);
void dsl_pool_scrub_sync(dsl_pool_t *dp, dmu_tx_t *tx);
void dsl_pool_scrub_restart(dsl_pool_t *dp);
void dsl_pool_upgrade_dir_clones(dsl_pool_t *dp, dmu_tx_t *tx);
taskq_t *dsl_pool_vnrele_taskq(dsl_pool_t *dp);
extern int dsl_pool_user_hold(dsl_pool_t *dp, uint64_t dsobj,
const char *tag, uint64_t *now, dmu_tx_t *tx);
extern int dsl_pool_user_release(dsl_pool_t *dp, uint64_t dsobj,
const char *tag, dmu_tx_t *tx);
extern void dsl_pool_clean_tmp_userrefs(dsl_pool_t *dp);
int dsl_pool_open_special_dir(dsl_pool_t *dp, const char *name, dsl_dir_t **);
#ifdef __cplusplus
}
#endif

View File

@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _SYS_DSL_PROP_H
@ -49,6 +48,25 @@ typedef struct dsl_prop_cb_record {
void *cbr_arg;
} dsl_prop_cb_record_t;
typedef struct dsl_props_arg {
nvlist_t *pa_props;
zprop_source_t pa_source;
} dsl_props_arg_t;
typedef struct dsl_prop_set_arg {
const char *psa_name;
zprop_source_t psa_source;
int psa_intsz;
int psa_numints;
const void *psa_value;
/*
* Used to handle the special requirements of the quota and reservation
* properties.
*/
uint64_t psa_effective_value;
} dsl_prop_setarg_t;
int dsl_prop_register(struct dsl_dataset *ds, const char *propname,
dsl_prop_changed_cb_t *callback, void *cbarg);
int dsl_prop_unregister(struct dsl_dataset *ds, const char *propname,
@ -59,18 +77,36 @@ int dsl_prop_get(const char *ddname, const char *propname,
int intsz, int numints, void *buf, char *setpoint);
int dsl_prop_get_integer(const char *ddname, const char *propname,
uint64_t *valuep, char *setpoint);
int dsl_prop_get_all(objset_t *os, nvlist_t **nvp, boolean_t local);
int dsl_prop_get_all(objset_t *os, nvlist_t **nvp);
int dsl_prop_get_received(objset_t *os, nvlist_t **nvp);
int dsl_prop_get_ds(struct dsl_dataset *ds, const char *propname,
int intsz, int numints, void *buf, char *setpoint);
int dsl_prop_get_dd(struct dsl_dir *dd, const char *propname,
int intsz, int numints, void *buf, char *setpoint);
int intsz, int numints, void *buf, char *setpoint,
boolean_t snapshot);
dsl_syncfunc_t dsl_props_set_sync;
int dsl_prop_set(const char *ddname, const char *propname,
int intsz, int numints, const void *buf);
int dsl_props_set(const char *dsname, nvlist_t *nvl);
zprop_source_t source, int intsz, int numints, const void *buf);
int dsl_props_set(const char *dsname, zprop_source_t source, nvlist_t *nvl);
void dsl_dir_prop_set_uint64_sync(dsl_dir_t *dd, const char *name, uint64_t val,
cred_t *cr, dmu_tx_t *tx);
dmu_tx_t *tx);
void dsl_prop_setarg_init_uint64(dsl_prop_setarg_t *psa, const char *propname,
zprop_source_t source, uint64_t *value);
int dsl_prop_predict_sync(dsl_dir_t *dd, dsl_prop_setarg_t *psa);
#ifdef ZFS_DEBUG
void dsl_prop_check_prediction(dsl_dir_t *dd, dsl_prop_setarg_t *psa);
#define DSL_PROP_CHECK_PREDICTION(dd, psa) \
dsl_prop_check_prediction((dd), (psa))
#else
#define DSL_PROP_CHECK_PREDICTION(dd, psa) /* nothing */
#endif
/* flag first receive on or after SPA_VERSION_RECVD_PROPS */
boolean_t dsl_prop_get_hasrecvd(objset_t *os);
void dsl_prop_set_hasrecvd(objset_t *os);
void dsl_prop_unset_hasrecvd(objset_t *os);
void dsl_prop_nvlist_add_uint64(nvlist_t *nv, zfs_prop_t prop, uint64_t value);
void dsl_prop_nvlist_add_string(nvlist_t *nv,

View File

@ -0,0 +1,108 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _SYS_DSL_SCAN_H
#define _SYS_DSL_SCAN_H
#include <sys/zfs_context.h>
#include <sys/zio.h>
#include <sys/ddt.h>
#include <sys/bplist.h>
#ifdef __cplusplus
extern "C" {
#endif
struct objset;
struct dsl_dir;
struct dsl_dataset;
struct dsl_pool;
struct dmu_tx;
/*
* All members of this structure must be uint64_t, for byteswap
* purposes.
*/
typedef struct dsl_scan_phys {
uint64_t scn_func; /* pool_scan_func_t */
uint64_t scn_state; /* dsl_scan_state_t */
uint64_t scn_queue_obj;
uint64_t scn_min_txg;
uint64_t scn_max_txg;
uint64_t scn_cur_min_txg;
uint64_t scn_cur_max_txg;
uint64_t scn_start_time;
uint64_t scn_end_time;
uint64_t scn_to_examine; /* total bytes to be scanned */
uint64_t scn_examined; /* bytes scanned so far */
uint64_t scn_to_process;
uint64_t scn_processed;
uint64_t scn_errors; /* scan I/O error count */
uint64_t scn_ddt_class_max;
ddt_bookmark_t scn_ddt_bookmark;
zbookmark_t scn_bookmark;
uint64_t scn_flags; /* dsl_scan_flags_t */
} dsl_scan_phys_t;
#define SCAN_PHYS_NUMINTS (sizeof (dsl_scan_phys_t) / sizeof (uint64_t))
typedef enum dsl_scan_flags {
DSF_VISIT_DS_AGAIN = 1<<0,
} dsl_scan_flags_t;
typedef struct dsl_scan {
struct dsl_pool *scn_dp;
boolean_t scn_pausing;
uint64_t scn_restart_txg;
uint64_t scn_sync_start_time;
zio_t *scn_zio_root;
/* for debugging / information */
uint64_t scn_visited_this_txg;
dsl_scan_phys_t scn_phys;
} dsl_scan_t;
int dsl_scan_init(struct dsl_pool *dp, uint64_t txg);
void dsl_scan_fini(struct dsl_pool *dp);
void dsl_scan_sync(struct dsl_pool *, dmu_tx_t *);
int dsl_scan_cancel(struct dsl_pool *);
int dsl_scan(struct dsl_pool *, pool_scan_func_t);
void dsl_resilver_restart(struct dsl_pool *, uint64_t txg);
boolean_t dsl_scan_resilvering(struct dsl_pool *dp);
boolean_t dsl_dataset_unstable(struct dsl_dataset *ds);
void dsl_scan_ddt_entry(dsl_scan_t *scn, enum zio_checksum checksum,
ddt_entry_t *dde, dmu_tx_t *tx);
void dsl_scan_ds_destroyed(struct dsl_dataset *ds, struct dmu_tx *tx);
void dsl_scan_ds_snapshotted(struct dsl_dataset *ds, struct dmu_tx *tx);
void dsl_scan_ds_clone_swapped(struct dsl_dataset *ds1, struct dsl_dataset *ds2,
struct dmu_tx *tx);
boolean_t dsl_scan_active(dsl_scan_t *scn);
#ifdef __cplusplus
}
#endif
#endif /* _SYS_DSL_SCAN_H */

View File

@ -19,15 +19,12 @@
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _SYS_DSL_SYNCTASK_H
#define _SYS_DSL_SYNCTASK_H
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/txg.h>
#include <sys/zfs_context.h>
@ -38,7 +35,7 @@ extern "C" {
struct dsl_pool;
typedef int (dsl_checkfunc_t)(void *, void *, dmu_tx_t *);
typedef void (dsl_syncfunc_t)(void *, void *, cred_t *, dmu_tx_t *);
typedef void (dsl_syncfunc_t)(void *, void *, dmu_tx_t *);
typedef struct dsl_sync_task {
list_node_t dst_node;
@ -53,7 +50,6 @@ typedef struct dsl_sync_task_group {
txg_node_t dstg_node;
list_t dstg_tasks;
struct dsl_pool *dstg_pool;
cred_t *dstg_cr;
uint64_t dstg_txg;
int dstg_err;
int dstg_space;

View File

@ -68,6 +68,18 @@ extern "C" {
#define FM_EREPORT_PAYLOAD_ZFS_ZIO_OFFSET "zio_offset"
#define FM_EREPORT_PAYLOAD_ZFS_ZIO_SIZE "zio_size"
#define FM_EREPORT_PAYLOAD_ZFS_PREV_STATE "prev_state"
#define FM_EREPORT_PAYLOAD_ZFS_CKSUM_EXPECTED "cksum_expected"
#define FM_EREPORT_PAYLOAD_ZFS_CKSUM_ACTUAL "cksum_actual"
#define FM_EREPORT_PAYLOAD_ZFS_CKSUM_ALGO "cksum_algorithm"
#define FM_EREPORT_PAYLOAD_ZFS_CKSUM_BYTESWAP "cksum_byteswap"
#define FM_EREPORT_PAYLOAD_ZFS_BAD_OFFSET_RANGES "bad_ranges"
#define FM_EREPORT_PAYLOAD_ZFS_BAD_RANGE_MIN_GAP "bad_ranges_min_gap"
#define FM_EREPORT_PAYLOAD_ZFS_BAD_RANGE_SETS "bad_range_sets"
#define FM_EREPORT_PAYLOAD_ZFS_BAD_RANGE_CLEARS "bad_range_clears"
#define FM_EREPORT_PAYLOAD_ZFS_BAD_SET_BITS "bad_set_bits"
#define FM_EREPORT_PAYLOAD_ZFS_BAD_CLEARED_BITS "bad_cleared_bits"
#define FM_EREPORT_PAYLOAD_ZFS_BAD_SET_HISTOGRAM "bad_set_histogram"
#define FM_EREPORT_PAYLOAD_ZFS_BAD_CLEARED_HISTOGRAM "bad_cleared_histogram"
#define FM_EREPORT_FAILMODE_WAIT "wait"
#define FM_EREPORT_FAILMODE_CONTINUE "continue"
@ -75,6 +87,7 @@ extern "C" {
#define FM_RESOURCE_REMOVED "removed"
#define FM_RESOURCE_AUTOREPLACE "autoreplace"
#define FM_RESOURCE_STATECHANGE "statechange"
#ifdef __cplusplus
}

View File

@ -20,8 +20,7 @@
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _SYS_FM_PROTOCOL_H
@ -47,6 +46,7 @@ extern "C" {
/* FM event class values */
#define FM_EREPORT_CLASS "ereport"
#define FM_FAULT_CLASS "fault"
#define FM_DEFECT_CLASS "defect"
#define FM_RSRC_CLASS "resource"
#define FM_LIST_EVENT "list"
@ -83,6 +83,7 @@ extern "C" {
#define FM_SUSPECT_FAULT_LIST "fault-list"
#define FM_SUSPECT_FAULT_SZ "fault-list-sz"
#define FM_SUSPECT_FAULT_STATUS "fault-status"
#define FM_SUSPECT_INJECTED "__injected"
#define FM_SUSPECT_MESSAGE "message"
#define FM_SUSPECT_RETIRE "retire"
#define FM_SUSPECT_RESPONSE "response"
@ -122,6 +123,7 @@ extern "C" {
#define FM_RSRC_ASRU_REPAIRED "repaired"
#define FM_RSRC_ASRU_REPLACED "replaced"
#define FM_RSRC_ASRU_ACQUITTED "acquitted"
#define FM_RSRC_ASRU_RESOLVED "resolved"
#define FM_RSRC_ASRU_UNUSABLE "unusable"
#define FM_RSRC_ASRU_EVENT "event"
@ -170,6 +172,7 @@ extern "C" {
/* FMRI authority-type member names */
#define FM_FMRI_AUTH_CHASSIS "chassis-id"
#define FM_FMRI_AUTH_PRODUCT_SN "product-sn"
#define FM_FMRI_AUTH_PRODUCT "product-id"
#define FM_FMRI_AUTH_DOMAIN "domain-id"
#define FM_FMRI_AUTH_SERVER "server-id"
@ -243,6 +246,7 @@ extern "C" {
/* dev scheme member names */
#define FM_FMRI_DEV_ID "devid"
#define FM_FMRI_DEV_TGTPTLUN0 "target-port-l0id"
#define FM_FMRI_DEV_PATH "device-path"
/* pkg scheme member names */
@ -311,7 +315,7 @@ extern int i_fm_payload_set(nvlist_t *, const char *, va_list);
extern void fm_fmri_hc_set(nvlist_t *, int, const nvlist_t *, nvlist_t *,
int, ...);
extern void fm_fmri_dev_set(nvlist_t *, int, const nvlist_t *, const char *,
const char *);
const char *, const char *);
extern void fm_fmri_de_set(nvlist_t *, int, const nvlist_t *, const char *);
extern void fm_fmri_cpu_set(nvlist_t *, int, const nvlist_t *, uint32_t,
uint8_t *, const char *);
@ -320,6 +324,8 @@ extern void fm_fmri_mem_set(nvlist_t *, int, const nvlist_t *, const char *,
extern void fm_authority_set(nvlist_t *, int, const char *, const char *,
const char *, const char *);
extern void fm_fmri_zfs_set(nvlist_t *, int, uint64_t, uint64_t);
extern void fm_fmri_hc_create(nvlist_t *, int, const nvlist_t *, nvlist_t *,
nvlist_t *, int, ...);
extern uint64_t fm_ena_increment(uint64_t);
extern uint64_t fm_ena_generate(uint64_t, uchar_t);

View File

@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _SYS_METASLAB_H
@ -36,9 +35,6 @@
extern "C" {
#endif
typedef struct metaslab_class metaslab_class_t;
typedef struct metaslab_group metaslab_group_t;
extern space_map_ops_t *zfs_metaslab_ops;
extern metaslab_t *metaslab_init(metaslab_group_t *mg, space_map_obj_t *smo,
@ -46,6 +42,7 @@ extern metaslab_t *metaslab_init(metaslab_group_t *mg, space_map_obj_t *smo,
extern void metaslab_fini(metaslab_t *msp);
extern void metaslab_sync(metaslab_t *msp, uint64_t txg);
extern void metaslab_sync_done(metaslab_t *msp, uint64_t txg);
extern void metaslab_sync_reassess(metaslab_group_t *mg);
#define METASLAB_HINTBP_FAVOR 0x0
#define METASLAB_HINTBP_AVOID 0x1
@ -57,14 +54,24 @@ extern void metaslab_free(spa_t *spa, const blkptr_t *bp, uint64_t txg,
boolean_t now);
extern int metaslab_claim(spa_t *spa, const blkptr_t *bp, uint64_t txg);
extern metaslab_class_t *metaslab_class_create(space_map_ops_t *ops);
extern metaslab_class_t *metaslab_class_create(spa_t *spa,
space_map_ops_t *ops);
extern void metaslab_class_destroy(metaslab_class_t *mc);
extern void metaslab_class_add(metaslab_class_t *mc, metaslab_group_t *mg);
extern void metaslab_class_remove(metaslab_class_t *mc, metaslab_group_t *mg);
extern int metaslab_class_validate(metaslab_class_t *mc);
extern void metaslab_class_space_update(metaslab_class_t *mc,
int64_t alloc_delta, int64_t defer_delta,
int64_t space_delta, int64_t dspace_delta);
extern uint64_t metaslab_class_get_alloc(metaslab_class_t *mc);
extern uint64_t metaslab_class_get_space(metaslab_class_t *mc);
extern uint64_t metaslab_class_get_dspace(metaslab_class_t *mc);
extern uint64_t metaslab_class_get_deferred(metaslab_class_t *mc);
extern metaslab_group_t *metaslab_group_create(metaslab_class_t *mc,
vdev_t *vd);
extern void metaslab_group_destroy(metaslab_group_t *mg);
extern void metaslab_group_activate(metaslab_group_t *mg);
extern void metaslab_group_passivate(metaslab_group_t *mg);
#ifdef __cplusplus
}

View File

@ -37,16 +37,23 @@ extern "C" {
#endif
struct metaslab_class {
spa_t *mc_spa;
metaslab_group_t *mc_rotor;
uint64_t mc_allocated;
space_map_ops_t *mc_ops;
uint64_t mc_aliquot;
uint64_t mc_alloc; /* total allocated space */
uint64_t mc_deferred; /* total deferred frees */
uint64_t mc_space; /* total space (alloc + free) */
uint64_t mc_dspace; /* total deflated space */
};
struct metaslab_group {
kmutex_t mg_lock;
avl_tree_t mg_metaslab_tree;
uint64_t mg_aliquot;
uint64_t mg_bonus_area;
int64_t mg_bias;
int64_t mg_activation_count;
metaslab_class_t *mg_class;
vdev_t *mg_vd;
metaslab_group_t *mg_prev;
@ -66,7 +73,9 @@ struct metaslab {
space_map_obj_t ms_smo_syncing; /* syncing space map object */
space_map_t ms_allocmap[TXG_SIZE]; /* allocated this txg */
space_map_t ms_freemap[TXG_SIZE]; /* freed this txg */
space_map_t ms_defermap[TXG_DEFER_SIZE]; /* deferred frees */
space_map_t ms_map; /* in-core free space map */
int64_t ms_deferspace; /* sum of ms_defermap[] space */
uint64_t ms_weight; /* weight vs. others in group */
metaslab_group_t *ms_group; /* metaslab group */
avl_node_t ms_group_node; /* node in metaslab group tree */

View File

@ -19,15 +19,12 @@
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _SYS_REFCOUNT_H
#define _SYS_REFCOUNT_H
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/inttypes.h>
#include <sys/list.h>
#include <sys/zfs_context.h>
@ -91,6 +88,11 @@ typedef struct refcount {
atomic_add_64_nv(&(rc)->rc_count, number)
#define refcount_remove_many(rc, number, holder) \
atomic_add_64_nv(&(rc)->rc_count, -number)
#define refcount_transfer(dst, src) { \
uint64_t __tmp = (src)->rc_count; \
atomic_add_64(&(src)->rc_count, -__tmp); \
atomic_add_64(&(dst)->rc_count, __tmp); \
}
#define refcount_init()
#define refcount_fini()

171
module/zfs/include/sys/sa.h Normal file
View File

@ -0,0 +1,171 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_SA_H
#define _SYS_SA_H
#include <sys/dmu.h>
/*
* Currently available byteswap functions.
* If it all possible new attributes should used
* one of the already defined byteswap functions.
* If a new byteswap function is added then the
* ZPL/Pool version will need to be bumped.
*/
typedef enum sa_bswap_type {
SA_UINT64_ARRAY,
SA_UINT32_ARRAY,
SA_UINT16_ARRAY,
SA_UINT8_ARRAY,
SA_ACL,
} sa_bswap_type_t;
typedef uint16_t sa_attr_type_t;
/*
* Attribute to register support for.
*/
typedef struct sa_attr_reg {
char *sa_name; /* attribute name */
uint16_t sa_length;
sa_bswap_type_t sa_byteswap; /* bswap functon enum */
sa_attr_type_t sa_attr; /* filled in during registration */
} sa_attr_reg_t;
typedef void (sa_data_locator_t)(void **, uint32_t *, uint32_t,
boolean_t, void *userptr);
/*
* array of attributes to store.
*
* This array should be treated as opaque/private data.
* The SA_BULK_ADD_ATTR() macro should be used for manipulating
* the array.
*
* When sa_replace_all_by_template() is used the attributes
* will be stored in the order defined in the array, except that
* the attributes may be split between the bonus and the spill buffer
*
*/
typedef struct sa_bulk_attr {
void *sa_data;
sa_data_locator_t *sa_data_func;
uint16_t sa_length;
sa_attr_type_t sa_attr;
/* the following are private to the sa framework */
void *sa_addr;
uint16_t sa_buftype;
uint16_t sa_size;
} sa_bulk_attr_t;
/*
* special macro for adding entries for bulk attr support
* bulk - sa_bulk_attr_t
* count - integer that will be incremented during each add
* attr - attribute to manipulate
* func - function for accessing data.
* data - pointer to data.
* len - length of data
*/
#define SA_ADD_BULK_ATTR(b, idx, attr, func, data, len) \
{ \
b[idx].sa_attr = attr;\
b[idx].sa_data_func = func; \
b[idx].sa_data = data; \
b[idx++].sa_length = len; \
}
typedef struct sa_os sa_os_t;
typedef enum sa_handle_type {
SA_HDL_SHARED,
SA_HDL_PRIVATE
} sa_handle_type_t;
struct sa_handle;
typedef void *sa_lookup_tab_t;
typedef struct sa_handle sa_handle_t;
typedef void (sa_update_cb_t)(sa_handle_t *, dmu_tx_t *tx);
int sa_handle_get(objset_t *, uint64_t, void *userp,
sa_handle_type_t, sa_handle_t **);
int sa_handle_get_from_db(objset_t *, dmu_buf_t *, void *userp,
sa_handle_type_t, sa_handle_t **);
void sa_handle_destroy(sa_handle_t *);
int sa_buf_hold(objset_t *, uint64_t, void *, dmu_buf_t **);
void sa_buf_rele(dmu_buf_t *, void *);
int sa_lookup(sa_handle_t *, sa_attr_type_t, void *buf, uint32_t buflen);
int sa_update(sa_handle_t *, sa_attr_type_t, void *buf,
uint32_t buflen, dmu_tx_t *);
int sa_remove(sa_handle_t *, sa_attr_type_t, dmu_tx_t *);
int sa_bulk_lookup(sa_handle_t *, sa_bulk_attr_t *, int count);
int sa_bulk_lookup_locked(sa_handle_t *, sa_bulk_attr_t *, int count);
int sa_bulk_update(sa_handle_t *, sa_bulk_attr_t *, int count, dmu_tx_t *);
int sa_size(sa_handle_t *, sa_attr_type_t, int *);
int sa_update_from_cb(sa_handle_t *, sa_attr_type_t,
uint32_t buflen, sa_data_locator_t *, void *userdata, dmu_tx_t *);
void sa_object_info(sa_handle_t *, dmu_object_info_t *);
void sa_object_size(sa_handle_t *, uint32_t *, u_longlong_t *);
void sa_update_user(sa_handle_t *, sa_handle_t *);
void *sa_get_userdata(sa_handle_t *);
void sa_set_userp(sa_handle_t *, void *);
dmu_buf_t *sa_get_db(sa_handle_t *);
uint64_t sa_handle_object(sa_handle_t *);
boolean_t sa_attr_would_spill(sa_handle_t *, sa_attr_type_t, int size);
void sa_register_update_callback(objset_t *, sa_update_cb_t *);
sa_attr_type_t *sa_setup(objset_t *, uint64_t, sa_attr_reg_t *, int);
void sa_tear_down(objset_t *);
int sa_replace_all_by_template(sa_handle_t *, sa_bulk_attr_t *,
int, dmu_tx_t *);
int sa_replace_all_by_template_locked(sa_handle_t *, sa_bulk_attr_t *,
int, dmu_tx_t *);
boolean_t sa_enabled(objset_t *);
void sa_cache_init();
void sa_cache_fini();
int sa_set_sa_object(objset_t *, uint64_t);
int sa_hdrsize(void *);
void sa_handle_lock(sa_handle_t *);
void sa_handle_unlock(sa_handle_t *);
#ifdef _KERNEL
int sa_lookup_uio(sa_handle_t *, sa_attr_type_t, uio_t *);
#endif
#ifdef __cplusplus
extern "C" {
#endif
#ifdef __cplusplus
}
#endif
#endif /* _SYS_SA_H */

View File

@ -0,0 +1,288 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_SA_IMPL_H
#define _SYS_SA_IMPL_H
#include <sys/dmu.h>
#include <sys/refcount.h>
#include <sys/list.h>
/*
* Array of known attributes and their
* various characteristics.
*/
typedef struct sa_attr_table {
sa_attr_type_t sa_attr;
uint8_t sa_registered;
uint16_t sa_length;
sa_bswap_type_t sa_byteswap;
char *sa_name;
} sa_attr_table_t;
/*
* Zap attribute format for attribute registration
*
* 64 56 48 40 32 24 16 8 0
* +-------+-------+-------+-------+-------+-------+-------+-------+
* | unused | len | bswap | attr num |
* +-------+-------+-------+-------+-------+-------+-------+-------+
*
* Zap attribute format for layout information.
*
* layout information is stored as an array of attribute numbers
* The name of the attribute is the layout number (0, 1, 2, ...)
*
* 16 0
* +---- ---+
* | attr # |
* +--------+
* | attr # |
* +--- ----+
* ......
*
*/
#define ATTR_BSWAP(x) BF32_GET(x, 16, 8)
#define ATTR_LENGTH(x) BF32_GET(x, 24, 16)
#define ATTR_NUM(x) BF32_GET(x, 0, 16)
#define ATTR_ENCODE(x, attr, length, bswap) \
{ \
BF64_SET(x, 24, 16, length); \
BF64_SET(x, 16, 8, bswap); \
BF64_SET(x, 0, 16, attr); \
}
#define TOC_OFF(x) BF32_GET(x, 0, 23)
#define TOC_ATTR_PRESENT(x) BF32_GET(x, 31, 1)
#define TOC_LEN_IDX(x) BF32_GET(x, 24, 4)
#define TOC_ATTR_ENCODE(x, len_idx, offset) \
{ \
BF32_SET(x, 31, 1, 1); \
BF32_SET(x, 24, 7, len_idx); \
BF32_SET(x, 0, 24, offset); \
}
#define SA_LAYOUTS "LAYOUTS"
#define SA_REGISTRY "REGISTRY"
/*
* Each unique layout will have their own table
* sa_lot (layout_table)
*/
typedef struct sa_lot {
avl_node_t lot_num_node;
avl_node_t lot_hash_node;
uint64_t lot_num;
uint64_t lot_hash;
sa_attr_type_t *lot_attrs; /* array of attr #'s */
uint32_t lot_var_sizes; /* how many aren't fixed size */
uint32_t lot_attr_count; /* total attr count */
list_t lot_idx_tab; /* should be only a couple of entries */
int lot_instance; /* used with lot_hash to identify entry */
} sa_lot_t;
/* index table of offsets */
typedef struct sa_idx_tab {
list_node_t sa_next;
sa_lot_t *sa_layout;
uint16_t *sa_variable_lengths;
refcount_t sa_refcount;
uint32_t *sa_idx_tab; /* array of offsets */
} sa_idx_tab_t;
/*
* Since the offset/index information into the actual data
* will usually be identical we can share that information with
* all handles that have the exact same offsets.
*
* You would typically only have a large number of different table of
* contents if you had a several variable sized attributes.
*
* Two AVL trees are used to track the attribute layout numbers.
* one is keyed by number and will be consulted when a DMU_OT_SA
* object is first read. The second tree is keyed by the hash signature
* of the attributes and will be consulted when an attribute is added
* to determine if we already have an instance of that layout. Both
* of these tree's are interconnected. The only difference is that
* when an entry is found in the "hash" tree the list of attributes will
* need to be compared against the list of attributes you have in hand.
* The assumption is that typically attributes will just be updated and
* adding a completely new attribute is a very rare operation.
*/
struct sa_os {
kmutex_t sa_lock;
boolean_t sa_need_attr_registration;
boolean_t sa_force_spill;
uint64_t sa_master_obj;
uint64_t sa_reg_attr_obj;
uint64_t sa_layout_attr_obj;
int sa_num_attrs;
sa_attr_table_t *sa_attr_table; /* private attr table */
sa_update_cb_t *sa_update_cb;
avl_tree_t sa_layout_num_tree; /* keyed by layout number */
avl_tree_t sa_layout_hash_tree; /* keyed by layout hash value */
int sa_user_table_sz;
sa_attr_type_t *sa_user_table; /* user name->attr mapping table */
};
/*
* header for all bonus and spill buffers.
* The header has a fixed portion with a variable number
* of "lengths" depending on the number of variable sized
* attribues which are determined by the "layout number"
*/
#define SA_MAGIC 0x2F505A /* ZFS SA */
typedef struct sa_hdr_phys {
uint32_t sa_magic;
uint16_t sa_layout_info; /* Encoded with hdrsize and layout number */
uint16_t sa_lengths[1]; /* optional sizes for variable length attrs */
/* ... Data follows the lengths. */
} sa_hdr_phys_t;
/*
* sa_hdr_phys -> sa_layout_info
*
* 16 10 0
* +--------+-------+
* | hdrsz |layout |
* +--------+-------+
*
* Bits 0-10 are the layout number
* Bits 11-16 are the size of the header.
* The hdrsize is the number * 8
*
* For example.
* hdrsz of 1 ==> 8 byte header
* 2 ==> 16 byte header
*
*/
#define SA_HDR_LAYOUT_NUM(hdr) BF32_GET(hdr->sa_layout_info, 0, 10)
#define SA_HDR_SIZE(hdr) BF32_GET_SB(hdr->sa_layout_info, 10, 16, 3, 0)
#define SA_HDR_LAYOUT_INFO_ENCODE(x, num, size) \
{ \
BF32_SET_SB(x, 10, 6, 3, 0, size); \
BF32_SET(x, 0, 10, num); \
}
typedef enum sa_buf_type {
SA_BONUS = 1,
SA_SPILL = 2
} sa_buf_type_t;
typedef enum sa_data_op {
SA_LOOKUP,
SA_UPDATE,
SA_ADD,
SA_REPLACE,
SA_REMOVE
} sa_data_op_t;
/*
* Opaque handle used for most sa functions
*
* This needs to be kept as small as possible.
*/
struct sa_handle {
kmutex_t sa_lock;
dmu_buf_t *sa_bonus;
dmu_buf_t *sa_spill;
objset_t *sa_os;
void *sa_userp;
sa_idx_tab_t *sa_bonus_tab; /* idx of bonus */
sa_idx_tab_t *sa_spill_tab; /* only present if spill activated */
};
#define SA_GET_DB(hdl, type) \
(dmu_buf_impl_t *)((type == SA_BONUS) ? hdl->sa_bonus : hdl->sa_spill)
#define SA_GET_HDR(hdl, type) \
((sa_hdr_phys_t *)((dmu_buf_impl_t *)(SA_GET_DB(hdl, \
type))->db.db_data))
#define SA_IDX_TAB_GET(hdl, type) \
(type == SA_BONUS ? hdl->sa_bonus_tab : hdl->sa_spill_tab)
#define IS_SA_BONUSTYPE(a) \
((a == DMU_OT_SA) ? B_TRUE : B_FALSE)
#define SA_BONUSTYPE_FROM_DB(db) \
(((dmu_buf_impl_t *)db)->db_dnode->dn_bonustype)
#define SA_BLKPTR_SPACE (DN_MAX_BONUSLEN - sizeof (blkptr_t))
#define SA_LAYOUT_NUM(x, type) \
((!IS_SA_BONUSTYPE(type) ? 0 : (((IS_SA_BONUSTYPE(type)) && \
((SA_HDR_LAYOUT_NUM(x)) == 0)) ? 1 : SA_HDR_LAYOUT_NUM(x))))
#define SA_REGISTERED_LEN(sa, attr) sa->sa_attr_table[attr].sa_length
#define SA_ATTR_LEN(sa, idx, attr, hdr) ((SA_REGISTERED_LEN(sa, attr) == 0) ?\
hdr->sa_lengths[TOC_LEN_IDX(idx->sa_idx_tab[attr])] : \
SA_REGISTERED_LEN(sa, attr))
#define SA_SET_HDR(hdr, num, size) \
{ \
hdr->sa_magic = SA_MAGIC; \
SA_HDR_LAYOUT_INFO_ENCODE(hdr->sa_layout_info, num, size); \
}
#define SA_ATTR_INFO(sa, idx, hdr, attr, bulk, type, hdl) \
{ \
bulk.sa_size = SA_ATTR_LEN(sa, idx, attr, hdr); \
bulk.sa_buftype = type; \
bulk.sa_addr = \
(void *)((uintptr_t)TOC_OFF(idx->sa_idx_tab[attr]) + \
(uintptr_t)hdr); \
}
#define SA_HDR_SIZE_MATCH_LAYOUT(hdr, tb) \
(SA_HDR_SIZE(hdr) == (sizeof (sa_hdr_phys_t) + \
(tb->lot_var_sizes > 1 ? P2ROUNDUP((tb->lot_var_sizes - 1) * \
sizeof (uint16_t), 8) : 0)))
int sa_add_impl(sa_handle_t *, sa_attr_type_t,
uint32_t, sa_data_locator_t, void *, dmu_tx_t *);
void sa_register_update_callback_locked(objset_t *, sa_update_cb_t *);
int sa_size_locked(sa_handle_t *, sa_attr_type_t, int *);
void sa_default_locator(void **, uint32_t *, uint32_t, boolean_t, void *);
int sa_attr_size(sa_os_t *, sa_idx_tab_t *, sa_attr_type_t,
uint16_t *, sa_hdr_phys_t *);
#ifdef __cplusplus
extern "C" {
#endif
#ifdef __cplusplus
}
#endif
#endif /* _SYS_SA_IMPL_H */

View File

@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _SYS_SPA_H
@ -43,8 +42,13 @@ extern "C" {
typedef struct spa spa_t;
typedef struct vdev vdev_t;
typedef struct metaslab metaslab_t;
typedef struct metaslab_group metaslab_group_t;
typedef struct metaslab_class metaslab_class_t;
typedef struct zio zio_t;
typedef struct zilog zilog_t;
typedef struct spa_aux_vdev spa_aux_vdev_t;
typedef struct ddt ddt_t;
typedef struct ddt_entry ddt_entry_t;
struct dsl_pool;
/*
@ -134,15 +138,15 @@ typedef struct zio_cksum {
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 5 |G| offset3 |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 6 |E| lvl | type | cksum | comp | PSIZE | LSIZE |
* 6 |BDX|lvl| type | cksum | comp | PSIZE | LSIZE |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 7 | padding |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 8 | padding |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* 9 | padding |
* 9 | physical birth txg |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* a | birth txg |
* a | logical birth txg |
* +-------+-------+-------+-------+-------+-------+-------+-------+
* b | fill count |
* +-------+-------+-------+-------+-------+-------+-------+-------+
@ -166,25 +170,29 @@ typedef struct zio_cksum {
* cksum checksum function
* comp compression function
* G gang block indicator
* E endianness
* type DMU object type
* B byteorder (endianness)
* D dedup
* X unused
* lvl level of indirection
* birth txg transaction group in which the block was born
* type DMU object type
* phys birth txg of block allocation; zero if same as logical birth txg
* log. birth transaction group in which the block was logically born
* fill count number of non-zero blocks under this bp
* checksum[4] 256-bit checksum of the data this bp describes
*/
typedef struct blkptr {
dva_t blk_dva[3]; /* 128-bit Data Virtual Address */
uint64_t blk_prop; /* size, compression, type, etc */
uint64_t blk_pad[3]; /* Extra space for the future */
uint64_t blk_birth; /* transaction group at birth */
uint64_t blk_fill; /* fill count */
zio_cksum_t blk_cksum; /* 256-bit checksum */
} blkptr_t;
#define SPA_BLKPTRSHIFT 7 /* blkptr_t is 128 bytes */
#define SPA_DVAS_PER_BP 3 /* Number of DVAs in a bp */
typedef struct blkptr {
dva_t blk_dva[SPA_DVAS_PER_BP]; /* Data Virtual Addresses */
uint64_t blk_prop; /* size, compression, type, etc */
uint64_t blk_pad[2]; /* Extra space for the future */
uint64_t blk_phys_birth; /* txg when block was allocated */
uint64_t blk_birth; /* transaction group at birth */
uint64_t blk_fill; /* fill count */
zio_cksum_t blk_cksum; /* 256-bit checksum */
} blkptr_t;
/*
* Macros to get and set fields in a bp or DVA.
*/
@ -208,8 +216,7 @@ typedef struct blkptr {
#define DVA_SET_GANG(dva, x) BF64_SET((dva)->dva_word[1], 63, 1, x)
#define BP_GET_LSIZE(bp) \
(BP_IS_HOLE(bp) ? 0 : \
BF64_GET_SB((bp)->blk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1))
BF64_GET_SB((bp)->blk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1)
#define BP_SET_LSIZE(bp, x) \
BF64_SET_SB((bp)->blk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1, x)
@ -218,20 +225,35 @@ typedef struct blkptr {
#define BP_SET_PSIZE(bp, x) \
BF64_SET_SB((bp)->blk_prop, 16, 16, SPA_MINBLOCKSHIFT, 1, x)
#define BP_GET_COMPRESS(bp) BF64_GET((bp)->blk_prop, 32, 8)
#define BP_SET_COMPRESS(bp, x) BF64_SET((bp)->blk_prop, 32, 8, x)
#define BP_GET_COMPRESS(bp) BF64_GET((bp)->blk_prop, 32, 8)
#define BP_SET_COMPRESS(bp, x) BF64_SET((bp)->blk_prop, 32, 8, x)
#define BP_GET_CHECKSUM(bp) BF64_GET((bp)->blk_prop, 40, 8)
#define BP_SET_CHECKSUM(bp, x) BF64_SET((bp)->blk_prop, 40, 8, x)
#define BP_GET_CHECKSUM(bp) BF64_GET((bp)->blk_prop, 40, 8)
#define BP_SET_CHECKSUM(bp, x) BF64_SET((bp)->blk_prop, 40, 8, x)
#define BP_GET_TYPE(bp) BF64_GET((bp)->blk_prop, 48, 8)
#define BP_SET_TYPE(bp, x) BF64_SET((bp)->blk_prop, 48, 8, x)
#define BP_GET_TYPE(bp) BF64_GET((bp)->blk_prop, 48, 8)
#define BP_SET_TYPE(bp, x) BF64_SET((bp)->blk_prop, 48, 8, x)
#define BP_GET_LEVEL(bp) BF64_GET((bp)->blk_prop, 56, 5)
#define BP_SET_LEVEL(bp, x) BF64_SET((bp)->blk_prop, 56, 5, x)
#define BP_GET_LEVEL(bp) BF64_GET((bp)->blk_prop, 56, 5)
#define BP_SET_LEVEL(bp, x) BF64_SET((bp)->blk_prop, 56, 5, x)
#define BP_GET_BYTEORDER(bp) (0 - BF64_GET((bp)->blk_prop, 63, 1))
#define BP_SET_BYTEORDER(bp, x) BF64_SET((bp)->blk_prop, 63, 1, x)
#define BP_GET_PROP_BIT_61(bp) BF64_GET((bp)->blk_prop, 61, 1)
#define BP_SET_PROP_BIT_61(bp, x) BF64_SET((bp)->blk_prop, 61, 1, x)
#define BP_GET_DEDUP(bp) BF64_GET((bp)->blk_prop, 62, 1)
#define BP_SET_DEDUP(bp, x) BF64_SET((bp)->blk_prop, 62, 1, x)
#define BP_GET_BYTEORDER(bp) (0 - BF64_GET((bp)->blk_prop, 63, 1))
#define BP_SET_BYTEORDER(bp, x) BF64_SET((bp)->blk_prop, 63, 1, x)
#define BP_PHYSICAL_BIRTH(bp) \
((bp)->blk_phys_birth ? (bp)->blk_phys_birth : (bp)->blk_birth)
#define BP_SET_BIRTH(bp, logical, physical) \
{ \
(bp)->blk_birth = (logical); \
(bp)->blk_phys_birth = ((logical) == (physical) ? 0 : (physical)); \
}
#define BP_GET_ASIZE(bp) \
(DVA_GET_ASIZE(&(bp)->blk_dva[0]) + DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \
@ -239,7 +261,7 @@ typedef struct blkptr {
#define BP_GET_UCSIZE(bp) \
((BP_GET_LEVEL(bp) > 0 || dmu_ot[BP_GET_TYPE(bp)].ot_metadata) ? \
BP_GET_PSIZE(bp) : BP_GET_LSIZE(bp));
BP_GET_PSIZE(bp) : BP_GET_LSIZE(bp))
#define BP_GET_NDVAS(bp) \
(!!DVA_GET_ASIZE(&(bp)->blk_dva[0]) + \
@ -255,6 +277,12 @@ typedef struct blkptr {
((dva1)->dva_word[1] == (dva2)->dva_word[1] && \
(dva1)->dva_word[0] == (dva2)->dva_word[0])
#define BP_EQUAL(bp1, bp2) \
(BP_PHYSICAL_BIRTH(bp1) == BP_PHYSICAL_BIRTH(bp2) && \
DVA_EQUAL(&(bp1)->blk_dva[0], &(bp2)->blk_dva[0]) && \
DVA_EQUAL(&(bp1)->blk_dva[1], &(bp2)->blk_dva[1]) && \
DVA_EQUAL(&(bp1)->blk_dva[2], &(bp2)->blk_dva[2]))
#define ZIO_CHECKSUM_EQUAL(zc1, zc2) \
(0 == (((zc1).zc_word[0] - (zc2).zc_word[0]) | \
((zc1).zc_word[1] - (zc2).zc_word[1]) | \
@ -274,7 +302,10 @@ typedef struct blkptr {
#define BP_IDENTITY(bp) (&(bp)->blk_dva[0])
#define BP_IS_GANG(bp) DVA_GET_GANG(BP_IDENTITY(bp))
#define BP_IS_HOLE(bp) ((bp)->blk_birth == 0)
#define BP_IS_OLDER(bp, txg) (!BP_IS_HOLE(bp) && (bp)->blk_birth < (txg))
/* BP_IS_RAIDZ(bp) assumes no block compression */
#define BP_IS_RAIDZ(bp) (DVA_GET_ASIZE(&(bp)->blk_dva[0]) > \
BP_GET_PSIZE(bp))
#define BP_ZERO(bp) \
{ \
@ -287,14 +318,12 @@ typedef struct blkptr {
(bp)->blk_prop = 0; \
(bp)->blk_pad[0] = 0; \
(bp)->blk_pad[1] = 0; \
(bp)->blk_pad[2] = 0; \
(bp)->blk_phys_birth = 0; \
(bp)->blk_birth = 0; \
(bp)->blk_fill = 0; \
ZIO_SET_CHECKSUM(&(bp)->blk_cksum, 0, 0, 0, 0); \
}
#define BLK_FILL_ALREADY_FREED (-1ULL)
/*
* Note: the byteorder is either 0 or -1, both of which are palindromes.
* This simplifies the endianness handling a bit.
@ -309,17 +338,81 @@ typedef struct blkptr {
#define BP_SPRINTF_LEN 320
/*
* This macro allows code sharing between zfs, libzpool, and mdb.
* 'func' is either snprintf() or mdb_snprintf().
* 'ws' (whitespace) can be ' ' for single-line format, '\n' for multi-line.
*/
#define SPRINTF_BLKPTR(func, ws, buf, bp, type, checksum, compress) \
{ \
static const char *copyname[] = \
{ "zero", "single", "double", "triple" }; \
int size = BP_SPRINTF_LEN; \
int len = 0; \
int copies = 0; \
\
if (bp == NULL) { \
len = func(buf + len, size - len, "<NULL>"); \
} else if (BP_IS_HOLE(bp)) { \
len = func(buf + len, size - len, "<hole>"); \
} else { \
for (int d = 0; d < BP_GET_NDVAS(bp); d++) { \
const dva_t *dva = &bp->blk_dva[d]; \
if (DVA_IS_VALID(dva)) \
copies++; \
len += func(buf + len, size - len, \
"DVA[%d]=<%llu:%llx:%llx>%c", d, \
(u_longlong_t)DVA_GET_VDEV(dva), \
(u_longlong_t)DVA_GET_OFFSET(dva), \
(u_longlong_t)DVA_GET_ASIZE(dva), \
ws); \
} \
if (BP_IS_GANG(bp) && \
DVA_GET_ASIZE(&bp->blk_dva[2]) <= \
DVA_GET_ASIZE(&bp->blk_dva[1]) / 2) \
copies--; \
len += func(buf + len, size - len, \
"[L%llu %s] %s %s %s %s %s %s%c" \
"size=%llxL/%llxP birth=%lluL/%lluP fill=%llu%c" \
"cksum=%llx:%llx:%llx:%llx", \
(u_longlong_t)BP_GET_LEVEL(bp), \
type, \
checksum, \
compress, \
BP_GET_BYTEORDER(bp) == 0 ? "BE" : "LE", \
BP_IS_GANG(bp) ? "gang" : "contiguous", \
BP_GET_DEDUP(bp) ? "dedup" : "unique", \
copyname[copies], \
ws, \
(u_longlong_t)BP_GET_LSIZE(bp), \
(u_longlong_t)BP_GET_PSIZE(bp), \
(u_longlong_t)bp->blk_birth, \
(u_longlong_t)BP_PHYSICAL_BIRTH(bp), \
(u_longlong_t)bp->blk_fill, \
ws, \
(u_longlong_t)bp->blk_cksum.zc_word[0], \
(u_longlong_t)bp->blk_cksum.zc_word[1], \
(u_longlong_t)bp->blk_cksum.zc_word[2], \
(u_longlong_t)bp->blk_cksum.zc_word[3]); \
} \
ASSERT(len < size); \
}
#include <sys/dmu.h>
#define BP_GET_BUFC_TYPE(bp) \
(((BP_GET_LEVEL(bp) > 0) || (dmu_ot[BP_GET_TYPE(bp)].ot_metadata)) ? \
ARC_BUFC_METADATA : ARC_BUFC_DATA);
/*
* Routines found in spa.c
*/
typedef enum spa_import_type {
SPA_IMPORT_EXISTING,
SPA_IMPORT_ASSEMBLE
} spa_import_type_t;
/* state manipulation functions */
extern int spa_open(const char *pool, spa_t **, void *tag);
extern int spa_open_rewind(const char *pool, spa_t **, void *tag,
nvlist_t *policy, nvlist_t **config);
extern int spa_get_stats(const char *pool, nvlist_t **config,
char *altroot, size_t buflen);
extern int spa_create(const char *pool, nvlist_t *config, nvlist_t *props,
@ -338,6 +431,8 @@ extern void spa_async_suspend(spa_t *spa);
extern void spa_async_resume(spa_t *spa);
extern spa_t *spa_inject_addref(char *pool);
extern void spa_inject_delref(spa_t *spa);
extern void spa_scan_stat_init(spa_t *spa);
extern int spa_scan_get_stats(spa_t *spa, pool_scan_stat_t *ps);
#define SPA_ASYNC_CONFIG_UPDATE 0x01
#define SPA_ASYNC_REMOVE 0x02
@ -345,6 +440,14 @@ extern void spa_inject_delref(spa_t *spa);
#define SPA_ASYNC_RESILVER_DONE 0x08
#define SPA_ASYNC_RESILVER 0x10
#define SPA_ASYNC_AUTOEXPAND 0x20
#define SPA_ASYNC_REMOVE_DONE 0x40
#define SPA_ASYNC_REMOVE_STOP 0x80
/*
* Controls the behavior of spa_vdev_remove().
*/
#define SPA_REMOVE_UNSPARE 0x01
#define SPA_REMOVE_DONE 0x02
/* device manipulation */
extern int spa_vdev_add(spa_t *spa, nvlist_t *nvroot);
@ -353,8 +456,11 @@ extern int spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot,
extern int spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid,
int replace_done);
extern int spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare);
extern boolean_t spa_vdev_remove_active(spa_t *spa);
extern int spa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath);
extern int spa_vdev_setfru(spa_t *spa, uint64_t guid, const char *newfru);
extern int spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config,
nvlist_t *props, boolean_t exp);
/* spare state (which is global across all pools) */
extern void spa_spare_add(vdev_t *vd);
@ -368,15 +474,23 @@ extern void spa_l2cache_remove(vdev_t *vd);
extern boolean_t spa_l2cache_exists(uint64_t guid, uint64_t *pool);
extern void spa_l2cache_activate(vdev_t *vd);
extern void spa_l2cache_drop(spa_t *spa);
extern void spa_l2cache_space_update(vdev_t *vd, int64_t space, int64_t alloc);
/* scrubbing */
extern int spa_scrub(spa_t *spa, pool_scrub_type_t type);
/* scanning */
extern int spa_scan(spa_t *spa, pool_scan_func_t func);
extern int spa_scan_stop(spa_t *spa);
/* spa syncing */
extern void spa_sync(spa_t *spa, uint64_t txg); /* only for DMU use */
extern void spa_sync_allpools(void);
/*
* DEFERRED_FREE must be large enough that regular blocks are not
* deferred. XXX so can't we change it back to 1?
*/
#define SYNC_PASS_DEFERRED_FREE 2 /* defer frees after this pass */
#define SYNC_PASS_DONT_COMPRESS 4 /* don't compress after this pass */
#define SYNC_PASS_REWRITE 1 /* rewrite new bps after this pass */
/* spa namespace global mutex */
extern kmutex_t spa_namespace_lock;
@ -394,7 +508,6 @@ extern void spa_config_set(spa_t *spa, nvlist_t *config);
extern nvlist_t *spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg,
int getstats);
extern void spa_config_update(spa_t *spa, int what);
extern void spa_config_update_common(spa_t *spa, int what, boolean_t isroot);
/*
* Miscellaneous SPA routines in spa_misc.c
@ -402,7 +515,7 @@ extern void spa_config_update_common(spa_t *spa, int what, boolean_t isroot);
/* Namespace manipulation */
extern spa_t *spa_lookup(const char *name);
extern spa_t *spa_add(const char *name, const char *altroot);
extern spa_t *spa_add(const char *name, nvlist_t *config, const char *altroot);
extern void spa_remove(spa_t *spa);
extern spa_t *spa_next(spa_t *prev);
@ -411,6 +524,7 @@ extern void spa_open_ref(spa_t *spa, void *tag);
extern void spa_close(spa_t *spa, void *tag);
extern boolean_t spa_refcount_zero(spa_t *spa);
#define SCL_NONE 0x00
#define SCL_CONFIG 0x01
#define SCL_STATE 0x02
#define SCL_L2ARC 0x04 /* hack until L2ARC 2.0 */
@ -430,12 +544,30 @@ extern int spa_config_held(spa_t *spa, int locks, krw_t rw);
/* Pool vdev add/remove lock */
extern uint64_t spa_vdev_enter(spa_t *spa);
extern uint64_t spa_vdev_config_enter(spa_t *spa);
extern void spa_vdev_config_exit(spa_t *spa, vdev_t *vd, uint64_t txg,
int error, char *tag);
extern int spa_vdev_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error);
/* Pool vdev state change lock */
extern void spa_vdev_state_enter(spa_t *spa);
extern void spa_vdev_state_enter(spa_t *spa, int oplock);
extern int spa_vdev_state_exit(spa_t *spa, vdev_t *vd, int error);
/* Log state */
typedef enum spa_log_state {
SPA_LOG_UNKNOWN = 0, /* unknown log state */
SPA_LOG_MISSING, /* missing log(s) */
SPA_LOG_CLEAR, /* clear the log(s) */
SPA_LOG_GOOD, /* log(s) are good */
} spa_log_state_t;
extern spa_log_state_t spa_get_log_state(spa_t *spa);
extern void spa_set_log_state(spa_t *spa, spa_log_state_t state);
extern int spa_offline_log(spa_t *spa);
/* Log claim callback */
extern void spa_claim_notify(zio_t *zio);
/* Accessor functions */
extern boolean_t spa_shutting_down(spa_t *spa);
extern struct dsl_pool *spa_get_dsl(spa_t *spa);
@ -447,18 +579,26 @@ extern char *spa_name(spa_t *spa);
extern uint64_t spa_guid(spa_t *spa);
extern uint64_t spa_last_synced_txg(spa_t *spa);
extern uint64_t spa_first_txg(spa_t *spa);
extern uint64_t spa_syncing_txg(spa_t *spa);
extern uint64_t spa_version(spa_t *spa);
extern pool_state_t spa_state(spa_t *spa);
extern spa_load_state_t spa_load_state(spa_t *spa);
extern uint64_t spa_freeze_txg(spa_t *spa);
extern uint64_t spa_get_alloc(spa_t *spa);
extern uint64_t spa_get_space(spa_t *spa);
extern uint64_t spa_get_dspace(spa_t *spa);
extern uint64_t spa_get_asize(spa_t *spa, uint64_t lsize);
extern uint64_t spa_get_dspace(spa_t *spa);
extern void spa_update_dspace(spa_t *spa);
extern uint64_t spa_version(spa_t *spa);
extern boolean_t spa_deflate(spa_t *spa);
extern metaslab_class_t *spa_normal_class(spa_t *spa);
extern metaslab_class_t *spa_log_class(spa_t *spa);
extern int spa_max_replication(spa_t *spa);
extern int spa_prev_software_version(spa_t *spa);
extern int spa_busy(void);
extern uint8_t spa_get_failmode(spa_t *spa);
extern boolean_t spa_suspended(spa_t *spa);
extern uint64_t spa_bootfs(spa_t *spa);
extern uint64_t spa_delegation(spa_t *spa);
extern objset_t *spa_meta_objset(spa_t *spa);
/* Miscellaneous support routines */
extern int spa_rename(const char *oldname, const char *newname);
@ -466,18 +606,24 @@ extern boolean_t spa_guid_exists(uint64_t pool_guid, uint64_t device_guid);
extern char *spa_strdup(const char *);
extern void spa_strfree(char *);
extern uint64_t spa_get_random(uint64_t range);
extern void sprintf_blkptr(char *buf, int len, const blkptr_t *bp);
extern uint64_t spa_generate_guid(spa_t *spa);
extern void sprintf_blkptr(char *buf, const blkptr_t *bp);
extern void spa_freeze(spa_t *spa);
extern void spa_upgrade(spa_t *spa, uint64_t version);
extern void spa_evict_all(void);
extern vdev_t *spa_lookup_by_guid(spa_t *spa, uint64_t guid,
boolean_t l2cache);
extern boolean_t spa_has_spare(spa_t *, uint64_t guid);
extern uint64_t bp_get_dasize(spa_t *spa, const blkptr_t *bp);
extern uint64_t dva_get_dsize_sync(spa_t *spa, const dva_t *dva);
extern uint64_t bp_get_dsize_sync(spa_t *spa, const blkptr_t *bp);
extern uint64_t bp_get_dsize(spa_t *spa, const blkptr_t *bp);
extern boolean_t spa_has_slogs(spa_t *spa);
extern boolean_t spa_is_root(spa_t *spa);
extern boolean_t spa_writeable(spa_t *spa);
extern void spa_rewind_data_to_nvlist(spa_t *spa, nvlist_t *to);
extern int spa_mode(spa_t *spa);
extern uint64_t strtonum(const char *str, char **nptr);
/* history logging */
typedef enum history_log_type {
@ -487,10 +633,11 @@ typedef enum history_log_type {
} history_log_type_t;
typedef struct history_arg {
const char *ha_history_str;
char *ha_history_str;
history_log_type_t ha_log_type;
history_internal_events_t ha_event;
char ha_zone[MAXPATHLEN];
char *ha_zone;
uid_t ha_uid;
} history_arg_t;
extern char *spa_his_ievent_table[];
@ -500,17 +647,17 @@ extern int spa_history_get(spa_t *spa, uint64_t *offset, uint64_t *len_read,
char *his_buf);
extern int spa_history_log(spa_t *spa, const char *his_buf,
history_log_type_t what);
extern void spa_history_internal_log(history_internal_events_t event,
spa_t *spa, dmu_tx_t *tx, cred_t *cr, const char *fmt, ...);
extern void spa_history_log_internal(history_internal_events_t event,
spa_t *spa, dmu_tx_t *tx, const char *fmt, ...);
extern void spa_history_log_version(spa_t *spa, history_internal_events_t evt);
/* error handling */
struct zbookmark;
struct zio;
extern void spa_log_error(spa_t *spa, struct zio *zio);
extern void spa_log_error(spa_t *spa, zio_t *zio);
extern void zfs_ereport_post(const char *class, spa_t *spa, vdev_t *vd,
struct zio *zio, uint64_t stateoroffset, uint64_t length);
zio_t *zio, uint64_t stateoroffset, uint64_t length);
extern void zfs_post_remove(spa_t *spa, vdev_t *vd);
extern void zfs_post_state_change(spa_t *spa, vdev_t *vd);
extern void zfs_post_autoreplace(spa_t *spa, vdev_t *vd);
extern uint64_t spa_get_errlog_size(spa_t *spa);
extern int spa_get_errlog(spa_t *spa, void *uaddr, size_t *count);
@ -541,7 +688,7 @@ extern void spa_event_notify(spa_t *spa, vdev_t *vdev, const char *name);
#define dprintf_bp(bp, fmt, ...) do { \
if (zfs_flags & ZFS_DEBUG_DPRINTF) { \
char *__blkbuf = kmem_alloc(BP_SPRINTF_LEN, KM_SLEEP); \
sprintf_blkptr(__blkbuf, BP_SPRINTF_LEN, (bp)); \
sprintf_blkptr(__blkbuf, (bp)); \
dprintf(fmt " %s\n", __VA_ARGS__, __blkbuf); \
kmem_free(__blkbuf, BP_SPRINTF_LEN); \
} \

View File

@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#ifndef _SYS_SPA_IMPL_H
@ -36,6 +35,7 @@
#include <sys/avl.h>
#include <sys/refcount.h>
#include <sys/bplist.h>
#include <sys/bpobj.h>
#ifdef __cplusplus
extern "C" {
@ -78,19 +78,33 @@ typedef struct spa_config_dirent {
char *scd_path;
} spa_config_dirent_t;
typedef enum spa_log_state {
SPA_LOG_UNKNOWN = 0, /* unknown log state */
SPA_LOG_MISSING, /* missing log(s) */
SPA_LOG_CLEAR, /* clear the log(s) */
SPA_LOG_GOOD, /* log(s) are good */
} spa_log_state_t;
enum zio_taskq_type {
ZIO_TASKQ_ISSUE = 0,
ZIO_TASKQ_ISSUE_HIGH,
ZIO_TASKQ_INTERRUPT,
ZIO_TASKQ_INTERRUPT_HIGH,
ZIO_TASKQ_TYPES
};
/*
* State machine for the zpool-pooname process. The states transitions
* are done as follows:
*
* From To Routine
* PROC_NONE -> PROC_CREATED spa_activate()
* PROC_CREATED -> PROC_ACTIVE spa_thread()
* PROC_ACTIVE -> PROC_DEACTIVATE spa_deactivate()
* PROC_DEACTIVATE -> PROC_GONE spa_thread()
* PROC_GONE -> PROC_NONE spa_deactivate()
*/
typedef enum spa_proc_state {
SPA_PROC_NONE, /* spa_proc = &p0, no process created */
SPA_PROC_CREATED, /* spa_activate() has proc, is waiting */
SPA_PROC_ACTIVE, /* taskqs created, spa_proc set */
SPA_PROC_DEACTIVATE, /* spa_deactivate() requests process exit */
SPA_PROC_GONE /* spa_thread() is exiting, spa_proc = &p0 */
} spa_proc_state_t;
struct spa {
/*
* Fields protected by spa_namespace_lock.
@ -99,6 +113,7 @@ struct spa {
avl_node_t spa_avl; /* node in spa_namespace_avl */
nvlist_t *spa_config; /* last synced config */
nvlist_t *spa_config_syncing; /* currently syncing config */
nvlist_t *spa_config_splitting; /* config for splitting */
uint64_t spa_config_txg; /* txg of last config change */
int spa_sync_pass; /* iterate-to-convergence */
pool_state_t spa_state; /* pool state */
@ -113,6 +128,8 @@ struct spa {
uint64_t spa_first_txg; /* first txg after spa_open() */
uint64_t spa_final_txg; /* txg of export/destroy */
uint64_t spa_freeze_txg; /* freeze pool at this txg */
uint64_t spa_load_max_txg; /* best initial ub_txg */
uint64_t spa_claim_max_txg; /* highest claimed birth txg */
objset_t *spa_meta_objset; /* copy of dp->dp_meta_objset */
txg_list_t spa_vdev_txg_list; /* per-txg dirty vdev list */
vdev_t *spa_root_vdev; /* top-level vdev container */
@ -122,21 +139,24 @@ struct spa {
spa_aux_vdev_t spa_spares; /* hot spares */
spa_aux_vdev_t spa_l2cache; /* L2ARC cache devices */
uint64_t spa_config_object; /* MOS object for pool config */
uint64_t spa_config_generation; /* config generation number */
uint64_t spa_syncing_txg; /* txg currently syncing */
uint64_t spa_sync_bplist_obj; /* object for deferred frees */
bplist_t spa_sync_bplist; /* deferred-free bplist */
bpobj_t spa_deferred_bpobj; /* deferred-free bplist */
bplist_t spa_free_bplist[TXG_SIZE]; /* bplist of stuff to free */
uberblock_t spa_ubsync; /* last synced uberblock */
uberblock_t spa_uberblock; /* current uberblock */
boolean_t spa_extreme_rewind; /* rewind past deferred frees */
kmutex_t spa_scrub_lock; /* resilver/scrub lock */
uint64_t spa_scrub_inflight; /* in-flight scrub I/Os */
uint64_t spa_scrub_maxinflight; /* max in-flight scrub I/Os */
uint64_t spa_scrub_errors; /* scrub I/O error count */
kcondvar_t spa_scrub_io_cv; /* scrub I/O completion */
uint8_t spa_scrub_active; /* active or suspended? */
uint8_t spa_scrub_type; /* type of scrub we're doing */
uint8_t spa_scrub_finished; /* indicator to rotate logs */
uint8_t spa_scrub_started; /* started since last boot */
uint8_t spa_scrub_reopen; /* scrub doing vdev_reopen */
uint64_t spa_scan_pass_start; /* start time per pass/reboot */
uint64_t spa_scan_pass_exam; /* examined bytes per pass */
kmutex_t spa_async_lock; /* protect async state */
kthread_t *spa_async_thread; /* thread doing async task */
int spa_async_suspended; /* async tasks suspended */
@ -144,7 +164,14 @@ struct spa {
uint16_t spa_async_tasks; /* async task mask */
char *spa_root; /* alternate root directory */
uint64_t spa_ena; /* spa-wide ereport ENA */
boolean_t spa_last_open_failed; /* true if last open faled */
int spa_last_open_failed; /* error if last open failed */
uint64_t spa_last_ubsync_txg; /* "best" uberblock txg */
uint64_t spa_last_ubsync_txg_ts; /* timestamp from that ub */
uint64_t spa_load_txg; /* ub txg that loaded */
uint64_t spa_load_txg_ts; /* timestamp from that ub */
uint64_t spa_load_meta_errors; /* verify metadata err count */
uint64_t spa_load_data_errors; /* verify data err count */
uint64_t spa_verify_min_txg; /* start txg of verify scrub */
kmutex_t spa_errlog_lock; /* error log lock */
uint64_t spa_errlog_last; /* last error log object */
uint64_t spa_errlog_scrub; /* scrub error log object */
@ -166,11 +193,27 @@ struct spa {
kmutex_t spa_suspend_lock; /* protects suspend_zio_root */
kcondvar_t spa_suspend_cv; /* notification of resume */
uint8_t spa_suspended; /* pool is suspended */
uint8_t spa_claiming; /* pool is doing zil_claim() */
boolean_t spa_is_root; /* pool is root */
int spa_minref; /* num refs when first opened */
int spa_mode; /* FREAD | FWRITE */
spa_log_state_t spa_log_state; /* log state */
uint64_t spa_autoexpand; /* lun expansion on/off */
ddt_t *spa_ddt[ZIO_CHECKSUM_FUNCTIONS]; /* in-core DDTs */
uint64_t spa_ddt_stat_object; /* DDT statistics */
uint64_t spa_dedup_ditto; /* dedup ditto threshold */
uint64_t spa_dedup_checksum; /* default dedup checksum */
uint64_t spa_dspace; /* dspace in normal class */
kmutex_t spa_vdev_top_lock; /* dueling offline/remove */
kmutex_t spa_proc_lock; /* protects spa_proc* */
kcondvar_t spa_proc_cv; /* spa_proc_state transitions */
spa_proc_state_t spa_proc_state; /* see definition */
struct proc *spa_proc; /* "zpool-poolname" process */
uint64_t spa_did; /* if procp != p0, did of t1 */
boolean_t spa_autoreplace; /* autoreplace set in open */
int spa_vdev_locks; /* locks grabbed */
uint64_t spa_creation_version; /* version at pool creation */
uint64_t spa_prev_software_version;
/*
* spa_refcnt & spa_config_lock must be the last elements
* because refcount_t changes size based on compilation options.
@ -183,12 +226,6 @@ struct spa {
extern const char *spa_config_path;
#define BOOTFS_COMPRESS_VALID(compress) \
((compress) == ZIO_COMPRESS_LZJB || \
((compress) == ZIO_COMPRESS_ON && \
ZIO_COMPRESS_ON_VALUE == ZIO_COMPRESS_LZJB) || \
(compress) == ZIO_COMPRESS_OFF)
#ifdef __cplusplus
}
#endif

View File

@ -77,6 +77,7 @@ struct space_map_ops {
void (*smop_claim)(space_map_t *sm, uint64_t start, uint64_t size);
void (*smop_free)(space_map_t *sm, uint64_t start, uint64_t size);
uint64_t (*smop_max)(space_map_t *sm);
boolean_t (*smop_fragmented)(space_map_t *sm);
};
/*

Some files were not shown because too many files have changed in this diff Show More