Linux ZVOL implementation; user-side changes

At last a useful user space interface for the Linux ZFS port arrives.
With the addition of the ZVOL, real ZFS-based block devices are available
and can be compared head to head with Linux's MD and LVM block drivers.
The Linux ZVOL has not yet had any performance work done but from a user
perspective it should be functionally complete and behave like any other
Linux block device.

The ZVOL has so far been tested using zconfig.sh on the following x86_64
based platforms: FC11, CHAOS4, RHEL5, RHEL6, and SLES11.  However, more
testing is required to ensure everything is working as designed.

What follows is a somewhat detailed list of the changes included in this
commit to make ZVOLs possible.  A few other issues were addressed in
the context of these changes which will also be mentioned.

* zvol_create_link_common() simplified to just issue the ioctl to
create the device and then wait up to 10 seconds for it to appear.
The device will be created within a few milliseconds by udev under
/dev/<pool>/<volume>.  Note this naming convention is slightly
different than on Solaris, but I feel it is more Linuxy.

* Removed support for dump vdevs.  This concept is specific to Solaris
and does not map cleanly to Linux.  Under Linux generating system cores
is preferably done over the network via netdump, or alternately to a
block device via O_DIRECT.
This commit is contained in:
Brian Behlendorf 2009-11-20 12:00:08 -08:00
parent bf333c5408
commit aebe6818a9
10 changed files with 128 additions and 164 deletions

View File

@ -554,6 +554,7 @@ zfs_do_clone(int argc, char **argv)
ret = zfs_clone(zhp, argv[1], props);
/* create the mountpoint if necessary */
#ifdef HAVE_ZPL
if (ret == 0) {
zfs_handle_t *clone;
@ -564,6 +565,7 @@ zfs_do_clone(int argc, char **argv)
zfs_close(clone);
}
}
#endif /* HAVE_ZPL */
zfs_close(zhp);
nvlist_free(props);
@ -761,6 +763,7 @@ zfs_do_create(int argc, char **argv)
* in fact created, even if we failed to mount or share it.
*/
ret = 0;
#ifdef HAVE_ZPL
if (canmount == ZFS_CANMOUNT_ON) {
if (zfs_mount(zhp, NULL, 0) != 0) {
(void) fprintf(stderr, gettext("filesystem "
@ -772,6 +775,7 @@ zfs_do_create(int argc, char **argv)
ret = 1;
}
}
#endif /* HAVE_ZPL */
error:
if (zhp)
@ -2787,6 +2791,7 @@ typedef struct get_all_cbdata {
#define SPINNER_TIME 3 /* seconds */
#define MOUNT_TIME 5 /* seconds */
#ifdef HAVE_ZPL
static int
get_one_dataset(zfs_handle_t *zhp, void *data)
{
@ -3338,6 +3343,7 @@ share_mount(int op, int argc, char **argv)
return (ret);
}
#endif /* HAVE_ZPL */
/*
* zfs mount -a [nfs | iscsi]
@ -3348,7 +3354,11 @@ share_mount(int op, int argc, char **argv)
static int
zfs_do_mount(int argc, char **argv)
{
	/*
	 * When HAVE_ZPL is not defined the request is not forwarded to
	 * share_mount(); ENOSYS is returned instead.
	 */
#ifndef HAVE_ZPL
	return (ENOSYS);
#else
	return (share_mount(OP_MOUNT, argc, argv));
#endif /* HAVE_ZPL */
}
/*
@ -3360,9 +3370,14 @@ zfs_do_mount(int argc, char **argv)
static int
zfs_do_share(int argc, char **argv)
{
	/*
	 * When HAVE_ZPL is not defined the request is not forwarded to
	 * share_mount(); ENOSYS is returned instead.
	 */
#ifndef HAVE_ZPL
	return (ENOSYS);
#else
	return (share_mount(OP_SHARE, argc, argv));
#endif /* HAVE_ZPL */
}
#ifdef HAVE_ZPL
typedef struct unshare_unmount_node {
zfs_handle_t *un_zhp;
char *un_mountp;
@ -3815,6 +3830,7 @@ unshare_unmount(int op, int argc, char **argv)
return (ret);
}
#endif /* HAVE_ZPL */
/*
* zfs unmount -a
@ -3825,7 +3841,11 @@ unshare_unmount(int op, int argc, char **argv)
static int
zfs_do_unmount(int argc, char **argv)
{
	/*
	 * When HAVE_ZPL is not defined the request is not forwarded to
	 * unshare_unmount(); ENOSYS is returned instead.
	 */
#ifndef HAVE_ZPL
	return (ENOSYS);
#else
	return (unshare_unmount(OP_MOUNT, argc, argv));
#endif /* HAVE_ZPL */
}
/*
@ -3837,7 +3857,11 @@ zfs_do_unmount(int argc, char **argv)
static int
zfs_do_unshare(int argc, char **argv)
{
	/*
	 * When HAVE_ZPL is not defined the request is not forwarded to
	 * unshare_unmount(); ENOSYS is returned instead.
	 */
#ifndef HAVE_ZPL
	return (ENOSYS);
#else
	return (unshare_unmount(OP_SHARE, argc, argv));
#endif /* HAVE_ZPL */
}
/* ARGSUSED */
@ -3853,6 +3877,7 @@ zfs_do_python(int argc, char **argv)
* Called when invoked as /etc/fs/zfs/mount. Do the mount if the mountpoint is
* 'legacy'. Otherwise, complain that the user should be using 'zfs mount'.
*/
#ifdef HAVE_ZPL
static int
manual_mount(int argc, char **argv)
{
@ -3983,6 +4008,7 @@ manual_unmount(int argc, char **argv)
return (unshare_unmount_path(OP_MOUNT, argv[0], flags, B_TRUE));
}
#endif /* HAVE_ZPL */
static int
volcheck(zpool_handle_t *zhp, void *data)
@ -4027,7 +4053,9 @@ main(int argc, char **argv)
{
int ret;
int i = 0;
#ifdef HAVE_ZPL
char *progname;
#endif
char *cmdname;
(void) setlocale(LC_ALL, "");
@ -4052,6 +4080,7 @@ main(int argc, char **argv)
return (1);
}
#ifdef HAVE_ZPL
/*
* This command also doubles as the /etc/fs mount and unmount program.
* Determine if we should take this behavior based on argv[0].
@ -4062,6 +4091,9 @@ main(int argc, char **argv)
} else if (strcmp(progname, "umount") == 0) {
ret = manual_unmount(argc, argv);
} else {
#else
{
#endif /* HAVE_ZPL */
/*
* Make sure the user has specified some command.
*/

View File

@ -751,17 +751,20 @@ main(int argc, char **argv)
if (dataset[0] != '\0' && domount) {
if ((zhp = zfs_open(g_zfs, dataset, ZFS_TYPE_DATASET)) == NULL)
return (1);
#ifdef HAVE_ZPL
if (zfs_unmount(zhp, NULL, 0) != 0)
return (1);
#endif /* HAVE_ZPL */
}
record.zi_error = error;
ret = register_handler(pool, flags, &record, quiet);
#ifdef HAVE_ZPL
if (dataset[0] != '\0' && domount)
ret = (zfs_mount(zhp, NULL, 0) != 0);
#endif /* HAVE_ZPL */
libzfs_fini(g_zfs);

View File

@ -696,7 +696,9 @@ zpool_do_create(int argc, char **argv)
(strcmp(mountpoint, ZFS_MOUNTPOINT_LEGACY) != 0 &&
strcmp(mountpoint, ZFS_MOUNTPOINT_NONE) != 0)) {
char buf[MAXPATHLEN];
#ifdef HAVE_ZPL
DIR *dirp;
#endif
if (mountpoint && mountpoint[0] != '/') {
(void) fprintf(stderr, gettext("invalid mountpoint "
@ -721,6 +723,7 @@ zpool_do_create(int argc, char **argv)
mountpoint);
}
#ifdef HAVE_ZPL
if ((dirp = opendir(buf)) == NULL && errno != ENOENT) {
(void) fprintf(stderr, gettext("mountpoint '%s' : "
"%s\n"), buf, strerror(errno));
@ -743,6 +746,7 @@ zpool_do_create(int argc, char **argv)
goto errout;
}
}
#endif /* HAVE_ZPL */
}
if (dryrun) {
@ -773,8 +777,12 @@ zpool_do_create(int argc, char **argv)
zfs_prop_to_name(
ZFS_PROP_MOUNTPOINT),
mountpoint) == 0);
#ifdef HAVE_ZPL
if (zfs_mount(pool, NULL, 0) == 0)
ret = zfs_shareall(pool);
#else
ret = 0;
#endif /* HAVE_ZPL */
zfs_close(pool);
}
} else if (libzfs_errno(g_zfs) == EZFS_INVALIDNAME) {
@ -1531,11 +1539,13 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts,
if ((zhp = zpool_open_canfail(g_zfs, name)) == NULL)
return (1);
#if HAVE_ZPL
if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL &&
zpool_enable_datasets(zhp, mntopts, 0) != 0) {
zpool_close(zhp);
return (1);
}
#endif /* HAVE_ZPL */
zpool_close(zhp);
return (error);

View File

@ -602,9 +602,6 @@ extern int zpool_read_label(int, nvlist_t **);
extern int zpool_create_zvol_links(zpool_handle_t *);
extern int zpool_remove_zvol_links(zpool_handle_t *);
/* is this zvol valid for use as a dump device? */
extern int zvol_check_dump_config(char *);
/*
* Management interfaces for SMB ACL files
*/

View File

@ -93,6 +93,7 @@ struct prop_changelist {
int
changelist_prefix(prop_changelist_t *clp)
{
#ifdef HAVE_ZPL
prop_changenode_t *cn;
int ret = 0;
@ -168,6 +169,9 @@ changelist_prefix(prop_changelist_t *clp)
(void) changelist_postfix(clp);
return (ret);
#else
return 0;
#endif /* HAVE_ZPL */
}
/*
@ -182,6 +186,7 @@ changelist_prefix(prop_changelist_t *clp)
int
changelist_postfix(prop_changelist_t *clp)
{
#ifdef HAVE_ZPL
prop_changenode_t *cn;
char shareopts[ZFS_MAXPROPLEN];
int errors = 0;
@ -306,6 +311,9 @@ changelist_postfix(prop_changelist_t *clp)
}
return (errors ? -1 : 0);
#else
return 0;
#endif /* HAVE_ZPL */
}
/*
@ -368,6 +376,7 @@ changelist_rename(prop_changelist_t *clp, const char *src, const char *dst)
int
changelist_unshare(prop_changelist_t *clp, zfs_share_proto_t *proto)
{
#ifdef HAVE_ZPL
prop_changenode_t *cn;
int ret = 0;
@ -382,6 +391,9 @@ changelist_unshare(prop_changelist_t *clp, zfs_share_proto_t *proto)
}
return (ret);
#else
return 0;
#endif
}
/*

View File

@ -965,6 +965,7 @@ zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl,
/*FALLTHRU*/
#ifdef HAVE_ZPL
case ZFS_PROP_SHARESMB:
case ZFS_PROP_SHARENFS:
/*
@ -1075,6 +1076,7 @@ zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl,
}
break;
#endif /* HAVE_ZPL */
case ZFS_PROP_UTF8ONLY:
chosen_utf = (int)intval;
break;
@ -2522,6 +2524,7 @@ create_parents(libzfs_handle_t *hdl, char *target, int prefixlen)
goto ancestorerr;
}
#ifdef HAVE_ZPL
if (zfs_mount(h, NULL, 0) != 0) {
opname = dgettext(TEXT_DOMAIN, "mount");
goto ancestorerr;
@ -2531,6 +2534,7 @@ create_parents(libzfs_handle_t *hdl, char *target, int prefixlen)
opname = dgettext(TEXT_DOMAIN, "share");
goto ancestorerr;
}
#endif /* HAVE_ZPL */
zfs_close(h);
}
@ -3620,7 +3624,7 @@ error:
/*
* Given a zvol dataset, issue the ioctl to create the appropriate minor node,
* poke devfsadm to create the /dev link, and then wait for the link to appear.
* and wait briefly for udev to create the /dev link.
*/
int
zvol_create_link(libzfs_handle_t *hdl, const char *dataset)
@ -3632,9 +3636,8 @@ static int
zvol_create_link_common(libzfs_handle_t *hdl, const char *dataset, int ifexists)
{
zfs_cmd_t zc = { "\0", "\0", "\0", 0 };
di_devlink_handle_t dhdl;
priv_set_t *priv_effective;
int privileged;
char path[MAXPATHLEN];
int error;
(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
@ -3671,52 +3674,13 @@ zvol_create_link_common(libzfs_handle_t *hdl, const char *dataset, int ifexists)
}
/*
* If privileged call devfsadm and wait for the links to
* magically appear.
* Otherwise, print out an informational message.
* Wait up to 10 seconds for udev to create the device.
*/
priv_effective = priv_allocset();
(void) getppriv(PRIV_EFFECTIVE, priv_effective);
privileged = (priv_isfullset(priv_effective) == B_TRUE);
priv_freeset(priv_effective);
if (privileged) {
if ((dhdl = di_devlink_init(ZFS_DRIVER,
DI_MAKE_LINK)) == NULL) {
zfs_error_aux(hdl, strerror(errno));
(void) zfs_error_fmt(hdl, errno,
dgettext(TEXT_DOMAIN, "cannot create device links "
"for '%s'"), dataset);
(void) ioctl(hdl->libzfs_fd, ZFS_IOC_REMOVE_MINOR, &zc);
return (-1);
} else {
(void) di_devlink_fini(&dhdl);
}
} else {
char pathname[MAXPATHLEN];
struct stat64 statbuf;
int i;
#define MAX_WAIT 10
/*
* This is the poor mans way of waiting for the link
* to show up. If after 10 seconds we still don't
* have it, then print out a message.
*/
(void) snprintf(pathname, sizeof (pathname), "/dev/zvol/dsk/%s",
dataset);
for (i = 0; i != MAX_WAIT; i++) {
if (stat64(pathname, &statbuf) == 0)
break;
(void) sleep(1);
}
if (i == MAX_WAIT)
(void) printf(gettext("%s may not be immediately "
"available\n"), pathname);
}
(void) snprintf(path, sizeof (path), "/dev/%s", dataset);
error = zpool_label_disk_wait(path, 10000);
if (error)
(void) printf(gettext("%s may not be immediately "
"available\n"), path);
return (0);
}
@ -3852,6 +3816,7 @@ zfs_expand_proplist(zfs_handle_t *zhp, zprop_list_t **plp)
return (0);
}
#ifdef HAVE_ZPL
int
zfs_iscsi_perm_check(libzfs_handle_t *hdl, char *dataset, ucred_t *cred)
{
@ -3917,6 +3882,7 @@ zfs_deleg_share_nfs(libzfs_handle_t *hdl, char *dataset, char *path,
error = ioctl(hdl->libzfs_fd, ZFS_IOC_SHARE, &zc);
return (error);
}
#endif /* HAVE_ZPL */
void
zfs_prune_proplist(zfs_handle_t *zhp, uint8_t *props)

View File

@ -85,6 +85,7 @@
#include <sys/systeminfo.h>
#define MAXISALEN 257 /* based on sysinfo(2) man page */
#ifdef HAVE_ZPL
static int zfs_share_proto(zfs_handle_t *, zfs_share_proto_t *);
zfs_share_type_t zfs_is_shared_proto(zfs_handle_t *, char **,
zfs_share_proto_t);
@ -1225,7 +1226,6 @@ out:
return (ret);
}
static int
zvol_cb(const char *dataset, void *data)
{
@ -1398,3 +1398,53 @@ out:
return (ret);
}
#else /* HAVE_ZPL */
/*
 * Placeholder compiled when HAVE_ZPL is not defined: does nothing with
 * the handle and always returns 0.
 */
int
zfs_unshare_iscsi(zfs_handle_t *zhp)
{
	return (0);
}
/*
 * Placeholder compiled when HAVE_ZPL is not defined: ignores all of its
 * arguments and always returns 0.
 */
int
zfs_unmount(zfs_handle_t *zhp, const char *mountpoint, int flags)
{
	return (0);
}
/*
 * Placeholder compiled when HAVE_ZPL is not defined: intentionally a
 * no-op.
 */
void
remove_mountpoint(zfs_handle_t *zhp)
{
}
/*
 * Placeholder compiled when HAVE_ZPL is not defined: always answers
 * B_FALSE and never writes through 'where'.
 */
boolean_t
is_mounted(libzfs_handle_t *zfs_hdl, const char *special, char **where)
{
	return (B_FALSE);
}
/*
 * Look up the dataset's library handle and name, then defer to
 * is_mounted() for the answer.
 */
boolean_t
zfs_is_mounted(zfs_handle_t *zhp, char **where)
{
	libzfs_handle_t *hdl = zhp->zfs_hdl;
	const char *name = zfs_get_name(zhp);

	return (is_mounted(hdl, name, where));
}
/*
 * Placeholder compiled when HAVE_ZPL is not defined: always answers
 * B_FALSE.
 */
boolean_t
zfs_is_shared(zfs_handle_t *zhp)
{
	return (B_FALSE);
}
/*
 * Placeholder compiled when HAVE_ZPL is not defined: ignores its
 * arguments and always returns 0.
 *
 * The function is declared to return int, so a plain 0 is returned
 * rather than the boolean_t constant B_FALSE.
 */
int
zpool_enable_datasets(zpool_handle_t *zhp, const char *mntopts, int flags)
{
	return (0);
}
/*
 * Placeholder compiled when HAVE_ZPL is not defined: ignores its
 * arguments and always returns 0.
 *
 * The function is declared to return int, so a plain 0 is returned
 * rather than the boolean_t constant B_FALSE.
 */
int
zpool_disable_datasets(zpool_handle_t *zhp, boolean_t force)
{
	return (0);
}
#endif /* HAVE_ZPL */

View File

@ -3285,113 +3285,3 @@ zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, char *name)
return 0;
}
/*
 * Recursively walk a vdev config nvlist and decide whether every vdev in
 * it has a type that is acceptable for a dump device.
 *
 * On the first unsupported type an EZFS_VDEVNOTSUP error is recorded on
 * 'hdl' (using 'errbuf' as the message) and B_FALSE is returned;
 * otherwise B_TRUE is returned.
 */
static boolean_t
supported_dump_vdev_type(libzfs_handle_t *hdl, nvlist_t *config, char *errbuf)
{
	const char *rejected[] = {
		VDEV_TYPE_RAIDZ,
		VDEV_TYPE_FILE,
		VDEV_TYPE_LOG,
		VDEV_TYPE_MISSING
	};
	nvlist_t **kids;
	uint_t nkids, idx;
	char *vtype;

	verify(nvlist_lookup_string(config, ZPOOL_CONFIG_TYPE, &vtype) == 0);

	/* Refuse this vdev if its type appears in the rejected list. */
	for (idx = 0; idx < sizeof (rejected) / sizeof (rejected[0]); idx++) {
		if (strcmp(vtype, rejected[idx]) != 0)
			continue;
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "vdev type '%s' is not supported"), vtype);
		(void) zfs_error(hdl, EZFS_VDEVNOTSUP, errbuf);
		return (B_FALSE);
	}

	/* A vdev without a children array has nothing further to check. */
	if (nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_CHILDREN,
	    &kids, &nkids) != 0)
		return (B_TRUE);

	for (idx = 0; idx < nkids; idx++) {
		if (!supported_dump_vdev_type(hdl, kids[idx], errbuf))
			return (B_FALSE);
	}
	return (B_TRUE);
}
/*
 * Check if this zvol is allowable for use as a dump device.
 *
 * 'arg' is a device path.  A path that does not start with
 * ZVOL_FULL_DEV_DIR is not treated as a zvol.  The pool name is the part
 * of the remaining path before its first '/'.  The pool's vdev tree must
 * have exactly one top-level vdev, and that vdev (with all of its
 * children) must pass supported_dump_vdev_type().
 *
 * Returns zero if the zvol is allowable, > 0 if it isn't, and < 0 if
 * 'arg' isn't a zvol.
 */
int
zvol_check_dump_config(char *arg)
{
	zpool_handle_t *zhp = NULL;
	nvlist_t *config, *nvroot;
	char *p, *volname;
	nvlist_t **top;
	uint_t toplevels;
	libzfs_handle_t *hdl;
	char errbuf[1024];
	char poolname[ZPOOL_MAXNAMELEN];
	int pathlen = strlen(ZVOL_FULL_DEV_DIR);
	int ret = 1;

	if (strncmp(arg, ZVOL_FULL_DEV_DIR, pathlen)) {
		return (-1);
	}

	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
	    "dump is not supported on device '%s'"), arg);

	if ((hdl = libzfs_init()) == NULL)
		return (1);
	libzfs_print_on_error(hdl, B_TRUE);

	volname = arg + pathlen;

	/*
	 * check the configuration of the pool
	 *
	 * From here on every failure leaves through 'out' so that the
	 * handle obtained from libzfs_init() is always released.
	 */
	if ((p = strchr(volname, '/')) == NULL) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "malformed dataset name"));
		(void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
		goto out;
	} else if (p - volname >= ZFS_MAXNAMELEN ||
	    (size_t)(p - volname) >= sizeof (poolname)) {
		/*
		 * The second test keeps the copy below within poolname[]
		 * even if the two name-length limits ever differ.
		 */
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "dataset name is too long"));
		(void) zfs_error(hdl, EZFS_NAMETOOLONG, errbuf);
		goto out;
	} else {
		(void) strncpy(poolname, volname, p - volname);
		poolname[p - volname] = '\0';
	}

	if ((zhp = zpool_open(hdl, poolname)) == NULL) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "could not open pool '%s'"), poolname);
		(void) zfs_error(hdl, EZFS_OPENFAILED, errbuf);
		goto out;
	}
	config = zpool_get_config(zhp, NULL);
	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
	    &nvroot) != 0) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "could not obtain vdev configuration for '%s'"), poolname);
		(void) zfs_error(hdl, EZFS_INVALCONFIG, errbuf);
		goto out;
	}

	verify(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
	    &top, &toplevels) == 0);
	if (toplevels != 1) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "'%s' has multiple top level vdevs"), poolname);
		(void) zfs_error(hdl, EZFS_DEVOVERFLOW, errbuf);
		goto out;
	}

	if (!supported_dump_vdev_type(hdl, top[0], errbuf)) {
		goto out;
	}
	ret = 0;

out:
	/* zhp is still NULL when the pool was never opened. */
	if (zhp)
		zpool_close(zhp);
	libzfs_fini(hdl);
	return (ret);
}

View File

@ -1974,6 +1974,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
* if we did a replication receive (indicated by stream_avl
* being non-NULL).
*/
#ifdef HAVE_ZPL
cp = strchr(zc.zc_value, '@');
if (cp && (ioctl_err == 0 || !newfs)) {
zfs_handle_t *h;
@ -2000,6 +2001,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
}
*cp = '@';
}
#endif /* HAVE_ZPL */
if (clp) {
err |= changelist_postfix(clp);

View File

@ -605,7 +605,9 @@ libzfs_fini(libzfs_handle_t *hdl)
(void) fclose(hdl->libzfs_mnttab);
if (hdl->libzfs_sharetab)
(void) fclose(hdl->libzfs_sharetab);
#ifdef HAVE_ZPL
zfs_uninit_libshare(hdl);
#endif
if (hdl->libzfs_log_str)
(void) free(hdl->libzfs_log_str);
zpool_free_handles(hdl);