From aebe6818a9a7454b0da5dcf63d45f8fa83c36ae7 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Fri, 20 Nov 2009 12:00:08 -0800 Subject: [PATCH] Linux ZVOL implementation; user-side changes At last a useful user space interface for the Linux ZFS port arrives. With the addition of the ZVOL, real ZFS-based block devices are available and can be compared head to head with Linux's MD and LVM block drivers. The Linux ZVOL has not yet had any performance work done, but from a user perspective it should be functionally complete and behave like any other Linux block device. The ZVOL has so far been tested using zconfig.sh on the following x86_64 based platforms: FC11, CHAOS4, RHEL5, RHEL6, and SLES11. However, more testing is required to ensure everything is working as designed. What follows is a somewhat detailed list of the changes included in this commit to make ZVOLs possible. A few other issues were addressed in the context of these changes which will also be mentioned. * zvol_create_link_common() simplified to just issue the ioctl to create the device and then wait up to 10 seconds for it to appear. The device will be created within a few milliseconds by udev under /dev/<pool>/<dataset>. Note this naming convention is slightly different than on Solaris, but I feel it is more Linuxy. * Removed support for dump vdevs. This concept is specific to Solaris and does not map cleanly to Linux. Under Linux, generating system cores is preferably done over the network via netdump, or alternatively to a block device via O_DIRECT. 
--- cmd/zfs/zfs_main.c | 32 ++++++++++ cmd/zinject/zinject.c | 5 +- cmd/zpool/zpool_main.c | 10 +++ lib/libzfs/include/libzfs.h | 3 - lib/libzfs/libzfs_changelist.c | 12 ++++ lib/libzfs/libzfs_dataset.c | 64 +++++-------------- lib/libzfs/libzfs_mount.c | 52 +++++++++++++++- lib/libzfs/libzfs_pool.c | 110 --------------------------------- lib/libzfs/libzfs_sendrecv.c | 2 + lib/libzfs/libzfs_util.c | 2 + 10 files changed, 128 insertions(+), 164 deletions(-) diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c index 91b85ed6b8..618ae10225 100644 --- a/cmd/zfs/zfs_main.c +++ b/cmd/zfs/zfs_main.c @@ -554,6 +554,7 @@ zfs_do_clone(int argc, char **argv) ret = zfs_clone(zhp, argv[1], props); /* create the mountpoint if necessary */ +#ifdef HAVE_ZPL if (ret == 0) { zfs_handle_t *clone; @@ -564,6 +565,7 @@ zfs_do_clone(int argc, char **argv) zfs_close(clone); } } +#endif /* HAVE_ZPL */ zfs_close(zhp); nvlist_free(props); @@ -761,6 +763,7 @@ zfs_do_create(int argc, char **argv) * in fact created, even if we failed to mount or share it. 
*/ ret = 0; +#ifdef HAVE_ZPL if (canmount == ZFS_CANMOUNT_ON) { if (zfs_mount(zhp, NULL, 0) != 0) { (void) fprintf(stderr, gettext("filesystem " @@ -772,6 +775,7 @@ zfs_do_create(int argc, char **argv) ret = 1; } } +#endif /* HAVE_ZPL */ error: if (zhp) @@ -2787,6 +2791,7 @@ typedef struct get_all_cbdata { #define SPINNER_TIME 3 /* seconds */ #define MOUNT_TIME 5 /* seconds */ +#ifdef HAVE_ZPL static int get_one_dataset(zfs_handle_t *zhp, void *data) { @@ -3338,6 +3343,7 @@ share_mount(int op, int argc, char **argv) return (ret); } +#endif /* HAVE_ZPL */ /* * zfs mount -a [nfs | iscsi] @@ -3348,7 +3354,11 @@ share_mount(int op, int argc, char **argv) static int zfs_do_mount(int argc, char **argv) { +#ifdef HAVE_ZPL return (share_mount(OP_MOUNT, argc, argv)); +#else + return ENOSYS; +#endif /* HAVE_ZPL */ } /* @@ -3360,9 +3370,14 @@ zfs_do_mount(int argc, char **argv) static int zfs_do_share(int argc, char **argv) { +#ifdef HAVE_ZPL return (share_mount(OP_SHARE, argc, argv)); +#else + return ENOSYS; +#endif /* HAVE_ZPL */ } +#ifdef HAVE_ZPL typedef struct unshare_unmount_node { zfs_handle_t *un_zhp; char *un_mountp; @@ -3815,6 +3830,7 @@ unshare_unmount(int op, int argc, char **argv) return (ret); } +#endif /* HAVE_ZPL */ /* * zfs unmount -a @@ -3825,7 +3841,11 @@ unshare_unmount(int op, int argc, char **argv) static int zfs_do_unmount(int argc, char **argv) { +#ifdef HAVE_ZPL return (unshare_unmount(OP_MOUNT, argc, argv)); +#else + return ENOSYS; +#endif /* HAVE_ZPL */ } /* @@ -3837,7 +3857,11 @@ zfs_do_unmount(int argc, char **argv) static int zfs_do_unshare(int argc, char **argv) { +#ifdef HAVE_ZPL return (unshare_unmount(OP_SHARE, argc, argv)); +#else + return ENOSYS; +#endif /* HAVE_ZPL */ } /* ARGSUSED */ @@ -3853,6 +3877,7 @@ zfs_do_python(int argc, char **argv) * Called when invoked as /etc/fs/zfs/mount. Do the mount if the mountpoint is * 'legacy'. Otherwise, complain that use should be using 'zfs mount'. 
*/ +#ifdef HAVE_ZPL static int manual_mount(int argc, char **argv) { @@ -3983,6 +4008,7 @@ manual_unmount(int argc, char **argv) return (unshare_unmount_path(OP_MOUNT, argv[0], flags, B_TRUE)); } +#endif /* HAVE_ZPL */ static int volcheck(zpool_handle_t *zhp, void *data) @@ -4027,7 +4053,9 @@ main(int argc, char **argv) { int ret; int i = 0; +#ifdef HAVE_ZPL char *progname; +#endif char *cmdname; (void) setlocale(LC_ALL, ""); @@ -4052,6 +4080,7 @@ main(int argc, char **argv) return (1); } +#ifdef HAVE_ZPL /* * This command also doubles as the /etc/fs mount and unmount program. * Determine if we should take this behavior based on argv[0]. @@ -4062,6 +4091,9 @@ main(int argc, char **argv) } else if (strcmp(progname, "umount") == 0) { ret = manual_unmount(argc, argv); } else { +#else + { +#endif /* HAVE_ZPL */ /* * Make sure the user has specified some command. */ diff --git a/cmd/zinject/zinject.c b/cmd/zinject/zinject.c index 09c377ef8d..0ad8549b24 100644 --- a/cmd/zinject/zinject.c +++ b/cmd/zinject/zinject.c @@ -751,17 +751,20 @@ main(int argc, char **argv) if (dataset[0] != '\0' && domount) { if ((zhp = zfs_open(g_zfs, dataset, ZFS_TYPE_DATASET)) == NULL) return (1); - +#ifdef HAVE_ZPL if (zfs_unmount(zhp, NULL, 0) != 0) return (1); +#endif /* HAVE_ZPL */ } record.zi_error = error; ret = register_handler(pool, flags, &record, quiet); +#ifdef HAVE_ZPL if (dataset[0] != '\0' && domount) ret = (zfs_mount(zhp, NULL, 0) != 0); +#endif /* HAVE_ZPL */ libzfs_fini(g_zfs); diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c index b6c454d24b..ca3f37b900 100644 --- a/cmd/zpool/zpool_main.c +++ b/cmd/zpool/zpool_main.c @@ -696,7 +696,9 @@ zpool_do_create(int argc, char **argv) (strcmp(mountpoint, ZFS_MOUNTPOINT_LEGACY) != 0 && strcmp(mountpoint, ZFS_MOUNTPOINT_NONE) != 0)) { char buf[MAXPATHLEN]; +#ifdef HAVE_ZPL DIR *dirp; +#endif if (mountpoint && mountpoint[0] != '/') { (void) fprintf(stderr, gettext("invalid mountpoint " @@ -721,6 +723,7 @@ zpool_do_create(int 
argc, char **argv) mountpoint); } +#ifdef HAVE_ZPL if ((dirp = opendir(buf)) == NULL && errno != ENOENT) { (void) fprintf(stderr, gettext("mountpoint '%s' : " "%s\n"), buf, strerror(errno)); @@ -743,6 +746,7 @@ zpool_do_create(int argc, char **argv) goto errout; } } +#endif /* HAVE_ZPL */ } if (dryrun) { @@ -773,8 +777,12 @@ zpool_do_create(int argc, char **argv) zfs_prop_to_name( ZFS_PROP_MOUNTPOINT), mountpoint) == 0); +#ifdef HAVE_ZPL if (zfs_mount(pool, NULL, 0) == 0) ret = zfs_shareall(pool); +#else + ret = 0; +#endif /* HAVE_ZPL */ zfs_close(pool); } } else if (libzfs_errno(g_zfs) == EZFS_INVALIDNAME) { @@ -1531,11 +1539,13 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts, if ((zhp = zpool_open_canfail(g_zfs, name)) == NULL) return (1); +#if HAVE_ZPL if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL && zpool_enable_datasets(zhp, mntopts, 0) != 0) { zpool_close(zhp); return (1); } +#endif /* HAVE_ZPL */ zpool_close(zhp); return (error); diff --git a/lib/libzfs/include/libzfs.h b/lib/libzfs/include/libzfs.h index 21e0fdc5c6..f08356b49f 100644 --- a/lib/libzfs/include/libzfs.h +++ b/lib/libzfs/include/libzfs.h @@ -602,9 +602,6 @@ extern int zpool_read_label(int, nvlist_t **); extern int zpool_create_zvol_links(zpool_handle_t *); extern int zpool_remove_zvol_links(zpool_handle_t *); -/* is this zvol valid for use as a dump device? 
*/ -extern int zvol_check_dump_config(char *); - /* * Management interfaces for SMB ACL files */ diff --git a/lib/libzfs/libzfs_changelist.c b/lib/libzfs/libzfs_changelist.c index 7eedffa53c..fb162cb5aa 100644 --- a/lib/libzfs/libzfs_changelist.c +++ b/lib/libzfs/libzfs_changelist.c @@ -93,6 +93,7 @@ struct prop_changelist { int changelist_prefix(prop_changelist_t *clp) { +#ifdef HAVE_ZPL prop_changenode_t *cn; int ret = 0; @@ -168,6 +169,9 @@ changelist_prefix(prop_changelist_t *clp) (void) changelist_postfix(clp); return (ret); +#else + return 0; +#endif /* HAVE_ZPL */ } /* @@ -182,6 +186,7 @@ changelist_prefix(prop_changelist_t *clp) int changelist_postfix(prop_changelist_t *clp) { +#ifdef HAVE_ZPL prop_changenode_t *cn; char shareopts[ZFS_MAXPROPLEN]; int errors = 0; @@ -306,6 +311,9 @@ changelist_postfix(prop_changelist_t *clp) } return (errors ? -1 : 0); +#else + return 0; +#endif /* HAVE_ZPL */ } /* @@ -368,6 +376,7 @@ changelist_rename(prop_changelist_t *clp, const char *src, const char *dst) int changelist_unshare(prop_changelist_t *clp, zfs_share_proto_t *proto) { +#ifdef HAVE_ZPL prop_changenode_t *cn; int ret = 0; @@ -382,6 +391,9 @@ changelist_unshare(prop_changelist_t *clp, zfs_share_proto_t *proto) } return (ret); +#else + return 0; +#endif } /* diff --git a/lib/libzfs/libzfs_dataset.c b/lib/libzfs/libzfs_dataset.c index 899ffdaaed..ce1da79638 100644 --- a/lib/libzfs/libzfs_dataset.c +++ b/lib/libzfs/libzfs_dataset.c @@ -965,6 +965,7 @@ zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl, /*FALLTHRU*/ +#ifdef HAVE_ZPL case ZFS_PROP_SHARESMB: case ZFS_PROP_SHARENFS: /* @@ -1075,6 +1076,7 @@ zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl, } break; +#endif /* HAVE_ZPL */ case ZFS_PROP_UTF8ONLY: chosen_utf = (int)intval; break; @@ -2522,6 +2524,7 @@ create_parents(libzfs_handle_t *hdl, char *target, int prefixlen) goto ancestorerr; } +#ifdef HAVE_ZPL if (zfs_mount(h, NULL, 0) != 0) { opname = 
dgettext(TEXT_DOMAIN, "mount"); goto ancestorerr; @@ -2531,6 +2534,7 @@ create_parents(libzfs_handle_t *hdl, char *target, int prefixlen) opname = dgettext(TEXT_DOMAIN, "share"); goto ancestorerr; } +#endif /* HAVE_ZPL */ zfs_close(h); } @@ -3620,7 +3624,7 @@ error: /* * Given a zvol dataset, issue the ioctl to create the appropriate minor node, - * poke devfsadm to create the /dev link, and then wait for the link to appear. + * and wait briefly for udev to create the /dev link. */ int zvol_create_link(libzfs_handle_t *hdl, const char *dataset) @@ -3632,9 +3636,8 @@ static int zvol_create_link_common(libzfs_handle_t *hdl, const char *dataset, int ifexists) { zfs_cmd_t zc = { "\0", "\0", "\0", 0 }; - di_devlink_handle_t dhdl; - priv_set_t *priv_effective; - int privileged; + char path[MAXPATHLEN]; + int error; (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name)); @@ -3671,52 +3674,13 @@ zvol_create_link_common(libzfs_handle_t *hdl, const char *dataset, int ifexists) } /* - * If privileged call devfsadm and wait for the links to - * magically appear. - * Otherwise, print out an informational message. + * Wait up to 10 seconds for udev to create the device. */ - - priv_effective = priv_allocset(); - (void) getppriv(PRIV_EFFECTIVE, priv_effective); - privileged = (priv_isfullset(priv_effective) == B_TRUE); - priv_freeset(priv_effective); - - if (privileged) { - if ((dhdl = di_devlink_init(ZFS_DRIVER, - DI_MAKE_LINK)) == NULL) { - zfs_error_aux(hdl, strerror(errno)); - (void) zfs_error_fmt(hdl, errno, - dgettext(TEXT_DOMAIN, "cannot create device links " - "for '%s'"), dataset); - (void) ioctl(hdl->libzfs_fd, ZFS_IOC_REMOVE_MINOR, &zc); - return (-1); - } else { - (void) di_devlink_fini(&dhdl); - } - } else { - char pathname[MAXPATHLEN]; - struct stat64 statbuf; - int i; - -#define MAX_WAIT 10 - - /* - * This is the poor mans way of waiting for the link - * to show up. If after 10 seconds we still don't - * have it, then print out a message. 
- */ - (void) snprintf(pathname, sizeof (pathname), "/dev/zvol/dsk/%s", - dataset); - - for (i = 0; i != MAX_WAIT; i++) { - if (stat64(pathname, &statbuf) == 0) - break; - (void) sleep(1); - } - if (i == MAX_WAIT) - (void) printf(gettext("%s may not be immediately " - "available\n"), pathname); - } + (void) snprintf(path, sizeof (path), "/dev/%s", dataset); + error = zpool_label_disk_wait(path, 10000); + if (error) + (void) printf(gettext("%s may not be immediately " + "available\n"), path); return (0); } @@ -3852,6 +3816,7 @@ zfs_expand_proplist(zfs_handle_t *zhp, zprop_list_t **plp) return (0); } +#ifdef HAVE_ZPL int zfs_iscsi_perm_check(libzfs_handle_t *hdl, char *dataset, ucred_t *cred) { @@ -3917,6 +3882,7 @@ zfs_deleg_share_nfs(libzfs_handle_t *hdl, char *dataset, char *path, error = ioctl(hdl->libzfs_fd, ZFS_IOC_SHARE, &zc); return (error); } +#endif /* HAVE_ZPL */ void zfs_prune_proplist(zfs_handle_t *zhp, uint8_t *props) diff --git a/lib/libzfs/libzfs_mount.c b/lib/libzfs/libzfs_mount.c index 1dd345a275..055b42a5f5 100644 --- a/lib/libzfs/libzfs_mount.c +++ b/lib/libzfs/libzfs_mount.c @@ -85,6 +85,7 @@ #include #define MAXISALEN 257 /* based on sysinfo(2) man page */ +#ifdef HAVE_ZPL static int zfs_share_proto(zfs_handle_t *, zfs_share_proto_t *); zfs_share_type_t zfs_is_shared_proto(zfs_handle_t *, char **, zfs_share_proto_t); @@ -1225,7 +1226,6 @@ out: return (ret); } - static int zvol_cb(const char *dataset, void *data) { @@ -1398,3 +1398,53 @@ out: return (ret); } + +#else /* HAVE_ZPL */ + +int +zfs_unshare_iscsi(zfs_handle_t *zhp) +{ + return 0; +} + +int +zfs_unmount(zfs_handle_t *zhp, const char *mountpoint, int flags) +{ + return 0; +} + +void +remove_mountpoint(zfs_handle_t *zhp) { + return; +} + +boolean_t +is_mounted(libzfs_handle_t *zfs_hdl, const char *special, char **where) +{ + return B_FALSE; +} + +boolean_t +zfs_is_mounted(zfs_handle_t *zhp, char **where) +{ + return is_mounted(zhp->zfs_hdl, zfs_get_name(zhp), where); +} + +boolean_t 
+zfs_is_shared(zfs_handle_t *zhp) +{ + return B_FALSE; +} + +int +zpool_enable_datasets(zpool_handle_t *zhp, const char *mntopts, int flags) +{ + return B_FALSE; +} + +int +zpool_disable_datasets(zpool_handle_t *zhp, boolean_t force) +{ + return B_FALSE; +} +#endif /* HAVE_ZPL */ diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c index 38cc627fcb..e8c0e7e273 100644 --- a/lib/libzfs/libzfs_pool.c +++ b/lib/libzfs/libzfs_pool.c @@ -3285,113 +3285,3 @@ zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, char *name) return 0; } - -static boolean_t -supported_dump_vdev_type(libzfs_handle_t *hdl, nvlist_t *config, char *errbuf) -{ - char *type; - nvlist_t **child; - uint_t children, c; - - verify(nvlist_lookup_string(config, ZPOOL_CONFIG_TYPE, &type) == 0); - if (strcmp(type, VDEV_TYPE_RAIDZ) == 0 || - strcmp(type, VDEV_TYPE_FILE) == 0 || - strcmp(type, VDEV_TYPE_LOG) == 0 || - strcmp(type, VDEV_TYPE_MISSING) == 0) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "vdev type '%s' is not supported"), type); - (void) zfs_error(hdl, EZFS_VDEVNOTSUP, errbuf); - return (B_FALSE); - } - if (nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_CHILDREN, - &child, &children) == 0) { - for (c = 0; c < children; c++) { - if (!supported_dump_vdev_type(hdl, child[c], errbuf)) - return (B_FALSE); - } - } - return (B_TRUE); -} - -/* - * check if this zvol is allowable for use as a dump device; zero if - * it is, > 0 if it isn't, < 0 if it isn't a zvol - */ -int -zvol_check_dump_config(char *arg) -{ - zpool_handle_t *zhp = NULL; - nvlist_t *config, *nvroot; - char *p, *volname; - nvlist_t **top; - uint_t toplevels; - libzfs_handle_t *hdl; - char errbuf[1024]; - char poolname[ZPOOL_MAXNAMELEN]; - int pathlen = strlen(ZVOL_FULL_DEV_DIR); - int ret = 1; - - if (strncmp(arg, ZVOL_FULL_DEV_DIR, pathlen)) { - return (-1); - } - - (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, - "dump is not supported on device '%s'"), arg); - - if ((hdl = libzfs_init()) == NULL) 
- return (1); - libzfs_print_on_error(hdl, B_TRUE); - - volname = arg + pathlen; - - /* check the configuration of the pool */ - if ((p = strchr(volname, '/')) == NULL) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "malformed dataset name")); - (void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf); - return (1); - } else if (p - volname >= ZFS_MAXNAMELEN) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "dataset name is too long")); - (void) zfs_error(hdl, EZFS_NAMETOOLONG, errbuf); - return (1); - } else { - (void) strncpy(poolname, volname, p - volname); - poolname[p - volname] = '\0'; - } - - if ((zhp = zpool_open(hdl, poolname)) == NULL) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "could not open pool '%s'"), poolname); - (void) zfs_error(hdl, EZFS_OPENFAILED, errbuf); - goto out; - } - config = zpool_get_config(zhp, NULL); - if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, - &nvroot) != 0) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "could not obtain vdev configuration for '%s'"), poolname); - (void) zfs_error(hdl, EZFS_INVALCONFIG, errbuf); - goto out; - } - - verify(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, - &top, &toplevels) == 0); - if (toplevels != 1) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "'%s' has multiple top level vdevs"), poolname); - (void) zfs_error(hdl, EZFS_DEVOVERFLOW, errbuf); - goto out; - } - - if (!supported_dump_vdev_type(hdl, top[0], errbuf)) { - goto out; - } - ret = 0; - -out: - if (zhp) - zpool_close(zhp); - libzfs_fini(hdl); - return (ret); -} diff --git a/lib/libzfs/libzfs_sendrecv.c b/lib/libzfs/libzfs_sendrecv.c index be5b3949f7..d28a4f9f8e 100644 --- a/lib/libzfs/libzfs_sendrecv.c +++ b/lib/libzfs/libzfs_sendrecv.c @@ -1974,6 +1974,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, * if we did a replication receive (indicated by stream_avl * being non-NULL). 
*/ +#ifdef HAVE_ZPL cp = strchr(zc.zc_value, '@'); if (cp && (ioctl_err == 0 || !newfs)) { zfs_handle_t *h; @@ -2000,6 +2001,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, } *cp = '@'; } +#endif /* HAVE_ZPL */ if (clp) { err |= changelist_postfix(clp); diff --git a/lib/libzfs/libzfs_util.c b/lib/libzfs/libzfs_util.c index 8d0c47e301..91a48bfd10 100644 --- a/lib/libzfs/libzfs_util.c +++ b/lib/libzfs/libzfs_util.c @@ -605,7 +605,9 @@ libzfs_fini(libzfs_handle_t *hdl) (void) fclose(hdl->libzfs_mnttab); if (hdl->libzfs_sharetab) (void) fclose(hdl->libzfs_sharetab); +#ifdef HAVE_ZPL zfs_uninit_libshare(hdl); +#endif if (hdl->libzfs_log_str) (void) free(hdl->libzfs_log_str); zpool_free_handles(hdl);