diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c index 91b85ed6b8..618ae10225 100644 --- a/cmd/zfs/zfs_main.c +++ b/cmd/zfs/zfs_main.c @@ -554,6 +554,7 @@ zfs_do_clone(int argc, char **argv) ret = zfs_clone(zhp, argv[1], props); /* create the mountpoint if necessary */ +#ifdef HAVE_ZPL if (ret == 0) { zfs_handle_t *clone; @@ -564,6 +565,7 @@ zfs_do_clone(int argc, char **argv) zfs_close(clone); } } +#endif /* HAVE_ZPL */ zfs_close(zhp); nvlist_free(props); @@ -761,6 +763,7 @@ zfs_do_create(int argc, char **argv) * in fact created, even if we failed to mount or share it. */ ret = 0; +#ifdef HAVE_ZPL if (canmount == ZFS_CANMOUNT_ON) { if (zfs_mount(zhp, NULL, 0) != 0) { (void) fprintf(stderr, gettext("filesystem " @@ -772,6 +775,7 @@ zfs_do_create(int argc, char **argv) ret = 1; } } +#endif /* HAVE_ZPL */ error: if (zhp) @@ -2787,6 +2791,7 @@ typedef struct get_all_cbdata { #define SPINNER_TIME 3 /* seconds */ #define MOUNT_TIME 5 /* seconds */ +#ifdef HAVE_ZPL static int get_one_dataset(zfs_handle_t *zhp, void *data) { @@ -3338,6 +3343,7 @@ share_mount(int op, int argc, char **argv) return (ret); } +#endif /* HAVE_ZPL */ /* * zfs mount -a [nfs | iscsi] @@ -3348,7 +3354,11 @@ share_mount(int op, int argc, char **argv) static int zfs_do_mount(int argc, char **argv) { +#ifdef HAVE_ZPL return (share_mount(OP_MOUNT, argc, argv)); +#else + return ENOSYS; +#endif /* HAVE_ZPL */ } /* @@ -3360,9 +3370,14 @@ zfs_do_mount(int argc, char **argv) static int zfs_do_share(int argc, char **argv) { +#ifdef HAVE_ZPL return (share_mount(OP_SHARE, argc, argv)); +#else + return ENOSYS; +#endif /* HAVE_ZPL */ } +#ifdef HAVE_ZPL typedef struct unshare_unmount_node { zfs_handle_t *un_zhp; char *un_mountp; @@ -3815,6 +3830,7 @@ unshare_unmount(int op, int argc, char **argv) return (ret); } +#endif /* HAVE_ZPL */ /* * zfs unmount -a @@ -3825,7 +3841,11 @@ unshare_unmount(int op, int argc, char **argv) static int zfs_do_unmount(int argc, char **argv) { +#ifdef HAVE_ZPL return (unshare_unmount(OP_MOUNT, argc, argv)); +#else + return ENOSYS; +#endif /* HAVE_ZPL */ } /* @@ -3837,7 +3857,11 @@ zfs_do_unmount(int argc, char **argv) static int zfs_do_unshare(int argc, char **argv) { +#ifdef HAVE_ZPL return (unshare_unmount(OP_SHARE, argc, argv)); +#else + return ENOSYS; +#endif /* HAVE_ZPL */ } /* ARGSUSED */ @@ -3853,6 +3877,7 @@ zfs_do_python(int argc, char **argv) * Called when invoked as /etc/fs/zfs/mount. Do the mount if the mountpoint is * 'legacy'. Otherwise, complain that use should be using 'zfs mount'. */ +#ifdef HAVE_ZPL static int manual_mount(int argc, char **argv) { @@ -3983,6 +4008,7 @@ manual_unmount(int argc, char **argv) return (unshare_unmount_path(OP_MOUNT, argv[0], flags, B_TRUE)); } +#endif /* HAVE_ZPL */ static int volcheck(zpool_handle_t *zhp, void *data) @@ -4027,7 +4053,9 @@ main(int argc, char **argv) { int ret; int i = 0; +#ifdef HAVE_ZPL char *progname; +#endif char *cmdname; (void) setlocale(LC_ALL, ""); @@ -4052,6 +4080,7 @@ main(int argc, char **argv) return (1); } +#ifdef HAVE_ZPL /* * This command also doubles as the /etc/fs mount and unmount program. * Determine if we should take this behavior based on argv[0]. @@ -4062,6 +4091,9 @@ main(int argc, char **argv) } else if (strcmp(progname, "umount") == 0) { ret = manual_unmount(argc, argv); } else { +#else + { +#endif /* HAVE_ZPL */ /* * Make sure the user has specified some command. */ diff --git a/cmd/zinject/zinject.c b/cmd/zinject/zinject.c index 09c377ef8d..0ad8549b24 100644 --- a/cmd/zinject/zinject.c +++ b/cmd/zinject/zinject.c @@ -751,17 +751,20 @@ main(int argc, char **argv) if (dataset[0] != '\0' && domount) { if ((zhp = zfs_open(g_zfs, dataset, ZFS_TYPE_DATASET)) == NULL) return (1); - +#ifdef HAVE_ZPL if (zfs_unmount(zhp, NULL, 0) != 0) return (1); +#endif /* HAVE_ZPL */ } record.zi_error = error; ret = register_handler(pool, flags, &record, quiet); +#ifdef HAVE_ZPL if (dataset[0] != '\0' && domount) ret = (zfs_mount(zhp, NULL, 0) != 0); +#endif /* HAVE_ZPL */ libzfs_fini(g_zfs); diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c index b6c454d24b..ca3f37b900 100644 --- a/cmd/zpool/zpool_main.c +++ b/cmd/zpool/zpool_main.c @@ -696,7 +696,9 @@ zpool_do_create(int argc, char **argv) (strcmp(mountpoint, ZFS_MOUNTPOINT_LEGACY) != 0 && strcmp(mountpoint, ZFS_MOUNTPOINT_NONE) != 0)) { char buf[MAXPATHLEN]; +#ifdef HAVE_ZPL DIR *dirp; +#endif if (mountpoint && mountpoint[0] != '/') { (void) fprintf(stderr, gettext("invalid mountpoint " @@ -721,6 +723,7 @@ zpool_do_create(int argc, char **argv) mountpoint); } +#ifdef HAVE_ZPL if ((dirp = opendir(buf)) == NULL && errno != ENOENT) { (void) fprintf(stderr, gettext("mountpoint '%s' : " "%s\n"), buf, strerror(errno)); @@ -743,6 +746,7 @@ zpool_do_create(int argc, char **argv) goto errout; } } +#endif /* HAVE_ZPL */ } if (dryrun) { @@ -773,8 +777,12 @@ zpool_do_create(int argc, char **argv) zfs_prop_to_name( ZFS_PROP_MOUNTPOINT), mountpoint) == 0); +#ifdef HAVE_ZPL if (zfs_mount(pool, NULL, 0) == 0) ret = zfs_shareall(pool); +#else + ret = 0; +#endif /* HAVE_ZPL */ zfs_close(pool); } } else if (libzfs_errno(g_zfs) == EZFS_INVALIDNAME) { @@ -1531,11 +1539,13 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts, if ((zhp = zpool_open_canfail(g_zfs, name)) == NULL) return (1); +#if HAVE_ZPL if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL && zpool_enable_datasets(zhp, mntopts, 0) != 0) { zpool_close(zhp); return (1); } +#endif /* HAVE_ZPL */ zpool_close(zhp); return (error); diff --git a/lib/libzfs/include/libzfs.h b/lib/libzfs/include/libzfs.h index e3da385d2f..598403eed4 100644 --- a/lib/libzfs/include/libzfs.h +++ b/lib/libzfs/include/libzfs.h @@ -593,9 +593,6 @@ extern int zpool_read_label(int, nvlist_t **); extern int zpool_create_zvol_links(zpool_handle_t *); extern int zpool_remove_zvol_links(zpool_handle_t *); -/* is this zvol valid for use as a dump device? */ -extern int zvol_check_dump_config(char *); - /* * Management interfaces for SMB ACL files */ diff --git a/lib/libzfs/libzfs_changelist.c b/lib/libzfs/libzfs_changelist.c index 7eedffa53c..fb162cb5aa 100644 --- a/lib/libzfs/libzfs_changelist.c +++ b/lib/libzfs/libzfs_changelist.c @@ -93,6 +93,7 @@ struct prop_changelist { int changelist_prefix(prop_changelist_t *clp) { +#ifdef HAVE_ZPL prop_changenode_t *cn; int ret = 0; @@ -168,6 +169,9 @@ changelist_prefix(prop_changelist_t *clp) (void) changelist_postfix(clp); return (ret); +#else + return 0; +#endif /* HAVE_ZPL */ } /* @@ -182,6 +186,7 @@ changelist_prefix(prop_changelist_t *clp) int changelist_postfix(prop_changelist_t *clp) { +#ifdef HAVE_ZPL prop_changenode_t *cn; char shareopts[ZFS_MAXPROPLEN]; int errors = 0; @@ -306,6 +311,9 @@ changelist_postfix(prop_changelist_t *clp) } return (errors ? -1 : 0); +#else + return 0; +#endif /* HAVE_ZPL */ } /* @@ -368,6 +376,7 @@ changelist_rename(prop_changelist_t *clp, const char *src, const char *dst) int changelist_unshare(prop_changelist_t *clp, zfs_share_proto_t *proto) { +#ifdef HAVE_ZPL prop_changenode_t *cn; int ret = 0; @@ -382,6 +391,9 @@ changelist_unshare(prop_changelist_t *clp, zfs_share_proto_t *proto) } return (ret); +#else + return 0; +#endif } /* diff --git a/lib/libzfs/libzfs_dataset.c b/lib/libzfs/libzfs_dataset.c index a55e73880b..9666023cc1 100644 --- a/lib/libzfs/libzfs_dataset.c +++ b/lib/libzfs/libzfs_dataset.c @@ -967,6 +967,7 @@ zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl, /*FALLTHRU*/ +#ifdef HAVE_ZPL case ZFS_PROP_SHARESMB: case ZFS_PROP_SHARENFS: /* @@ -1077,6 +1078,7 @@ zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl, } break; +#endif /* HAVE_ZPL */ case ZFS_PROP_UTF8ONLY: chosen_utf = (int)intval; break; @@ -2534,6 +2536,7 @@ create_parents(libzfs_handle_t *hdl, char *target, int prefixlen) goto ancestorerr; } +#ifdef HAVE_ZPL if (zfs_mount(h, NULL, 0) != 0) { opname = dgettext(TEXT_DOMAIN, "mount"); goto ancestorerr; @@ -2543,6 +2546,7 @@ create_parents(libzfs_handle_t *hdl, char *target, int prefixlen) opname = dgettext(TEXT_DOMAIN, "share"); goto ancestorerr; } +#endif /* HAVE_ZPL */ zfs_close(h); } @@ -3632,7 +3636,7 @@ error: /* * Given a zvol dataset, issue the ioctl to create the appropriate minor node, - * poke devfsadm to create the /dev link, and then wait for the link to appear. + * and wait briefly for udev to create the /dev link. */ int zvol_create_link(libzfs_handle_t *hdl, const char *dataset) @@ -3644,9 +3648,8 @@ static int zvol_create_link_common(libzfs_handle_t *hdl, const char *dataset, int ifexists) { zfs_cmd_t zc = { "\0", "\0", "\0", 0 }; - di_devlink_handle_t dhdl; - priv_set_t *priv_effective; - int privileged; + char path[MAXPATHLEN]; + int error; (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name)); @@ -3683,52 +3686,13 @@ zvol_create_link_common(libzfs_handle_t *hdl, const char *dataset, int ifexists) } /* - * If privileged call devfsadm and wait for the links to - * magically appear. - * Otherwise, print out an informational message. + * Wait up to 10 seconds for udev to create the device. */ - - priv_effective = priv_allocset(); - (void) getppriv(PRIV_EFFECTIVE, priv_effective); - privileged = (priv_isfullset(priv_effective) == B_TRUE); - priv_freeset(priv_effective); - - if (privileged) { - if ((dhdl = di_devlink_init(ZFS_DRIVER, - DI_MAKE_LINK)) == NULL) { - zfs_error_aux(hdl, strerror(errno)); - (void) zfs_error_fmt(hdl, errno, - dgettext(TEXT_DOMAIN, "cannot create device links " - "for '%s'"), dataset); - (void) ioctl(hdl->libzfs_fd, ZFS_IOC_REMOVE_MINOR, &zc); - return (-1); - } else { - (void) di_devlink_fini(&dhdl); - } - } else { - char pathname[MAXPATHLEN]; - struct stat64 statbuf; - int i; - -#define MAX_WAIT 10 - - /* - * This is the poor mans way of waiting for the link - * to show up. If after 10 seconds we still don't - * have it, then print out a message. - */ - (void) snprintf(pathname, sizeof (pathname), "/dev/zvol/dsk/%s", - dataset); - - for (i = 0; i != MAX_WAIT; i++) { - if (stat64(pathname, &statbuf) == 0) - break; - (void) sleep(1); - } - if (i == MAX_WAIT) - (void) printf(gettext("%s may not be immediately " - "available\n"), pathname); - } + (void) snprintf(path, sizeof (path), "/dev/%s", dataset); + error = zpool_label_disk_wait(path, 10000); + if (error) + (void) printf(gettext("%s may not be immediately " + "available\n"), path); return (0); } @@ -3864,6 +3828,7 @@ zfs_expand_proplist(zfs_handle_t *zhp, zprop_list_t **plp) return (0); } +#ifdef HAVE_ZPL int zfs_iscsi_perm_check(libzfs_handle_t *hdl, char *dataset, ucred_t *cred) { @@ -3929,6 +3894,7 @@ zfs_deleg_share_nfs(libzfs_handle_t *hdl, char *dataset, char *path, error = ioctl(hdl->libzfs_fd, ZFS_IOC_SHARE, &zc); return (error); } +#endif /* HAVE_ZPL */ void zfs_prune_proplist(zfs_handle_t *zhp, uint8_t *props) diff --git a/lib/libzfs/libzfs_mount.c b/lib/libzfs/libzfs_mount.c index 1dd345a275..055b42a5f5 100644 --- a/lib/libzfs/libzfs_mount.c +++ b/lib/libzfs/libzfs_mount.c @@ -85,6 +85,7 @@ #include #define MAXISALEN 257 /* based on sysinfo(2) man page */ +#ifdef HAVE_ZPL static int zfs_share_proto(zfs_handle_t *, zfs_share_proto_t *); zfs_share_type_t zfs_is_shared_proto(zfs_handle_t *, char **, zfs_share_proto_t); @@ -1225,7 +1226,6 @@ out: return (ret); } - static int zvol_cb(const char *dataset, void *data) { @@ -1398,3 +1398,53 @@ out: return (ret); } + +#else /* HAVE_ZPL */ + +int +zfs_unshare_iscsi(zfs_handle_t *zhp) +{ + return 0; +} + +int +zfs_unmount(zfs_handle_t *zhp, const char *mountpoint, int flags) +{ + return 0; +} + +void +remove_mountpoint(zfs_handle_t *zhp) { + return; +} + +boolean_t +is_mounted(libzfs_handle_t *zfs_hdl, const char *special, char **where) +{ + return B_FALSE; +} + +boolean_t +zfs_is_mounted(zfs_handle_t *zhp, char **where) +{ + return is_mounted(zhp->zfs_hdl, zfs_get_name(zhp), where); +} + +boolean_t +zfs_is_shared(zfs_handle_t *zhp) +{ + return B_FALSE; +} + +int +zpool_enable_datasets(zpool_handle_t *zhp, const char *mntopts, int flags) +{ + return B_FALSE; +} + +int +zpool_disable_datasets(zpool_handle_t *zhp, boolean_t force) +{ + return B_FALSE; +} +#endif /* HAVE_ZPL */ diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c index 38cc627fcb..e8c0e7e273 100644 --- a/lib/libzfs/libzfs_pool.c +++ b/lib/libzfs/libzfs_pool.c @@ -3285,113 +3285,3 @@ zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, char *name) return 0; } - -static boolean_t -supported_dump_vdev_type(libzfs_handle_t *hdl, nvlist_t *config, char *errbuf) -{ - char *type; - nvlist_t **child; - uint_t children, c; - - verify(nvlist_lookup_string(config, ZPOOL_CONFIG_TYPE, &type) == 0); - if (strcmp(type, VDEV_TYPE_RAIDZ) == 0 || - strcmp(type, VDEV_TYPE_FILE) == 0 || - strcmp(type, VDEV_TYPE_LOG) == 0 || - strcmp(type, VDEV_TYPE_MISSING) == 0) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "vdev type '%s' is not supported"), type); - (void) zfs_error(hdl, EZFS_VDEVNOTSUP, errbuf); - return (B_FALSE); - } - if (nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_CHILDREN, - &child, &children) == 0) { - for (c = 0; c < children; c++) { - if (!supported_dump_vdev_type(hdl, child[c], errbuf)) - return (B_FALSE); - } - } - return (B_TRUE); -} - -/* - * check if this zvol is allowable for use as a dump device; zero if - * it is, > 0 if it isn't, < 0 if it isn't a zvol - */ -int -zvol_check_dump_config(char *arg) -{ - zpool_handle_t *zhp = NULL; - nvlist_t *config, *nvroot; - char *p, *volname; - nvlist_t **top; - uint_t toplevels; - libzfs_handle_t *hdl; - char errbuf[1024]; - char poolname[ZPOOL_MAXNAMELEN]; - int pathlen = strlen(ZVOL_FULL_DEV_DIR); - int ret = 1; - - if (strncmp(arg, ZVOL_FULL_DEV_DIR, pathlen)) { - return (-1); - } - - (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, - "dump is not supported on device '%s'"), arg); - - if ((hdl = libzfs_init()) == NULL) - return (1); - libzfs_print_on_error(hdl, B_TRUE); - - volname = arg + pathlen; - - /* check the configuration of the pool */ - if ((p = strchr(volname, '/')) == NULL) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "malformed dataset name")); - (void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf); - return (1); - } else if (p - volname >= ZFS_MAXNAMELEN) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "dataset name is too long")); - (void) zfs_error(hdl, EZFS_NAMETOOLONG, errbuf); - return (1); - } else { - (void) strncpy(poolname, volname, p - volname); - poolname[p - volname] = '\0'; - } - - if ((zhp = zpool_open(hdl, poolname)) == NULL) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "could not open pool '%s'"), poolname); - (void) zfs_error(hdl, EZFS_OPENFAILED, errbuf); - goto out; - } - config = zpool_get_config(zhp, NULL); - if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, - &nvroot) != 0) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "could not obtain vdev configuration for '%s'"), poolname); - (void) zfs_error(hdl, EZFS_INVALCONFIG, errbuf); - goto out; - } - - verify(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, - &top, &toplevels) == 0); - if (toplevels != 1) { - zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, - "'%s' has multiple top level vdevs"), poolname); - (void) zfs_error(hdl, EZFS_DEVOVERFLOW, errbuf); - goto out; - } - - if (!supported_dump_vdev_type(hdl, top[0], errbuf)) { - goto out; - } - ret = 0; - -out: - if (zhp) - zpool_close(zhp); - libzfs_fini(hdl); - return (ret); -} diff --git a/lib/libzfs/libzfs_sendrecv.c b/lib/libzfs/libzfs_sendrecv.c index eb799901d6..2419e6b45d 100644 --- a/lib/libzfs/libzfs_sendrecv.c +++ b/lib/libzfs/libzfs_sendrecv.c @@ -1975,6 +1975,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, * if we did a replication receive (indicated by stream_avl * being non-NULL). */ +#ifdef HAVE_ZPL cp = strchr(zc.zc_value, '@'); if (cp && (ioctl_err == 0 || !newfs)) { zfs_handle_t *h; @@ -2001,6 +2002,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, } *cp = '@'; } +#endif /* HAVE_ZPL */ if (clp) { err |= changelist_postfix(clp); diff --git a/lib/libzfs/libzfs_util.c b/lib/libzfs/libzfs_util.c index 71d0278a41..c1f09e3c38 100644 --- a/lib/libzfs/libzfs_util.c +++ b/lib/libzfs/libzfs_util.c @@ -615,7 +615,9 @@ libzfs_fini(libzfs_handle_t *hdl) #endif if (hdl->libzfs_sharetab) (void) fclose(hdl->libzfs_sharetab); +#ifdef HAVE_ZPL zfs_uninit_libshare(hdl); +#endif if (hdl->libzfs_log_str) (void) free(hdl->libzfs_log_str); zpool_free_handles(hdl); diff --git a/module/zcommon/include/sys/fs/zfs.h b/module/zcommon/include/sys/fs/zfs.h index 86b36a8ae9..ef38ea336f 100644 --- a/module/zcommon/include/sys/fs/zfs.h +++ b/module/zcommon/include/sys/fs/zfs.h @@ -533,23 +533,13 @@ typedef struct vdev_stat { uint64_t vs_scrub_end; /* UTC scrub end time */ } vdev_stat_t; -#define ZVOL_DRIVER "zvol" -#define ZFS_DRIVER "zfs" -#define ZFS_DEV "/dev/zfs" - -/* - * zvol paths. Irritatingly, the devfsadm interfaces want all these - * paths without the /dev prefix, but for some things, we want the - * /dev prefix. Below are the names without /dev. - */ -#define ZVOL_DEV_DIR "zvol/dsk" -#define ZVOL_RDEV_DIR "zvol/rdsk" - -/* - * And here are the things we need with /dev, etc. in front of them. - */ -#define ZVOL_PSEUDO_DEV "/devices/pseudo/zfs@0:" -#define ZVOL_FULL_DEV_DIR "/dev/" ZVOL_DEV_DIR "/" +#define ZVOL_DRIVER "zvol" +#define ZFS_DRIVER "zfs" +#define ZFS_DEV "/dev/zfs" +#define ZVOL_MAJOR 230 +#define ZVOL_MINOR_BITS 4 +#define ZVOL_MINOR_MASK ((1U << ZVOL_MINOR_BITS) - 1) +#define ZVOL_MINORS (1 << 4) #define ZVOL_PROP_NAME "name" diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c index 627cfebdf9..454bde9590 100644 --- a/module/zfs/dmu.c +++ b/module/zfs/dmu.c @@ -669,9 +669,58 @@ dmu_prealloc(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, } #ifdef _KERNEL -int -dmu_read_uio(objset_t *os, uint64_t object, uio_t *uio, uint64_t size) + +/* + * Copy up to size bytes between arg_buf and req based on the data direction + * described by the req. If an entire req's data cannot be transfered the + * req's is updated such that it's current index and bv offsets correctly + * reference any residual data which could not be copied. The return value + * is the number of bytes successfully copied to arg_buf. + */ +static int +dmu_req_copy(void *arg_buf, int size, int *offset, struct request *req) { + struct bio_vec *bv; + struct req_iterator iter; + char *bv_buf; + int tocpy; + + *offset = 0; + rq_for_each_segment(bv, req, iter) { + + /* Fully consumed the passed arg_buf */ + ASSERT3S(offset, <=, size); + if (size == *offset) + break; + + /* Skip fully consumed bv's */ + if (bv->bv_len == 0) + continue; + + tocpy = MIN(bv->bv_len, size - *offset); + ASSERT3S(tocpy, >=, 0); + + bv_buf = page_address(bv->bv_page) + bv->bv_offset; + ASSERT3P(bv_buf, !=, NULL); + + if (rq_data_dir(req) == WRITE) + memcpy(arg_buf + *offset, bv_buf, tocpy); + else + memcpy(bv_buf, arg_buf + *offset, tocpy); + + *offset += tocpy; + bv->bv_offset += tocpy; + bv->bv_len -= tocpy; + } + + return 0; +} + +int +dmu_read_req(objset_t *os, uint64_t object, struct request *req) +{ + uint64_t size = blk_rq_bytes(req); + uint64_t offset = blk_rq_pos(req) << 9; dmu_buf_t **dbp; int numbufs, i, err; @@ -679,27 +728,33 @@ dmu_read_uio(objset_t *os, uint64_t object, uio_t *uio, uint64_t size) * NB: we could do this block-at-a-time, but it's nice * to be reading in parallel. */ - err = dmu_buf_hold_array(os, object, uio->uio_loffset, size, TRUE, FTAG, - &numbufs, &dbp); + err = dmu_buf_hold_array(os, object, offset, size, TRUE, FTAG, + &numbufs, &dbp); if (err) return (err); for (i = 0; i < numbufs; i++) { - int tocpy; - int bufoff; + int tocpy, didcpy, bufoff; dmu_buf_t *db = dbp[i]; - ASSERT(size > 0); + bufoff = offset - db->db_offset; + ASSERT3S(bufoff, >=, 0); - bufoff = uio->uio_loffset - db->db_offset; tocpy = (int)MIN(db->db_size - bufoff, size); + if (tocpy == 0) + break; + + err = dmu_req_copy(db->db_data + bufoff, tocpy, &didcpy, req); + + if (didcpy < tocpy) + err = EIO; - err = uiomove((char *)db->db_data + bufoff, tocpy, - UIO_READ, uio); if (err) break; size -= tocpy; + offset += didcpy; + err = 0; } dmu_buf_rele_array(dbp, numbufs, FTAG); @@ -707,30 +762,31 @@ dmu_read_uio(objset_t *os, uint64_t object, uio_t *uio, uint64_t size) } int -dmu_write_uio(objset_t *os, uint64_t object, uio_t *uio, uint64_t size, - dmu_tx_t *tx) +dmu_write_req(objset_t *os, uint64_t object, struct request *req, dmu_tx_t *tx) { + uint64_t size = blk_rq_bytes(req); + uint64_t offset = blk_rq_pos(req) << 9; dmu_buf_t **dbp; - int numbufs, i; - int err = 0; + int numbufs, i, err; if (size == 0) return (0); - err = dmu_buf_hold_array(os, object, uio->uio_loffset, size, - FALSE, FTAG, &numbufs, &dbp); + err = dmu_buf_hold_array(os, object, offset, size, FALSE, FTAG, + &numbufs, &dbp); if (err) return (err); for (i = 0; i < numbufs; i++) { - int tocpy; - int bufoff; + int tocpy, didcpy, bufoff; dmu_buf_t *db = dbp[i]; - ASSERT(size > 0); + bufoff = offset - db->db_offset; + ASSERT3S(bufoff, >=, 0); - bufoff = uio->uio_loffset - db->db_offset; tocpy = (int)MIN(db->db_size - bufoff, size); + if (tocpy == 0) + break; ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size); @@ -739,27 +795,27 @@ dmu_write_uio(objset_t *os, uint64_t object, uio_t *uio, uint64_t size, else dmu_buf_will_dirty(db, tx); - /* - * XXX uiomove could block forever (eg. nfs-backed - * pages). There needs to be a uiolockdown() function - * to lock the pages in memory, so that uiomove won't - * block. - */ - err = uiomove((char *)db->db_data + bufoff, tocpy, - UIO_WRITE, uio); + err = dmu_req_copy(db->db_data + bufoff, tocpy, &didcpy, req); if (tocpy == db->db_size) dmu_buf_fill_done(db, tx); + if (didcpy < tocpy) + err = EIO; + if (err) break; size -= tocpy; + offset += didcpy; + err = 0; } dmu_buf_rele_array(dbp, numbufs, FTAG); return (err); } +#endif +#ifdef HAVE_ZPL int dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, page_t *pp, dmu_tx_t *tx) diff --git a/module/zfs/dsl_dataset.c b/module/zfs/dsl_dataset.c index dd74ad5103..f849832091 100644 --- a/module/zfs/dsl_dataset.c +++ b/module/zfs/dsl_dataset.c @@ -246,7 +246,13 @@ dsl_dataset_evict(dmu_buf_t *db, void *dsv) ASSERT(ds->ds_owner == NULL || DSL_DATASET_IS_DESTROYED(ds)); - dprintf_ds(ds, "evicting %s\n", ""); + /* + * XXX: Commented out because dsl_dataset_name() is called + * which references ds->ds_dir which it seems may be NULL. + * This is easily trigged with 'zfs destroy /. + * * + * dprintf_ds(ds, "evicting %s\n", ""); + */ unique_remove(ds->ds_fsid_guid); diff --git a/module/zfs/include/sys/blkdev.h b/module/zfs/include/sys/blkdev.h new file mode 100644 index 0000000000..19ded8b6ba --- /dev/null +++ b/module/zfs/include/sys/blkdev.h @@ -0,0 +1,164 @@ +#ifndef _SYS_BLKDEV_H +#define _SYS_BLKDEV_H + +#ifdef _KERNEL + +#include +#include +#include "zfs_config.h" + +#ifndef HAVE_BLK_FETCH_REQUEST +static inline struct request * +blk_fetch_request(struct request_queue *q) +{ + struct request *req; + + req = elv_next_request(q); + if (req) + blkdev_dequeue_request(req); + + return req; +} +#endif /* HAVE_BLK_FETCH_REQUEST */ + +#ifndef HAVE_BLK_REQUEUE_REQUEST +static inline void +blk_requeue_request(request_queue_t *q, struct request *req) +{ + elv_requeue_request(q, req); +} +#endif /* HAVE_BLK_REQUEUE_REQUEST */ + +#ifndef HAVE_BLK_END_REQUEST +static inline bool +blk_end_request(struct request *req, int error, unsigned int nr_bytes) +{ + struct request_queue *q = req->q; + LIST_HEAD(list); + + /* + * Request has already been dequeued but 2.6.18 version of + * end_request() unconditionally dequeues the request so we + * add it to a local list to prevent hitting the BUG_ON. + */ + list_add(&req->queuelist, &list); + + /* + * The old API required the driver to end each segment and not + * the entire request. In our case we always need to end the + * entire request partial requests are not supported. + */ + req->hard_cur_sectors = nr_bytes >> 9; + + + spin_lock_irq(q->queue_lock); + end_request(req, ((error == 0) ? 1 : error)); + spin_unlock_irq(q->queue_lock); + + return 0; +} +#else +# ifdef HAVE_BLK_END_REQUEST_GPL_ONLY +/* + * Define required to avoid conflicting 2.6.29 non-static prototype for a + * GPL-only version of the helper. As of 2.6.31 the helper is available + * to non-GPL modules and is not explicitly exported GPL-only. + */ +# define blk_end_request ___blk_end_request +static inline bool +___blk_end_request(struct request *req, int error, unsigned int nr_bytes) +{ + struct request_queue *q = req->q; + + /* + * The old API required the driver to end each segment and not + * the entire request. In our case we always need to end the + * entire request partial requests are not supported. + */ + req->hard_cur_sectors = nr_bytes >> 9; + + spin_lock_irq(q->queue_lock); + end_request(req, ((error == 0) ? 1 : error)); + spin_unlock_irq(q->queue_lock); + + return 0; +} +# endif /* HAVE_BLK_END_REQUEST_GPL_ONLY */ +#endif /* HAVE_BLK_END_REQUEST */ + +#ifndef HAVE_BLK_RQ_POS +static inline sector_t +blk_rq_pos(struct request *req) +{ + return req->sector; +} +#endif /* HAVE_BLK_RQ_POS */ + +#ifndef HAVE_BLK_RQ_SECTORS +static inline unsigned int +blk_rq_sectors(struct request *req) +{ + return req->nr_sectors; +} +#endif /* HAVE_BLK_RQ_SECTORS */ + +#if !defined(HAVE_BLK_RQ_BYTES) || defined(HAVE_BLK_RQ_BYTES_GPL_ONLY) +/* + * Define required to avoid conflicting 2.6.29 non-static prototype for a + * GPL-only version of the helper. As of 2.6.31 the helper is available + * to non-GPL modules in the form of a static inline in the header. + */ +#define blk_rq_bytes __blk_rq_bytes +static inline unsigned int +__blk_rq_bytes(struct request *req) +{ + return blk_rq_sectors(req) << 9; +} +#endif /* !HAVE_BLK_RQ_BYTES || HAVE_BLK_RQ_BYTES_GPL_ONLY */ + +#ifndef HAVE_GET_DISK_RO +static inline int +get_disk_ro(struct gendisk *disk) +{ + int policy = 0; + + if (disk->part[0]) + policy = disk->part[0]->policy; + + return policy; +} +#endif /* HAVE_GET_DISK_RO */ + +#ifndef HAVE_RQ_IS_SYNC +static inline bool +rq_is_sync(struct request *req) +{ + return (req->flags & REQ_RW_SYNC); +} +#endif /* HAVE_RQ_IS_SYNC */ + +#ifndef HAVE_RQ_FOR_EACH_SEGMENT +struct req_iterator { + int i; + struct bio *bio; +}; + +# define for_each_bio(_bio) \ + for (; _bio; _bio = _bio->bi_next) + +# define __rq_for_each_bio(_bio, rq) \ + if ((rq->bio)) \ + for (_bio = (rq)->bio; _bio; _bio = _bio->bi_next) + +# define rq_for_each_segment(bvl, _rq, _iter) \ + __rq_for_each_bio(_iter.bio, _rq) \ + bio_for_each_segment(bvl, _iter.bio, _iter.i) +#endif /* HAVE_RQ_FOR_EACH_SEGMENT */ + +#ifndef DISK_NAME_LEN +#define DISK_NAME_LEN 32 +#endif /* DISK_NAME_LEN */ + +#endif /* KERNEL */ + +#endif /* _SYS_BLKDEV_H */ diff --git a/module/zfs/include/sys/dmu.h b/module/zfs/include/sys/dmu.h index 88e3c1bd4b..8456b75e7f 100644 --- a/module/zfs/include/sys/dmu.h +++ b/module/zfs/include/sys/dmu.h @@ -38,12 +38,14 @@ #include #include #include +#ifdef _KERNEL +#include +#endif #ifdef __cplusplus extern "C" { #endif -struct uio; struct page; struct vnode; struct spa; @@ -490,11 +492,14 @@ void dmu_write(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, const void *buf, dmu_tx_t *tx); void dmu_prealloc(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, dmu_tx_t *tx); -int dmu_read_uio(objset_t *os, uint64_t object, struct uio *uio, uint64_t size); -int dmu_write_uio(objset_t *os, uint64_t object, struct uio *uio, uint64_t size, - dmu_tx_t *tx); +#ifdef _KERNEL +int dmu_read_req(objset_t *os, uint64_t object, struct request *req); +int dmu_write_req(objset_t *os, uint64_t object, struct request *req, dmu_tx_t *tx); +#endif +#ifdef HAVE_ZPL int dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, struct page *pp, dmu_tx_t *tx); +#endif struct arc_buf *dmu_request_arcbuf(dmu_buf_t *handle, int size); void dmu_return_arcbuf(struct arc_buf *buf); void dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, struct arc_buf *buf, diff --git a/module/zfs/include/sys/spa.h b/module/zfs/include/sys/spa.h index 14c3db86f6..e8e0c92497 100644 --- a/module/zfs/include/sys/spa.h +++ b/module/zfs/include/sys/spa.h @@ -456,7 +456,6 @@ extern uint64_t spa_get_dspace(spa_t *spa); extern uint64_t spa_get_asize(spa_t *spa, uint64_t lsize); extern uint64_t spa_version(spa_t *spa); extern int spa_max_replication(spa_t *spa); -extern int spa_busy(void); extern uint8_t spa_get_failmode(spa_t *spa); extern boolean_t spa_suspended(spa_t *spa); diff --git a/module/zfs/include/sys/zfs_fuid.h b/module/zfs/include/sys/zfs_fuid.h index f81ddf4a55..9910ce11a4 100644 --- a/module/zfs/include/sys/zfs_fuid.h +++ b/module/zfs/include/sys/zfs_fuid.h @@ -98,6 +98,7 @@ typedef struct zfs_fuid_info { } zfs_fuid_info_t; #ifdef _KERNEL +#ifdef HAVE_ZPL struct znode; extern uid_t zfs_fuid_map_id(zfsvfs_t *, uint64_t, cred_t *, zfs_fuid_type_t); extern void zfs_fuid_destroy(zfsvfs_t *); @@ -115,6 +116,7 @@ extern int zfs_fuid_find_by_domain(zfsvfs_t *, const char *domain, char **retdomain, boolean_t addok); extern const char *zfs_fuid_find_by_idx(zfsvfs_t *zfsvfs, uint32_t idx); extern void zfs_fuid_txhold(zfsvfs_t *zfsvfs, dmu_tx_t *tx); +#endif /* HAVE_ZPL */ #endif char *zfs_fuid_idx_domain(avl_tree_t *, uint32_t); diff --git a/module/zfs/include/sys/zfs_ioctl.h b/module/zfs/include/sys/zfs_ioctl.h index 3a3e6e7118..59992993df 100644 --- a/module/zfs/include/sys/zfs_ioctl.h +++ b/module/zfs/include/sys/zfs_ioctl.h @@ -191,7 +191,6 @@ extern int zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr); extern int zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr); extern int zfs_secpolicy_destroy_perms(const char *name, cred_t *cr); -extern int zfs_busy(void); extern int zfs_unmount_snap(char *, void *); #endif /* _KERNEL */ diff --git a/module/zfs/include/sys/zfs_znode.h b/module/zfs/include/sys/zfs_znode.h index 240804c6ed..d141c03027 100644 --- a/module/zfs/include/sys/zfs_znode.h +++ b/module/zfs/include/sys/zfs_znode.h @@ -345,8 +345,10 @@ extern void zfs_xvattr_set(znode_t *zp, xvattr_t *xvap); extern void zfs_upgrade(zfsvfs_t *zfsvfs, dmu_tx_t *tx); extern int zfs_create_share_dir(zfsvfs_t *zfsvfs, dmu_tx_t *tx); +#if defined(HAVE_UIO_RW) extern caddr_t zfs_map_page(page_t *, enum seg_rw); extern void zfs_unmap_page(page_t *, caddr_t); +#endif /* HAVE_UIO_RW */ extern zil_get_data_t zfs_get_data; extern zil_replay_func_t *zfs_replay_vector[TX_MAX_TYPE]; diff --git a/module/zfs/include/sys/zvol.h b/module/zfs/include/sys/zvol.h index 74ebc83e08..e162e2b47c 100644 --- a/module/zfs/include/sys/zvol.h +++ b/module/zfs/include/sys/zvol.h @@ -20,51 +20,33 @@ */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #ifndef _SYS_ZVOL_H #define _SYS_ZVOL_H - - #include -#ifdef __cplusplus -extern "C" { -#endif - #define ZVOL_OBJ 1ULL #define ZVOL_ZAP_OBJ 2ULL #ifdef _KERNEL + +#include + extern int zvol_check_volsize(uint64_t volsize, uint64_t blocksize); extern int zvol_check_volblocksize(uint64_t volblocksize); extern int zvol_get_stats(objset_t *os, nvlist_t *nv); extern void zvol_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx); -extern int zvol_create_minor(const char *, major_t); +extern int zvol_create_minor(const char *); extern int zvol_remove_minor(const char *); -extern int zvol_set_volsize(const char *, major_t, uint64_t); +extern int zvol_set_volsize(const char *, uint64_t); extern int zvol_set_volblocksize(const char *, uint64_t); -extern int zvol_open(dev_t *devp, int flag, int otyp, cred_t *cr); -extern int zvol_dump(dev_t dev, caddr_t addr, daddr_t offset, int nblocks); -extern int zvol_close(dev_t dev, int flag, int otyp, cred_t *cr); -extern int zvol_strategy(buf_t *bp); -extern int zvol_read(dev_t dev, uio_t *uiop, cred_t *cr); -extern int zvol_write(dev_t dev, uio_t *uiop, cred_t *cr); -extern int zvol_aread(dev_t dev, struct aio_req *aio, cred_t *cr); -extern int zvol_awrite(dev_t dev, struct aio_req *aio, cred_t *cr); -extern int zvol_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, - int *rvalp); -extern int zvol_busy(void); -extern void zvol_init(void); +extern int zvol_init(void); extern void zvol_fini(void); -#endif - -#ifdef __cplusplus -} -#endif +#endif /* KERNEL */ #endif /* _SYS_ZVOL_H */ diff --git a/module/zfs/rrwlock.c b/module/zfs/rrwlock.c index 4cef53f951..8d05d93872 100644 --- a/module/zfs/rrwlock.c +++ b/module/zfs/rrwlock.c @@ -23,6 +23,8 @@ * Use is subject to license terms. */ +#ifdef HAVE_ZPL + #include #include @@ -262,3 +264,4 @@ rrw_held(rrwlock_t *rrl, krw_t rw) return (held); } +#endif /* HAVE_ZPL */ diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c index f80e690786..4f40e3a608 100644 --- a/module/zfs/spa_misc.c +++ b/module/zfs/spa_misc.c @@ -1348,12 +1348,6 @@ spa_name_compare(const void *a1, const void *a2) return (0); } -int -spa_busy(void) -{ - return (spa_active_count); -} - void spa_boot_init(void) { diff --git a/module/zfs/zfs_acl.c b/module/zfs/zfs_acl.c index 12ffe9f30a..9cb20a1534 100644 --- a/module/zfs/zfs_acl.c +++ b/module/zfs/zfs_acl.c @@ -23,6 +23,8 @@ * Use is subject to license terms. */ +#ifdef HAVE_ZPL + #include #include #include @@ -2848,3 +2850,5 @@ zfs_zaccess_rename(znode_t *sdzp, znode_t *szp, znode_t *tdzp, return (error); } + +#endif /* HAVE_ZPL */ diff --git a/module/zfs/zfs_ctldir.c b/module/zfs/zfs_ctldir.c index c6c719871c..f46699fdfb 100644 --- a/module/zfs/zfs_ctldir.c +++ b/module/zfs/zfs_ctldir.c @@ -64,6 +64,8 @@ * so that it cannot be freed until all snapshots have been unmounted. */ +#ifdef HAVE_ZPL + #include #include #include @@ -1333,3 +1335,4 @@ zfsctl_umount_snapshots(vfs_t *vfsp, int fflags, cred_t *cr) return (error); } +#endif /* HAVE_ZPL */ diff --git a/module/zfs/zfs_dir.c b/module/zfs/zfs_dir.c index b3f7683286..b76113bad0 100644 --- a/module/zfs/zfs_dir.c +++ b/module/zfs/zfs_dir.c @@ -23,6 +23,8 @@ * Use is subject to license terms. */ +#ifdef HAVE_ZPL + #include #include #include @@ -962,3 +964,4 @@ zfs_sticky_remove_access(znode_t *zdp, znode_t *zp, cred_t *cr) else return (secpolicy_vnode_remove(cr)); } +#endif /* HAVE_ZPL */ diff --git a/module/zfs/zfs_fuid.c b/module/zfs/zfs_fuid.c index 2409652186..4925bb0046 100644 --- a/module/zfs/zfs_fuid.c +++ b/module/zfs/zfs_fuid.c @@ -194,6 +194,7 @@ zfs_fuid_idx_domain(avl_tree_t *idx_tree, uint32_t idx) } #ifdef _KERNEL +#ifdef HAVE_ZPL /* * Load the fuid table(s) into memory. */ @@ -743,4 +744,5 @@ zfs_fuid_txhold(zfsvfs_t *zfsvfs, dmu_tx_t *tx) FUID_SIZE_ESTIMATE(zfsvfs)); } } +#endif /* HAVE_ZPL */ #endif diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index a38b49b319..72d4d0c7aa 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -64,18 +64,16 @@ #include #include +#include + #include "zfs_namecheck.h" #include "zfs_prop.h" #include "zfs_deleg.h" - -extern struct modlfs zfs_modlfs; +#include "zfs_config.h" extern void zfs_init(void); extern void zfs_fini(void); -ldi_ident_t zfs_li = NULL; -dev_info_t *zfs_dip; - typedef int zfs_ioc_func_t(zfs_cmd_t *); typedef int zfs_secpolicy_func_t(zfs_cmd_t *, cred_t *); @@ -403,6 +401,7 @@ zfs_secpolicy_send(zfs_cmd_t *zc, cred_t *cr) ZFS_DELEG_PERM_SEND, cr)); } +#ifdef HAVE_ZPL static int zfs_secpolicy_deleg_share(zfs_cmd_t *zc, cred_t *cr) { @@ -426,10 +425,12 @@ zfs_secpolicy_deleg_share(zfs_cmd_t *zc, cred_t *cr) return (dsl_deleg_access(zc->zc_name, ZFS_DELEG_PERM_SHARE, cr)); } +#endif /* HAVE_ZPL */ int zfs_secpolicy_share(zfs_cmd_t *zc, cred_t *cr) { +#ifdef HAVE_ZPL if (!INGLOBALZONE(curproc)) return (EPERM); @@ -438,11 +439,15 @@ zfs_secpolicy_share(zfs_cmd_t *zc, cred_t *cr) } else { return (zfs_secpolicy_deleg_share(zc, cr)); } +#else + return (ENOTSUP); +#endif /* HAVE_ZPL */ } int zfs_secpolicy_smb_acl(zfs_cmd_t *zc, cred_t *cr) { +#ifdef HAVE_ZPL if (!INGLOBALZONE(curproc)) return (EPERM); @@ -451,6 +456,9 @@ zfs_secpolicy_smb_acl(zfs_cmd_t *zc, cred_t *cr) } else { return (zfs_secpolicy_deleg_share(zc, cr)); } +#else + return (ENOTSUP); +#endif /* HAVE_ZPL */ } static int @@ -645,6 +653,7 @@ zfs_secpolicy_create(zfs_cmd_t *zc, cred_t *cr) return (error); } +#ifdef HAVE_ZPL static int zfs_secpolicy_umount(zfs_cmd_t *zc, cred_t *cr) { @@ -656,6 +665,7 @@ zfs_secpolicy_umount(zfs_cmd_t *zc, cred_t *cr) } return (error); } +#endif /* HAVE_ZPL */ /* * Policy for pool operations - create/destroy pools, add vdevs, etc. Requires @@ -836,6 +846,7 @@ put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl) return (error); } +#ifdef HAVE_ZPL static int getzfsvfs(const char *dsname, zfsvfs_t **zvp) { @@ -898,6 +909,7 @@ zfsvfs_rele(zfsvfs_t *zfsvfs, void *tag) zfsvfs_free(zfsvfs); } } +#endif /* HAVE_ZPL */ static int zfs_ioc_pool_create(zfs_cmd_t *zc) @@ -1713,6 +1725,7 @@ zfs_set_prop_nvlist(const char *name, nvlist_t *nvl) if (prop == ZPROP_INVAL) { if (zfs_prop_userquota(propname)) { +#ifdef HAVE_ZPL uint64_t *valary; unsigned int vallen; const char *domain; @@ -1741,6 +1754,10 @@ zfs_set_prop_nvlist(const char *name, nvlist_t *nvl) continue; else goto out; +#else + error = ENOTSUP; + goto out; +#endif } else if (zfs_prop_user(propname)) { VERIFY(nvpair_value_string(elem, &strval) == 0); error = dsl_prop_set(name, propname, 1, @@ -1781,8 +1798,7 @@ zfs_set_prop_nvlist(const char *name, nvlist_t *nvl) case ZFS_PROP_VOLSIZE: if ((error = nvpair_value_uint64(elem, &intval)) != 0 || - (error = zvol_set_volsize(name, - ddi_driver_major(zfs_dip), intval)) != 0) + (error = zvol_set_volsize(name, intval)) != 0) goto out; break; @@ -1794,6 +1810,7 @@ zfs_set_prop_nvlist(const char *name, nvlist_t *nvl) case ZFS_PROP_VERSION: { +#ifdef HAVE_ZPL zfsvfs_t *zfsvfs; if ((error = nvpair_value_uint64(elem, &intval)) != 0) @@ -1812,6 +1829,10 @@ zfs_set_prop_nvlist(const char *name, nvlist_t *nvl) if (error) goto out; break; +#else + error = ENOTSUP; + goto out; +#endif /* HAVE_ZPL */ } default: @@ -2023,6 +2044,7 @@ zfs_ioc_pool_get_props(zfs_cmd_t *zc) static int zfs_ioc_iscsi_perm_check(zfs_cmd_t *zc) { +#ifdef HAVE_ZPL nvlist_t *nvp; int error; uint32_t uid; @@ -2065,6 +2087,9 @@ zfs_ioc_iscsi_perm_check(zfs_cmd_t *zc) zfs_prop_to_name(ZFS_PROP_SHAREISCSI), usercred); crfree(usercred); return (error); +#else + return (ENOTSUP); +#endif /* HAVE_ZPL */ } /* @@ -2147,7 +2172,7 @@ zfs_ioc_get_fsacl(zfs_cmd_t *zc) static int zfs_ioc_create_minor(zfs_cmd_t *zc) { - return (zvol_create_minor(zc->zc_name, ddi_driver_major(zfs_dip))); + return (zvol_create_minor(zc->zc_name)); } /* @@ -2162,6 +2187,7 @@ zfs_ioc_remove_minor(zfs_cmd_t *zc) return (zvol_remove_minor(zc->zc_name)); } +#ifdef HAVE_ZPL /* * Search the vfs list for a specified resource. Returns a pointer to it * or NULL if no suitable entry is found. The caller of this routine @@ -2186,6 +2212,7 @@ zfs_get_vfs(const char *resource) vfs_list_unlock(); return (vfs_found); } +#endif /* HAVE_ZPL */ /* ARGSUSED */ static void @@ -2535,6 +2562,7 @@ out: int zfs_unmount_snap(char *name, void *arg) { +#ifdef HAVE_ZPL vfs_t *vfsp = NULL; if (arg) { @@ -2566,6 +2594,7 @@ zfs_unmount_snap(char *name, void *arg) if ((err = dounmount(vfsp, flag, kcred)) != 0) return (err); } +#endif /* HAVE_ZPL */ return (0); } @@ -2621,6 +2650,7 @@ zfs_ioc_destroy(zfs_cmd_t *zc) static int zfs_ioc_rollback(zfs_cmd_t *zc) { +#ifdef HAVE_ZPL objset_t *os; int error; zfsvfs_t *zfsvfs = NULL; @@ -2654,6 +2684,9 @@ zfs_ioc_rollback(zfs_cmd_t *zc) /* Note, the dmu_objset_rollback() releases the objset for us. */ return (error); +#else + return (ENOTSUP); +#endif /* HAVE_ZPL */ } /* @@ -2727,7 +2760,9 @@ static int zfs_ioc_recv(zfs_cmd_t *zc) { file_t *fp; +#ifdef HAVE_ZPL objset_t *os; +#endif /* HAVE_ZPL */ dmu_recv_cookie_t drc; boolean_t force = (boolean_t)zc->zc_guid; int error, fd; @@ -2760,6 +2795,7 @@ zfs_ioc_recv(zfs_cmd_t *zc) return (EBADF); } +#ifdef HAVE_ZPL if (props && dmu_objset_open(tofs, DMU_OST_ANY, DS_MODE_USER | DS_MODE_READONLY, &os) == 0) { /* @@ -2770,6 +2806,7 @@ zfs_ioc_recv(zfs_cmd_t *zc) dmu_objset_close(os); } +#endif /* HAVE_ZPL */ if (zc->zc_string[0]) { error = dmu_objset_open(zc->zc_string, DMU_OST_ANY, @@ -2801,6 +2838,7 @@ zfs_ioc_recv(zfs_cmd_t *zc) error = dmu_recv_stream(&drc, fp->f_vnode, &off); if (error == 0) { +#ifdef HAVE_ZPL zfsvfs_t *zfsvfs = NULL; if (getzfsvfs(tofs, &zfsvfs) == 0) { @@ -2827,6 +2865,9 @@ zfs_ioc_recv(zfs_cmd_t *zc) } else { error = dmu_recv_end(&drc); } +#else + error = dmu_recv_end(&drc); +#endif /* HAVE_ZPL */ } zc->zc_cookie = off - fp->f_offset; @@ -3057,6 +3098,7 @@ zfs_ioc_promote(zfs_cmd_t *zc) static int zfs_ioc_userspace_one(zfs_cmd_t *zc) { +#ifdef HAVE_ZPL zfsvfs_t *zfsvfs; int error; @@ -3072,6 +3114,9 @@ zfs_ioc_userspace_one(zfs_cmd_t *zc) zfsvfs_rele(zfsvfs, FTAG); return (error); +#else + return (ENOTSUP); +#endif /* HAVE_ZPL */ } /* @@ -3088,6 +3133,7 @@ zfs_ioc_userspace_one(zfs_cmd_t *zc) static int zfs_ioc_userspace_many(zfs_cmd_t *zc) { +#ifdef HAVE_ZPL zfsvfs_t *zfsvfs; int error; @@ -3110,6 +3156,9 @@ zfs_ioc_userspace_many(zfs_cmd_t *zc) zfsvfs_rele(zfsvfs, FTAG); return (error); +#else + return (ENOTSUP); +#endif /* HAVE_ZPL */ } /* @@ -3122,6 +3171,7 @@ zfs_ioc_userspace_many(zfs_cmd_t *zc) static int zfs_ioc_userspace_upgrade(zfs_cmd_t *zc) { +#ifdef HAVE_ZPL objset_t *os; int error; zfsvfs_t *zfsvfs; @@ -3154,6 +3204,9 @@ zfs_ioc_userspace_upgrade(zfs_cmd_t *zc) } return (error); +#else + return (ENOTSUP); +#endif /* HAVE_ZPL */ } /* @@ -3163,6 +3216,7 @@ zfs_ioc_userspace_upgrade(zfs_cmd_t *zc) * the first file system is shared. * Neither sharefs, nfs or smbsrv are unloadable modules. */ +#ifdef HAVE_ZPL int (*znfsexport_fs)(void *arg); int (*zshare_fs)(enum sharefs_sys_op, share_t *, uint32_t); int (*zsmbexport_fs)(void *arg, boolean_t add_share); @@ -3194,10 +3248,12 @@ zfs_init_sharefs() } return (0); } +#endif /* HAVE_ZPL */ static int zfs_ioc_share(zfs_cmd_t *zc) { +#ifdef HAVE_ZPL int error; int opcode; @@ -3287,7 +3343,9 @@ zfs_ioc_share(zfs_cmd_t *zc) zc->zc_share.z_sharemax); return (error); - +#else + return (ENOTSUP); +#endif /* HAVE_ZPL */ } ace_t full_access[] = { @@ -3297,6 +3355,7 @@ ace_t full_access[] = { /* * Remove all ACL files in shares dir */ +#ifdef HAVE_ZPL static int zfs_smb_acl_purge(znode_t *dzp) { @@ -3315,10 +3374,12 @@ zfs_smb_acl_purge(znode_t *dzp) zap_cursor_fini(&zc); return (error); } +#endif /* HAVE ZPL */ static int zfs_ioc_smb_acl(zfs_cmd_t *zc) { +#ifdef HAVE_ZPL vnode_t *vp; znode_t *dzp; vnode_t *resourcevp = NULL; @@ -3440,6 +3501,9 @@ zfs_ioc_smb_acl(zfs_cmd_t *zc) ZFS_EXIT(zfsvfs); return (error); +#else + return (ENOTSUP); +#endif /* HAVE_ZPL */ } /* @@ -3632,28 +3696,23 @@ pool_status_check(const char *name, zfs_ioc_namecheck_t type) return (error); } -static int -zfsdev_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp) +static long +zfs_ioctl(struct file *filp, unsigned cmd, unsigned long arg) { zfs_cmd_t *zc; uint_t vec; - int error, rc; - - if (getminor(dev) != 0) - return (zvol_ioctl(dev, cmd, arg, flag, cr, rvalp)); + int error, rc, flag = 0; vec = cmd - ZFS_IOC; - ASSERT3U(getmajor(dev), ==, ddi_driver_major(zfs_dip)); - if (vec >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0])) - return (EINVAL); + return (-EINVAL); zc = vmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP); error = ddi_copyin((void *)arg, zc, sizeof (zfs_cmd_t), flag); if (error == 0) - error = zfs_ioc_vec[vec].zvec_secpolicy(zc, cr); + error = zfs_ioc_vec[vec].zvec_secpolicy(zc, NULL); /* * Ensure that all pool/dataset names are valid before we pass down to @@ -3695,121 +3754,59 @@ zfsdev_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp) } vmem_free(zc, sizeof (zfs_cmd_t)); - return (error); + return (-error); } -static int -zfs_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) +#ifdef CONFIG_COMPAT +static long +zfs_compat_ioctl(struct file *filp, unsigned cmd, unsigned long arg) { - if (cmd != DDI_ATTACH) - return (DDI_FAILURE); - - if (ddi_create_minor_node(dip, "zfs", S_IFCHR, 0, - DDI_PSEUDO, 0) == DDI_FAILURE) - return (DDI_FAILURE); - - zfs_dip = dip; - - ddi_report_dev(dip); - - return (DDI_SUCCESS); + return zfs_ioctl(filp, cmd, arg); } +#else +#define zfs_compat_ioctl NULL +#endif + +static const struct file_operations zfs_fops = { + .unlocked_ioctl = zfs_ioctl, + .compat_ioctl = zfs_compat_ioctl, + .owner = THIS_MODULE, +}; + +static struct miscdevice zfs_misc = { + .minor = MISC_DYNAMIC_MINOR, + .name = ZFS_DRIVER, + .fops = &zfs_fops, +}; static int -zfs_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) +zfs_attach(void) { - if (spa_busy() || zfs_busy() || zvol_busy()) - return (DDI_FAILURE); + int error; - if (cmd != DDI_DETACH) - return (DDI_FAILURE); - - zfs_dip = NULL; - - ddi_prop_remove_all(dip); - ddi_remove_minor_node(dip, NULL); - - return (DDI_SUCCESS); -} - -/*ARGSUSED*/ -static int -zfs_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) -{ - switch (infocmd) { - case DDI_INFO_DEVT2DEVINFO: - *result = zfs_dip; - return (DDI_SUCCESS); - - case DDI_INFO_DEVT2INSTANCE: - *result = (void *)0; - return (DDI_SUCCESS); + error = misc_register(&zfs_misc); + if (error) { + printk(KERN_INFO "ZFS: misc_register() failed %d\n", error); + return (error); } - return (DDI_FAILURE); + return (0); } -/* - * OK, so this is a little weird. - * - * /dev/zfs is the control node, i.e. minor 0. - * /dev/zvol/[r]dsk/pool/dataset are the zvols, minor > 0. - * - * /dev/zfs has basically nothing to do except serve up ioctls, - * so most of the standard driver entry points are in zvol.c. - */ -static struct cb_ops zfs_cb_ops = { - zvol_open, /* open */ - zvol_close, /* close */ - zvol_strategy, /* strategy */ - nodev, /* print */ - zvol_dump, /* dump */ - zvol_read, /* read */ - zvol_write, /* write */ - zfsdev_ioctl, /* ioctl */ - nodev, /* devmap */ - nodev, /* mmap */ - nodev, /* segmap */ - nochpoll, /* poll */ - ddi_prop_op, /* prop_op */ - NULL, /* streamtab */ - D_NEW | D_MP | D_64BIT, /* Driver compatibility flag */ - CB_REV, /* version */ - nodev, /* async read */ - nodev, /* async write */ -}; - -static struct dev_ops zfs_dev_ops = { - DEVO_REV, /* version */ - 0, /* refcnt */ - zfs_info, /* info */ - nulldev, /* identify */ - nulldev, /* probe */ - zfs_attach, /* attach */ - zfs_detach, /* detach */ - nodev, /* reset */ - &zfs_cb_ops, /* driver operations */ - NULL, /* no bus operations */ - NULL, /* power */ - ddi_quiesce_not_needed, /* quiesce */ -}; - -static struct modldrv zfs_modldrv = { - &mod_driverops, - "ZFS storage pool", - &zfs_dev_ops -}; - -static struct modlinkage modlinkage = { - MODREV_1, - (void *)&zfs_modlfs, - (void *)&zfs_modldrv, - NULL -}; +static void +zfs_detach(void) +{ + int error; + error = misc_deregister(&zfs_misc); + if (error) + printk(KERN_INFO "ZFS: misc_deregister() failed %d\n", error); +} +#ifdef HAVE_ZPL uint_t zfs_fsyncer_key; extern uint_t rrw_tsd_key; +#endif int _init(void) @@ -3818,21 +3815,28 @@ _init(void) spa_init(FREAD | FWRITE); zfs_init(); - zvol_init(); - if ((error = mod_install(&modlinkage)) != 0) { - zvol_fini(); + if ((error = zvol_init()) != 0) { zfs_fini(); spa_fini(); return (error); } + if ((error = zfs_attach()) != 0) { + (void)zvol_fini(); + zfs_fini(); + spa_fini(); + return (error); + } + +#ifdef HAVE_ZPL tsd_create(&zfs_fsyncer_key, NULL); tsd_create(&rrw_tsd_key, NULL); - error = ldi_ident_from_mod(&modlinkage, &zfs_li); - ASSERT(error == 0); mutex_init(&zfs_share_lock, NULL, MUTEX_DEFAULT, NULL); +#endif /* HAVE_ZPL */ + + printk(KERN_INFO "ZFS: Loaded ZFS Filesystem v%s\n", ZFS_META_VERSION); return (0); } @@ -3840,17 +3844,11 @@ _init(void) int _fini(void) { - int error; - - if (spa_busy() || zfs_busy() || zvol_busy() || zio_injection_enabled) - return (EBUSY); - - if ((error = mod_remove(&modlinkage)) != 0) - return (error); - + zfs_detach(); zvol_fini(); zfs_fini(); spa_fini(); +#ifdef HAVE_ZPL if (zfs_nfsshare_inited) (void) ddi_modclose(nfs_mod); if (zfs_smbshare_inited) @@ -3858,16 +3856,18 @@ _fini(void) if (zfs_nfsshare_inited || zfs_smbshare_inited) (void) ddi_modclose(sharefs_mod); - tsd_destroy(&zfs_fsyncer_key); - ldi_ident_release(zfs_li); - zfs_li = NULL; mutex_destroy(&zfs_share_lock); + tsd_destroy(&zfs_fsyncer_key); +#endif /* HAVE_ZPL */ - return (error); + return (0); } -int -_info(struct modinfo *modinfop) -{ - return (mod_info(&modlinkage, modinfop)); -} +#ifdef HAVE_SPL +spl_module_init(_init); +spl_module_exit(_fini); + +MODULE_AUTHOR("Sun Microsystems, Inc"); +MODULE_DESCRIPTION("ZFS"); +MODULE_LICENSE("CDDL"); +#endif /* HAVE_SPL */ diff --git a/module/zfs/zfs_log.c b/module/zfs/zfs_log.c index 3f0b6b0ed3..5bb9fb2b29 100644 --- a/module/zfs/zfs_log.c +++ b/module/zfs/zfs_log.c @@ -23,6 +23,8 @@ * Use is subject to license terms. */ +#ifdef HAVE_ZPL + #include #include #include @@ -704,3 +706,5 @@ zfs_log_acl(zilog_t *zilog, dmu_tx_t *tx, znode_t *zp, seq = zil_itx_assign(zilog, itx, tx); zp->z_last_itx = seq; } + +#endif /* HAVE_ZPL */ diff --git a/module/zfs/zfs_replay.c b/module/zfs/zfs_replay.c index 819ba2886c..7aeff072a4 100644 --- a/module/zfs/zfs_replay.c +++ b/module/zfs/zfs_replay.c @@ -23,7 +23,7 @@ * Use is subject to license terms. */ - +#ifdef HAVE_ZPL #include #include @@ -876,3 +876,4 @@ zil_replay_func_t *zfs_replay_vector[TX_MAX_TYPE] = { zfs_replay_create, /* TX_MKDIR_ATTR */ zfs_replay_create_acl, /* TX_MKDIR_ACL_ATTR */ }; +#endif /* HAVE_ZPL */ diff --git a/module/zfs/zfs_vfsops.c b/module/zfs/zfs_vfsops.c index d03f92ba00..c4fa8c12c8 100644 --- a/module/zfs/zfs_vfsops.c +++ b/module/zfs/zfs_vfsops.c @@ -61,6 +61,7 @@ #include #include +#ifdef HAVE_ZPL int zfsfstype; vfsops_t *zfs_vfsops = NULL; static major_t zfs_major; @@ -1957,10 +1958,12 @@ zfs_vfsinit(int fstype, char *name) return (0); } +#endif /* HAVE_ZPL */ void zfs_init(void) { +#ifdef HAVE_ZPL /* * Initialize .zfs directory structures */ @@ -1972,21 +1975,19 @@ zfs_init(void) zfs_znode_init(); dmu_objset_register_type(DMU_OST_ZFS, zfs_space_delta_cb); +#endif /* HAVE_ZPL */ } void zfs_fini(void) { +#ifdef HAVE_ZPL zfsctl_fini(); zfs_znode_fini(); +#endif /* HAVE_ZPL */ } -int -zfs_busy(void) -{ - return (zfs_active_fs_count != 0); -} - +#ifdef HAVE_ZPL int zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers) { @@ -2029,6 +2030,7 @@ zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers) return (0); } +#endif /* HAVE_ZPL */ /* * Read a property stored within the master node. @@ -2072,6 +2074,7 @@ zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value) return (error); } +#ifdef HAVE_ZPL static vfsdef_t vfw = { VFSDEF_VERSION, MNTTYPE_ZFS, @@ -2084,3 +2087,4 @@ static vfsdef_t vfw = { struct modlfs zfs_modlfs = { &mod_fsops, "ZFS filesystem version " SPA_VERSION_STRING, &vfw }; +#endif /* HAVE_ZPL */ diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c index 8eb4665aed..818d88cbff 100644 --- a/module/zfs/zfs_vnops.c +++ b/module/zfs/zfs_vnops.c @@ -25,6 +25,8 @@ /* Portions Copyright 2007 Jeremy Teo */ +#ifdef HAVE_ZPL + #include #include #include @@ -318,6 +320,7 @@ zfs_ioctl(vnode_t *vp, int com, intptr_t data, int flag, cred_t *cred, return (ENOTTY); } +#if defined(_KERNEL) && defined(HAVE_UIO_RW) /* * Utility functions to map and unmap a single physical page. These * are used to manage the mappable copies of ZFS file data, and therefore @@ -342,6 +345,7 @@ zfs_unmap_page(page_t *pp, caddr_t addr) ppmapout(addr); } } +#endif /* _KERNEL && HAVE_UIO_RW */ /* * When a file is memory mapped, we must keep the IO data synchronized @@ -4695,3 +4699,4 @@ const fs_operation_def_t zfs_evnodeops_template[] = { VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, NULL, NULL }; +#endif /* HAVE_ZPL */ diff --git a/module/zfs/zfs_znode.c b/module/zfs/zfs_znode.c index e6ec7bcf67..8ce5bad2aa 100644 --- a/module/zfs/zfs_znode.c +++ b/module/zfs/zfs_znode.c @@ -87,6 +87,7 @@ * (such as VFS logic) that will not compile easily in userland. */ #ifdef _KERNEL +#ifdef HAVE_ZPL /* * Needed to close a small window in zfs_znode_move() that allows the zfsvfs to * be freed before it can be safely accessed. @@ -1473,21 +1474,28 @@ log: dmu_tx_commit(tx); return (0); } +#endif /* HAVE_ZPL */ void zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) { - zfsvfs_t zfsvfs; uint64_t moid, obj, version; uint64_t sense = ZFS_CASE_SENSITIVE; uint64_t norm = 0; nvpair_t *elem; int error; +#ifdef HAVE_ZPL + zfsvfs_t zfsvfs; znode_t *rootzp = NULL; vnode_t *vp; vattr_t vattr; znode_t *zp; zfs_acl_ids_t acl_ids; +#else + timestruc_t now; + dmu_buf_t *db; + znode_phys_t *pzp; +#endif /* HAVE_ZPL */ /* * First attempt to create master node. @@ -1542,6 +1550,7 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) error = zap_add(os, moid, ZFS_UNLINKED_SET, 8, 1, &obj, tx); ASSERT(error == 0); +#ifdef HAVE_ZPL /* * Create root znode. Create minimal znode/vnode/zfsvfs * to allow zfs_mknode to work. @@ -1596,14 +1605,46 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) dmu_buf_rele(rootzp->z_dbuf, NULL); rootzp->z_dbuf = NULL; kmem_cache_free(znode_cache, rootzp); + error = zfs_create_share_dir(&zfsvfs, tx); +#else + /* + * Create root znode with code free of VFS dependencies + */ + obj = zap_create_norm(os, norm, DMU_OT_DIRECTORY_CONTENTS, + DMU_OT_ZNODE, sizeof (znode_phys_t), tx); + + VERIFY(0 == dmu_bonus_hold(os, obj, FTAG, &db)); + dmu_buf_will_dirty(db, tx); /* - * Create shares directory + * Initialize the znode physical data to zero. */ + ASSERT(db->db_size >= sizeof (znode_phys_t)); + bzero(db->db_data, db->db_size); + pzp = db->db_data; - error = zfs_create_share_dir(&zfsvfs, tx); + if (USE_FUIDS(version, os)) + pzp->zp_flags = ZFS_ARCHIVE | ZFS_AV_MODIFIED; + pzp->zp_size = 2; /* "." and ".." */ + pzp->zp_links = 2; + pzp->zp_parent = obj; + pzp->zp_gen = dmu_tx_get_txg(tx); + pzp->zp_mode = S_IFDIR | 0755; + pzp->zp_flags = ZFS_ACL_TRIVIAL; + + gethrestime(&now); + + ZFS_TIME_ENCODE(&now, pzp->zp_crtime); + ZFS_TIME_ENCODE(&now, pzp->zp_ctime); + ZFS_TIME_ENCODE(&now, pzp->zp_atime); + ZFS_TIME_ENCODE(&now, pzp->zp_mtime); + + error = zap_add(os, moid, ZFS_ROOT_OBJ, 8, 1, &obj, tx); ASSERT(error == 0); + + dmu_buf_rele(db, FTAG); +#endif /* HAVE_ZPL */ } #endif /* _KERNEL */ diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c new file mode 100644 index 0000000000..cc95bf2fc2 --- /dev/null +++ b/module/zfs/zvol.c @@ -0,0 +1,1199 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + * + * ZFS volume emulation driver. + * + * Makes a DMU object look like a volume of arbitrary size, up to 2^64 bytes. + * Volumes are accessed through the symbolic links named: + * + * /dev// + * + * Volumes are persistent through reboot. No user command needs to be + * run before opening and using a device. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +unsigned int zvol_major = ZVOL_MAJOR; +unsigned int zvol_threads = 0; + +static taskq_t *zvol_taskq; +static kmutex_t zvol_state_lock; +static list_t zvol_state_list; + +/* + * The in-core state of each volume. + */ +typedef struct zvol_state { + uint64_t zv_volsize; /* advertised space */ + uint64_t zv_volblocksize;/* volume block size */ + objset_t *zv_objset; /* objset handle */ + uint32_t zv_mode; /* DS_MODE_* at open time */ + uint32_t zv_open_count; /* open counts */ + uint32_t zv_changed; /* disk changed */ + zilog_t *zv_zilog; /* ZIL handle */ + znode_t zv_znode; /* for range locking */ + dev_t zv_dev; /* device id */ + struct gendisk *zv_disk; /* generic disk */ + struct request_queue *zv_queue; /* request queue */ + spinlock_t zv_lock; /* request queue lock */ + list_node_t zv_next; /* next zvol_state_t linkage */ +} zvol_state_t; + +/* + * Find the next available range of ZVOL_MINORS minor numbers. The + * zvol_state_list is kept in ascending minor order so we simply need + * to scan the list for the first gap in the sequence. This allows us + * to recycle minor number as devices are created and removed. + */ +static int +zvol_find_minor(unsigned *minor) +{ + zvol_state_t *zv; + + *minor = 0; + ASSERT(MUTEX_HELD(&zvol_state_lock)); + for (zv = list_head(&zvol_state_list); zv != NULL; + zv = list_next(&zvol_state_list, zv), *minor += ZVOL_MINORS) { + if (MINOR(zv->zv_dev) != MINOR(*minor)) + break; + } + + /* All minors are in use */ + if (*minor >= (1 << MINORBITS)) + return ENXIO; + + return 0; +} + +/* + * Find a zvol_state_t given the full major+minor dev_t. + */ +static zvol_state_t * +zvol_find_by_dev(dev_t dev) +{ + zvol_state_t *zv; + + ASSERT(MUTEX_HELD(&zvol_state_lock)); + for (zv = list_head(&zvol_state_list); zv != NULL; + zv = list_next(&zvol_state_list, zv)) { + if (zv->zv_dev == dev) + return zv; + } + + return NULL; +} + +/* + * Find a zvol_state_t given the name provided at zvol_alloc() time. + */ +static zvol_state_t * +zvol_find_by_name(const char *name) +{ + zvol_state_t *zv; + + ASSERT(MUTEX_HELD(&zvol_state_lock)); + for (zv = list_head(&zvol_state_list); zv != NULL; + zv = list_next(&zvol_state_list, zv)) { + if (!strncmp(zv->zv_disk->disk_name, name, DISK_NAME_LEN)) + return zv; + } + + return NULL; +} + +/* + * ZFS_IOC_CREATE callback handles dmu zvol and zap object creation. + */ +void +zvol_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx) +{ + zfs_creat_t *zct = arg; + nvlist_t *nvprops = zct->zct_props; + int error; + uint64_t volblocksize, volsize; + + VERIFY(nvlist_lookup_uint64(nvprops, + zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize) == 0); + if (nvlist_lookup_uint64(nvprops, + zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), &volblocksize) != 0) + volblocksize = zfs_prop_default_numeric(ZFS_PROP_VOLBLOCKSIZE); + + /* + * These properties must be removed from the list so the generic + * property setting step won't apply to them. + */ + VERIFY(nvlist_remove_all(nvprops, + zfs_prop_to_name(ZFS_PROP_VOLSIZE)) == 0); + (void) nvlist_remove_all(nvprops, + zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE)); + + error = dmu_object_claim(os, ZVOL_OBJ, DMU_OT_ZVOL, volblocksize, + DMU_OT_NONE, 0, tx); + ASSERT(error == 0); + + error = zap_create_claim(os, ZVOL_ZAP_OBJ, DMU_OT_ZVOL_PROP, + DMU_OT_NONE, 0, tx); + ASSERT(error == 0); + + error = zap_update(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize, tx); + ASSERT(error == 0); +} + +/* + * ZFS_IOC_OBJSET_STATS entry point. + */ +int +zvol_get_stats(objset_t *os, nvlist_t *nv) +{ + int error; + dmu_object_info_t doi; + uint64_t val; + + error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &val); + if (error) + return (error); + + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_VOLSIZE, val); + + error = dmu_object_info(os, ZVOL_OBJ, &doi); + + if (error == 0) { + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_VOLBLOCKSIZE, + doi.doi_data_block_size); + } + + return (error); +} + +/* + * Notification handler for objset readonly property changes. + */ +static void +zvol_readonly_changed_cb(void *arg, uint64_t value) +{ + set_disk_ro(((zvol_state_t *)arg)->zv_disk, !!value); +} + +/* + * Sanity check volume size. + */ +int +zvol_check_volsize(uint64_t volsize, uint64_t blocksize) +{ + if (volsize == 0) + return (EINVAL); + + if (volsize % blocksize != 0) + return (EINVAL); + +#ifdef _ILP32 + if (volsize - 1 > SPEC_MAXOFFSET_T) + return (EOVERFLOW); +#endif + return (0); +} + +/* + * Ensure the zap is flushed then inform the VFS of the capacity change. + */ +static int +zvol_update_volsize(zvol_state_t *zv, uint64_t volsize) +{ + dmu_tx_t *tx; + int error; + + ASSERT(MUTEX_HELD(&zvol_state_lock)); + + tx = dmu_tx_create(zv->zv_objset); + dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL); + error = dmu_tx_assign(tx, TXG_WAIT); + if (error) { + dmu_tx_abort(tx); + return (error); + } + + error = zap_update(zv->zv_objset, ZVOL_ZAP_OBJ, "size", 8, 1, + &volsize, tx); + dmu_tx_commit(tx); + + if (error) + return (error); + + error = dmu_free_long_range(zv->zv_objset, + ZVOL_OBJ, volsize, DMU_OBJECT_END); + if (error) + return (error); + + zv->zv_volsize = volsize; + zv->zv_changed = 1; + error = revalidate_disk(zv->zv_disk); + + return (error); +} + +/* + * Set ZFS_PROP_VOLSIZE set entry point. + */ +int +zvol_set_volsize(const char *name, uint64_t volsize) +{ + zvol_state_t *zv; + int error; + dmu_object_info_t doi; + uint64_t old_volsize = 0ULL; + zvol_state_t state = { 0 }; + + mutex_enter(&zvol_state_lock); + + zv = zvol_find_by_name(name); + if (zv == NULL) { + /* + * If we are doing a "zfs clone -o volsize=", then the + * minor node won't exist yet. + */ + error = dmu_objset_open(name, DMU_OST_ZVOL, DS_MODE_OWNER, + &state.zv_objset); + if (error != 0) + goto out; + zv = &state; + } + old_volsize = zv->zv_volsize; + + if ((error = dmu_object_info(zv->zv_objset, ZVOL_OBJ, &doi)) != 0 || + (error = zvol_check_volsize(volsize,doi.doi_data_block_size)) != 0) + goto out; + + if (get_disk_ro(zv->zv_disk) || (zv->zv_mode & DS_MODE_READONLY)) { + error = EROFS; + goto out; + } + + error = zvol_update_volsize(zv, volsize); +out: + if (state.zv_objset) + dmu_objset_close(state.zv_objset); + + mutex_exit(&zvol_state_lock); + + return (error); +} + +/* + * Sanity check volume block size. + */ +int +zvol_check_volblocksize(uint64_t volblocksize) +{ + if (volblocksize < SPA_MINBLOCKSIZE || + volblocksize > SPA_MAXBLOCKSIZE || + !ISP2(volblocksize)) + return (EDOM); + + return (0); +} + +/* + * Set ZFS_PROP_VOLBLOCKSIZE set entry point. + */ +int +zvol_set_volblocksize(const char *name, uint64_t volblocksize) +{ + zvol_state_t *zv; + dmu_tx_t *tx; + int error; + + mutex_enter(&zvol_state_lock); + + zv = zvol_find_by_name(name); + if (zv == NULL) + return (ENXIO); + + if (get_disk_ro(zv->zv_disk) || (zv->zv_mode & DS_MODE_READONLY)) + return (EROFS); + + tx = dmu_tx_create(zv->zv_objset); + dmu_tx_hold_bonus(tx, ZVOL_OBJ); + error = dmu_tx_assign(tx, TXG_WAIT); + if (error) { + dmu_tx_abort(tx); + } else { + error = dmu_object_set_blocksize(zv->zv_objset, ZVOL_OBJ, + volblocksize, 0, tx); + if (error == ENOTSUP) + error = EBUSY; + dmu_tx_commit(tx); + if (error == 0) + zv->zv_volblocksize = volblocksize; + } + + return (error); +} + +/* + * Replay a TX_WRITE ZIL transaction that didn't get committed + * after a system failure + */ +static int +zvol_replay_write(zvol_state_t *zv, lr_write_t *lr, boolean_t byteswap) +{ + objset_t *os = zv->zv_objset; + char *data = (char *)(lr + 1); /* data follows lr_write_t */ + uint64_t off = lr->lr_offset; + uint64_t len = lr->lr_length; + dmu_tx_t *tx; + int error; + + if (byteswap) + byteswap_uint64_array(lr, sizeof (*lr)); + + tx = dmu_tx_create(os); + dmu_tx_hold_write(tx, ZVOL_OBJ, off, len); + error = dmu_tx_assign(tx, TXG_WAIT); + if (error) { + dmu_tx_abort(tx); + } else { + dmu_write(os, ZVOL_OBJ, off, len, data, tx); + dmu_tx_commit(tx); + } + + return (error); +} + +static int +zvol_replay_err(zvol_state_t *zv, lr_t *lr, boolean_t byteswap) +{ + return (ENOTSUP); +} + +/* + * Callback vectors for replaying records. + * Only TX_WRITE is needed for zvol. + */ +zil_replay_func_t *zvol_replay_vector[TX_MAX_TYPE] = { + (zil_replay_func_t *)zvol_replay_err, /* no such transaction type */ + (zil_replay_func_t *)zvol_replay_err, /* TX_CREATE */ + (zil_replay_func_t *)zvol_replay_err, /* TX_MKDIR */ + (zil_replay_func_t *)zvol_replay_err, /* TX_MKXATTR */ + (zil_replay_func_t *)zvol_replay_err, /* TX_SYMLINK */ + (zil_replay_func_t *)zvol_replay_err, /* TX_REMOVE */ + (zil_replay_func_t *)zvol_replay_err, /* TX_RMDIR */ + (zil_replay_func_t *)zvol_replay_err, /* TX_LINK */ + (zil_replay_func_t *)zvol_replay_err, /* TX_RENAME */ + (zil_replay_func_t *)zvol_replay_write, /* TX_WRITE */ + (zil_replay_func_t *)zvol_replay_err, /* TX_TRUNCATE */ + (zil_replay_func_t *)zvol_replay_err, /* TX_SETATTR */ + (zil_replay_func_t *)zvol_replay_err, /* TX_ACL */ +}; + +/* + * zvol_log_write() handles synchronous writes using TX_WRITE ZIL transactions. + * + * We store data in the log buffers if it's small enough. + * Otherwise we will later flush the data out via dmu_sync(). + */ +ssize_t zvol_immediate_write_sz = 32768; + +static void +zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, + uint64_t offset, uint64_t size, int sync) +{ + uint32_t blocksize = zv->zv_volblocksize; + zilog_t *zilog = zv->zv_zilog; + boolean_t slogging; + + if (zil_disable) + return; + + if (zilog->zl_replay) { + dsl_dataset_dirty(dmu_objset_ds(zilog->zl_os), tx); + zilog->zl_replayed_seq[dmu_tx_get_txg(tx) & TXG_MASK] = + zilog->zl_replaying_seq; + return; + } + + slogging = spa_has_slogs(zilog->zl_spa); + + while (size) { + itx_t *itx; + lr_write_t *lr; + ssize_t len; + itx_wr_state_t write_state; + + /* + * Unlike zfs_log_write() we can be called with + * up to DMU_MAX_ACCESS/2 (5MB) writes. + */ + if (blocksize > zvol_immediate_write_sz && !slogging && + size >= blocksize && offset % blocksize == 0) { + write_state = WR_INDIRECT; /* uses dmu_sync */ + len = blocksize; + } else if (sync) { + write_state = WR_COPIED; + len = MIN(ZIL_MAX_LOG_DATA, size); + } else { + write_state = WR_NEED_COPY; + len = MIN(ZIL_MAX_LOG_DATA, size); + } + + itx = zil_itx_create(TX_WRITE, sizeof (*lr) + + (write_state == WR_COPIED ? len : 0)); + lr = (lr_write_t *)&itx->itx_lr; + if (write_state == WR_COPIED && dmu_read(zv->zv_objset, + ZVOL_OBJ, offset, len, lr+1, DMU_READ_NO_PREFETCH) != 0) { + kmem_free(itx, offsetof(itx_t, itx_lr) + + itx->itx_lr.lrc_reclen); + itx = zil_itx_create(TX_WRITE, sizeof (*lr)); + lr = (lr_write_t *)&itx->itx_lr; + write_state = WR_NEED_COPY; + } + + itx->itx_wr_state = write_state; + if (write_state == WR_NEED_COPY) + itx->itx_sod += len; + lr->lr_foid = ZVOL_OBJ; + lr->lr_offset = offset; + lr->lr_length = len; + lr->lr_blkoff = offset - + P2ALIGN_TYPED(offset, blocksize, uint64_t); + BP_ZERO(&lr->lr_blkptr); + + itx->itx_private = zv; + itx->itx_sync = sync; + + (void) zil_itx_assign(zilog, itx, tx); + + offset += len; + size -= len; + } +} + +/* + * Common write path running under the zvol taskq context. This function + * is responsible for copying the request structure data in to the DMU and + * signaling the request queue with the result of the copy. + */ +static void +zvol_write(void *arg) +{ + struct request *req = (struct request *)arg; + struct request_queue *q = req->q; + zvol_state_t *zv = q->queuedata; + uint64_t offset = blk_rq_pos(req) << 9; + uint64_t size = blk_rq_bytes(req); + int sync = 0, error = 0; + dmu_tx_t *tx; + rl_t *rl; + + if (rq_is_sync(req) && !zil_disable) + sync = 1; + + rl = zfs_range_lock(&zv->zv_znode, offset, size, RL_WRITER); + + tx = dmu_tx_create(zv->zv_objset); + dmu_tx_hold_write(tx, ZVOL_OBJ, offset, size); + + /* This will only fail for ENOSPC */ + error = dmu_tx_assign(tx, TXG_WAIT); + if (error) { + dmu_tx_abort(tx); + zfs_range_unlock(rl); + blk_end_request(req, -error, size); + return; + } + + dmu_write_req(zv->zv_objset, ZVOL_OBJ, req, tx); + zvol_log_write(zv, tx, offset, size, sync); + + dmu_tx_commit(tx); + zfs_range_unlock(rl); + + if (sync) + zil_commit(zv->zv_zilog, UINT64_MAX, ZVOL_OBJ); + + blk_end_request(req, -error, size); +} + +/* + * Common read path running under the zvol taskq context. This function + * is responsible for copying the requested data out of the DMU and in to + * a linux request structure. It then must signal the request queue with + * an error code describing the result of the copy. + */ +static void +zvol_read(void *arg) +{ + struct request *req = (struct request *)arg; + struct request_queue *q = req->q; + zvol_state_t *zv = q->queuedata; + uint64_t offset = blk_rq_pos(req) << 9; + uint64_t size = blk_rq_bytes(req); + int error; + rl_t *rl; + + rl = zfs_range_lock(&zv->zv_znode, offset, size, RL_READER); + + error = dmu_read_req(zv->zv_objset, ZVOL_OBJ, req); + + zfs_range_unlock(rl); + + /* convert checksum errors into IO errors */ + if (error == ECKSUM) + error = EIO; + + blk_end_request(req, -error, size); +} + +/* + * Request will be added back to the request queue and retried if + * it cannot be immediately dispatched to the taskq for handling + */ +static inline void +zvol_dispatch(task_func_t func, struct request *req) +{ + if (!taskq_dispatch(zvol_taskq, func, (void *)req, TQ_NOSLEEP)) + blk_requeue_request(req->q, req); +} + +/* + * Common request path. Rather than registering a custom make_request() + * function we use the generic Linux version. This is done because it allows + * us to easily merge read requests which would otherwise we performed + * synchronously by the DMU. This is less critical in write case where the + * DMU will perform the correct merging within a transaction group. Using + * the generic make_request() also let's use leverage the fact that the + * elevator with ensure correct ordering in regards to barrior IOs. On + * the downside it means that in the write case we end up doing request + * merging twice once in the elevator and once in the DMU. + * + * The request handler is called under a spin lock so all the real work + * is handed off to be done in the context of the zvol taskq. This function + * simply performs basic request sanity checking and hands off the request. + */ +static void +zvol_request(struct request_queue *q) +{ + zvol_state_t *zv = q->queuedata; + struct request *req; + unsigned int size; + + while ((req = blk_fetch_request(q)) != NULL) { + size = blk_rq_bytes(req); + + if (blk_rq_pos(req) + blk_rq_sectors(req) > + get_capacity(zv->zv_disk)) { + printk(KERN_INFO + "%s: bad access: block=%llu, count=%lu\n", + req->rq_disk->disk_name, + (long long unsigned)blk_rq_pos(req), + (long unsigned)blk_rq_sectors(req)); + blk_end_request(req, -EIO, size); + continue; + } + + if (!blk_fs_request(req)) { + printk(KERN_INFO "%s: non-fs cmd\n", + req->rq_disk->disk_name); + blk_end_request(req, -EIO, size); + continue; + } + + switch (rq_data_dir(req)) { + case READ: + zvol_dispatch(zvol_read, req); + break; + case WRITE: + if (unlikely(get_disk_ro(zv->zv_disk))) { + blk_end_request(req, -EROFS, size); + break; + } + + zvol_dispatch(zvol_write, req); + break; + default: + printk(KERN_INFO "%s: unknown cmd: %d\n", + req->rq_disk->disk_name, (int)rq_data_dir(req)); + blk_end_request(req, -EIO, size); + break; + } + } +} + +/* + * The zvol_state_t's are inserted in increasing MINOR(dev_t) order. + */ +static void +zvol_insert(zvol_state_t *zv_insert) +{ + zvol_state_t *zv = NULL; + + ASSERT(MUTEX_HELD(&zvol_state_lock)); + ASSERT3U(MINOR(zv_insert->zv_dev) & ZVOL_MINOR_MASK, ==, 0); + for (zv = list_head(&zvol_state_list); zv != NULL; + zv = list_next(&zvol_state_list, zv)) { + if (MINOR(zv->zv_dev) > MINOR(zv_insert->zv_dev)) + break; + } + + list_insert_before(&zvol_state_list, zv, zv_insert); +} + +/* + * Simply remove the zvol from to list of zvols. + */ +static void +zvol_remove(zvol_state_t *zv_remove) +{ + ASSERT(MUTEX_HELD(&zvol_state_lock)); + list_remove(&zvol_state_list, zv_remove); +} + +#ifndef HAVE_BDEV_BLOCK_DEVICE_OPERATIONS +typedef unsigned __bitwise__ fmode_t; +#endif /* HAVE_BDEV_BLOCK_DEVICE_OPERATIONS */ + +static int +zvol_open(struct block_device *bdev, fmode_t flag) +{ + zvol_state_t *zv = bdev->bd_disk->private_data; + + mutex_enter(&zvol_state_lock); + ASSERT3P(zv, !=, NULL); + ASSERT3P(zv->zv_objset, !=, NULL); + + if ((flag & FMODE_WRITE) && + (get_disk_ro(zv->zv_disk) || (zv->zv_mode & DS_MODE_READONLY))) { + mutex_exit(&zvol_state_lock); + return (-EROFS); + } + + zv->zv_open_count++; + mutex_exit(&zvol_state_lock); + + check_disk_change(bdev); + + return (0); +} + +static int +zvol_release(struct gendisk *disk, fmode_t mode) +{ + zvol_state_t *zv = disk->private_data; + + mutex_enter(&zvol_state_lock); + ASSERT3P(zv, !=, NULL); + ASSERT3U(zv->zv_open_count, >, 0); + zv->zv_open_count--; + mutex_exit(&zvol_state_lock); + + return (0); +} + +static void +zvol_get_done(dmu_buf_t *db, void *vzgd) +{ + zgd_t *zgd = (zgd_t *)vzgd; + rl_t *rl = zgd->zgd_rl; + + dmu_buf_rele(db, vzgd); + zfs_range_unlock(rl); + zil_add_block(zgd->zgd_zilog, zgd->zgd_bp); + kmem_free(zgd, sizeof (zgd_t)); +} + +/* + * Get data to generate a TX_WRITE intent log record. + */ +static int +zvol_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio) +{ + zvol_state_t *zv = arg; + objset_t *os = zv->zv_objset; + dmu_buf_t *db; + rl_t *rl; + zgd_t *zgd; + uint64_t boff; /* block starting offset */ + int dlen = lr->lr_length; /* length of user data */ + int error; + + ASSERT(zio); + ASSERT(dlen != 0); + + /* + * Write records come in two flavors: immediate and indirect. + * For small writes it's cheaper to store the data with the + * log record (immediate); for large writes it's cheaper to + * sync the data and get a pointer to it (indirect) so that + * we don't have to write the data twice. + */ + if (buf != NULL) /* immediate write */ + return (dmu_read(os, ZVOL_OBJ, lr->lr_offset, dlen, buf, + DMU_READ_NO_PREFETCH)); + + zgd = (zgd_t *)kmem_alloc(sizeof (zgd_t), KM_SLEEP); + zgd->zgd_zilog = zv->zv_zilog; + zgd->zgd_bp = &lr->lr_blkptr; + + /* + * Lock the range of the block to ensure that when the data is + * written out and its checksum is being calculated that no other + * thread can change the block. + */ + boff = P2ALIGN_TYPED(lr->lr_offset, zv->zv_volblocksize, uint64_t); + rl = zfs_range_lock(&zv->zv_znode, boff, zv->zv_volblocksize, + RL_READER); + zgd->zgd_rl = rl; + + VERIFY3S(dmu_buf_hold(os, ZVOL_OBJ, lr->lr_offset, zgd, &db), ==, 0); + error = dmu_sync(zio, db, &lr->lr_blkptr, + lr->lr_common.lrc_txg, zvol_get_done, zgd); + if (error == 0) + zil_add_block(zv->zv_zilog, &lr->lr_blkptr); + /* + * If we get EINPROGRESS, then we need to wait for a + * write IO initiated by dmu_sync() to complete before + * we can release this dbuf. We will finish everything + * up in the zvol_get_done() callback. + */ + if (error == EINPROGRESS) + return (0); + + dmu_buf_rele(db, zgd); + zfs_range_unlock(rl); + kmem_free(zgd, sizeof (zgd_t)); + + return (error); +} + + +static int +zvol_ioctl(struct block_device *bdev, fmode_t mode, + unsigned int cmd, unsigned long arg) +{ + zvol_state_t *zv = bdev->bd_disk->private_data; + int error = 0; + + if (zv == NULL) + return (-ENXIO); + + switch (cmd) { + case BLKFLSBUF: + zil_commit(zv->zv_zilog, UINT64_MAX, ZVOL_OBJ); + break; + + default: + error = -ENOTTY; + break; + + } + + return (error); +} + +#ifdef CONFIG_COMPAT +static int +zvol_compat_ioctl(struct block_device *bdev, fmode_t mode, + unsigned cmd, unsigned long arg) +{ + return zvol_ioctl(bdev, mode, cmd, arg); +} +#else +#define zvol_compat_ioctl NULL +#endif + +static int zvol_media_changed(struct gendisk *disk) +{ + zvol_state_t *zv = disk->private_data; + + return zv->zv_changed; +} + +static int zvol_revalidate_disk(struct gendisk *disk) +{ + zvol_state_t *zv = disk->private_data; + + zv->zv_changed = 0; + set_capacity(zv->zv_disk, zv->zv_volsize >> 9); + + return 0; +} + +/* + * Provide a simple virtual geometry for legacy compatibility. For devices + * smaller than 1 MiB a small head and sector count is used to allow very + * tiny devices. For devices over 1 Mib a standard head and sector count + * is used to keep the cylinders count reasonable. + */ +static int +zvol_getgeo(struct block_device *bdev, struct hd_geometry *geo) +{ + zvol_state_t *zv = bdev->bd_disk->private_data; + sector_t sectors = get_capacity(zv->zv_disk); + + if (sectors > 2048) { + geo->heads = 16; + geo->sectors = 63; + } else { + geo->heads = 2; + geo->sectors = 4; + } + + geo->start = 0; + geo->cylinders = sectors / (geo->heads * geo->sectors); + + return 0; +} + +static struct kobject * +zvol_probe(dev_t dev, int *part, void *arg) +{ + zvol_state_t *zv; + struct kobject *kobj; + + mutex_enter(&zvol_state_lock); + zv = zvol_find_by_dev(dev); + kobj = zv ? get_disk(zv->zv_disk) : ERR_PTR(-ENOENT); + mutex_exit(&zvol_state_lock); + + return kobj; +} + +#ifdef HAVE_BDEV_BLOCK_DEVICE_OPERATIONS +static struct block_device_operations zvol_ops = { + .open = zvol_open, + .release = zvol_release, + .ioctl = zvol_ioctl, + .compat_ioctl = zvol_compat_ioctl, + .media_changed = zvol_media_changed, + .revalidate_disk = zvol_revalidate_disk, + .getgeo = zvol_getgeo, + .owner = THIS_MODULE, +}; + +#else /* HAVE_BDEV_BLOCK_DEVICE_OPERATIONS */ + +static int +zvol_open_by_inode(struct inode *inode, struct file *file) +{ + return zvol_open(inode->i_bdev, file->f_mode); +} + +static int +zvol_release_by_inode(struct inode *inode, struct file *file) +{ + return zvol_release(inode->i_bdev->bd_disk, file->f_mode); +} + +static int +zvol_ioctl_by_inode(struct inode *inode, struct file *file, + unsigned int cmd, unsigned long arg) +{ + return zvol_ioctl(inode->i_bdev, file->f_mode, cmd, arg); +} + +# ifdef CONFIG_COMPAT +static long +zvol_compat_ioctl_by_inode(struct file *file, + unsigned int cmd, unsigned long arg) +{ + return zvol_compat_ioctl(file->f_dentry->d_inode->i_bdev, + file->f_mode, cmd, arg); +} +# else +# define zvol_compat_ioctl_by_inode NULL +# endif + +static struct block_device_operations zvol_ops = { + .open = zvol_open_by_inode, + .release = zvol_release_by_inode, + .ioctl = zvol_ioctl_by_inode, + .compat_ioctl = zvol_compat_ioctl_by_inode, + .media_changed = zvol_media_changed, + .revalidate_disk = zvol_revalidate_disk, + .getgeo = zvol_getgeo, + .owner = THIS_MODULE, +}; +#endif /* HAVE_BDEV_BLOCK_DEVICE_OPERATIONS */ + +/* + * Allocate memory for a new zvol_state_t and setup the required + * request queue and generic disk structures for the block device. + */ +static zvol_state_t * +zvol_alloc(dev_t dev, const char *name) +{ + zvol_state_t *zv; + + zv = kmem_zalloc(sizeof (zvol_state_t), KM_SLEEP); + if (zv == NULL) + goto out; + + zv->zv_queue = blk_init_queue(zvol_request, &zv->zv_lock); + if (zv->zv_queue == NULL) + goto out_kmem; + + zv->zv_disk = alloc_disk(ZVOL_MINORS); + if (zv->zv_disk == NULL) + goto out_queue; + + zv->zv_queue->queuedata = zv; + zv->zv_dev = dev; + zv->zv_open_count = 0; + + mutex_init(&zv->zv_znode.z_range_lock, NULL, MUTEX_DEFAULT, NULL); + avl_create(&zv->zv_znode.z_range_avl, zfs_range_compare, + sizeof (rl_t), offsetof(rl_t, r_node)); + spin_lock_init(&zv->zv_lock); + list_link_init(&zv->zv_next); + + zv->zv_disk->major = zvol_major; + zv->zv_disk->first_minor = (dev & MINORMASK); + zv->zv_disk->fops = &zvol_ops; + zv->zv_disk->private_data = zv; + zv->zv_disk->queue = zv->zv_queue; + strlcpy(zv->zv_disk->disk_name, name, DISK_NAME_LEN); + + return zv; + +out_queue: + blk_cleanup_queue(zv->zv_queue); +out_kmem: + kmem_free(zv, sizeof (zvol_state_t)); +out: + return NULL; +} + +/* + * Cleanup then free a zvol_state_t which was created by zvol_alloc(). + */ +static void +zvol_free(zvol_state_t *zv) +{ + avl_destroy(&zv->zv_znode.z_range_avl); + mutex_destroy(&zv->zv_znode.z_range_lock); + + del_gendisk(zv->zv_disk); + blk_cleanup_queue(zv->zv_queue); + put_disk(zv->zv_disk); + + kmem_free(zv, sizeof (zvol_state_t)); +} + +/* + * Create a block device minor node and setup the linkage between it + * and the specified volume. Once this function returns the block + * device is live and ready for use. + */ +int +zvol_create_minor(const char *name) +{ + zvol_state_t *zv; + objset_t *os; + dmu_object_info_t doi; + uint64_t volsize; + unsigned minor = 0; + int ds_mode = DS_MODE_OWNER; + int error = 0; + + mutex_enter(&zvol_state_lock); + + zv = zvol_find_by_name(name); + if (zv) { + error = EEXIST; + goto out; + } + + /* Snapshot may only be read-only */ + if (strchr(name, '@') != 0) + ds_mode |= DS_MODE_READONLY; + + error = dmu_objset_open(name, DMU_OST_ZVOL, ds_mode, &os); + if (error) + goto out; + + error = dmu_object_info(os, ZVOL_OBJ, &doi); + if (error) + goto out_dmu_objset_close; + + error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize); + if (error) + goto out_dmu_objset_close; + + error = zvol_find_minor(&minor); + if (error) + goto out_dmu_objset_close; + + zv = zvol_alloc(MKDEV(zvol_major, minor), name); + if (zv == NULL) { + error = EAGAIN; + goto out_dmu_objset_close; + } + + set_disk_ro(zv->zv_disk, !!(ds_mode & DS_MODE_READONLY)); + set_capacity(zv->zv_disk, volsize >> 9); + + zv->zv_volsize = volsize; + zv->zv_volblocksize = doi.doi_data_block_size; + zv->zv_objset = os; + zv->zv_mode = ds_mode; + zv->zv_zilog = zil_open(os, zvol_get_data); + zil_replay(os, zv, zvol_replay_vector); + + error = dsl_prop_register(dmu_objset_ds(zv->zv_objset), "readonly", + zvol_readonly_changed_cb, zv); + if (error) + goto out_zvol_alloc; + + zvol_insert(zv); + mutex_exit(&zvol_state_lock); + add_disk(zv->zv_disk); + + return 0; + +out_zvol_alloc: + zvol_free(zv); +out_dmu_objset_close: + dmu_objset_close(os); +out: + mutex_exit(&zvol_state_lock); + + return (-error); +} + +/* + * Remove a block device minor node for the specified volume. + */ +int +zvol_remove_minor(const char *name) +{ + zvol_state_t *zv; + int error = 0; + + mutex_enter(&zvol_state_lock); + + zv = zvol_find_by_name(name); + if (zv == NULL) { + error = ENXIO; + goto out; + } + + if (zv->zv_open_count > 0) { + error = EBUSY; + goto out; + } + + error = dsl_prop_unregister(dmu_objset_ds(zv->zv_objset), + "readonly", zvol_readonly_changed_cb, zv); + if (error) + goto out; + + zil_close(zv->zv_zilog); + dmu_objset_close(zv->zv_objset); + + zvol_remove(zv); + zvol_free(zv); +out: + mutex_exit(&zvol_state_lock); + + return (-error); +} + +/* + * Remove all minors from the system. This is only called from + * zvol_fini() which means the module reference count must have + * dropped to zero and none of the zvol devices may be open. + */ +static void +zvol_remove_minors(void) +{ + zvol_state_t *zv; + + mutex_enter(&zvol_state_lock); + while ((zv = list_head(&zvol_state_list)) != NULL) { + ASSERT3U(zv->zv_open_count, ==, 0); + + (void)dsl_prop_unregister(dmu_objset_ds(zv->zv_objset), + "readonly", zvol_readonly_changed_cb, zv); + zil_close(zv->zv_zilog); + dmu_objset_close(zv->zv_objset); + + zvol_remove(zv); + zvol_free(zv); + } + mutex_exit(&zvol_state_lock); +} + +int +zvol_init(void) +{ + int error; + + if (!zvol_threads) + zvol_threads = num_online_cpus(); + + zvol_taskq = taskq_create(ZVOL_DRIVER, zvol_threads, maxclsyspri, + zvol_threads, INT_MAX, TASKQ_PREPOPULATE); + if (zvol_taskq == NULL) { + printk(KERN_INFO "ZFS: taskq_create() failed\n"); + return (-ENOMEM); + } + + error = register_blkdev(zvol_major, ZVOL_DRIVER); + if (error) { + printk(KERN_INFO "ZFS: register_blkdev() failed %d\n", error); + taskq_destroy(zvol_taskq); + return (error); + } + + blk_register_region(MKDEV(zvol_major, 0), 1UL << MINORBITS, + THIS_MODULE, zvol_probe, NULL, NULL); + + mutex_init(&zvol_state_lock, NULL, MUTEX_DEFAULT, NULL); + list_create(&zvol_state_list, sizeof (zvol_state_t), + offsetof(zvol_state_t, zv_next)); + + return (0); +} + +void +zvol_fini(void) +{ + zvol_remove_minors(); + blk_unregister_region(MKDEV(zvol_major, 0), 1UL << MINORBITS); + unregister_blkdev(zvol_major, ZVOL_DRIVER); + taskq_destroy(zvol_taskq); + + mutex_destroy(&zvol_state_lock); + list_destroy(&zvol_state_list); +} + +module_param(zvol_major, uint, 0); +MODULE_PARM_DESC(zvol_major, "Major number for zvol device"); + +module_param(zvol_threads, uint, 0); +MODULE_PARM_DESC(zvol_threads, "Number of threads for zvol device");