From 96c4d9dd6d7cb80b0c79bf3de3db39da9900fcf1 Mon Sep 17 00:00:00 2001 From: Rob Norris Date: Wed, 28 Aug 2024 22:22:51 +1000 Subject: [PATCH 1/6] arc_os: split userspace and Linux kernel code The Linux arc_os.c carries userspace and kernel code, with very little overlap between the two. This lifts the userspace parts out into a separate arc_os.c for libzpool and removes it from the Linux side. Signed-off-by: Rob Norris Sponsored-by: https://despairlabs.com/sponsor/ --- lib/libzpool/Makefile.am | 2 +- lib/libzpool/arc_os.c | 87 ++++++++++++++++++++++++++++++++++++ module/os/linux/zfs/arc_os.c | 47 ------------------- 3 files changed, 88 insertions(+), 48 deletions(-) create mode 100644 lib/libzpool/arc_os.c diff --git a/lib/libzpool/Makefile.am b/lib/libzpool/Makefile.am index ff30af7d2b..5a89b60a59 100644 --- a/lib/libzpool/Makefile.am +++ b/lib/libzpool/Makefile.am @@ -12,6 +12,7 @@ CPPCHECKTARGETS += libzpool.la dist_libzpool_la_SOURCES = \ %D%/abd_os.c \ + %D%/arc_os.c \ %D%/kernel.c \ %D%/taskq.c \ %D%/util.c @@ -42,7 +43,6 @@ nodist_libzpool_la_SOURCES = \ module/lua/lvm.c \ module/lua/lzio.c \ \ - module/os/linux/zfs/arc_os.c \ module/os/linux/zfs/trace.c \ module/os/linux/zfs/vdev_file.c \ module/os/linux/zfs/vdev_label_os.c \ diff --git a/lib/libzpool/arc_os.c b/lib/libzpool/arc_os.c new file mode 100644 index 0000000000..ed99322ddb --- /dev/null +++ b/lib/libzpool/arc_os.c @@ -0,0 +1,87 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or https://opensource.org/licenses/CDDL-1.0. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, Joyent, Inc. + * Copyright (c) 2011, 2019 by Delphix. All rights reserved. + * Copyright (c) 2014 by Saso Kiselkov. All rights reserved. + * Copyright 2017 Nexenta Systems, Inc. All rights reserved. + */ + +#include +#include + +/* + * Return a default max arc size based on the amount of physical memory. + * This may be overridden by tuning the zfs_arc_max module parameter. + */ +uint64_t +arc_default_max(uint64_t min, uint64_t allmem) +{ + uint64_t size; + + if (allmem >= 1 << 30) + size = allmem - (1 << 30); + else + size = min; + return (MAX(allmem * 5 / 8, size)); +} + +int64_t +arc_available_memory(void) +{ + int64_t lowest = INT64_MAX; + + /* Every 100 calls, free a small amount */ + if (random_in_range(100) == 0) + lowest = -1024; + + return (lowest); +} + +int +arc_memory_throttle(spa_t *spa, uint64_t reserve, uint64_t txg) +{ + (void) spa, (void) reserve, (void) txg; + return (0); +} + +uint64_t +arc_all_memory(void) +{ + return (ptob(physmem) / 2); +} + +uint64_t +arc_free_memory(void) +{ + return (random_in_range(arc_all_memory() * 20 / 100)); +} + +void +arc_register_hotplug(void) +{ +} + +void +arc_unregister_hotplug(void) +{ +} diff --git a/module/os/linux/zfs/arc_os.c b/module/os/linux/zfs/arc_os.c index c6b9cb2ddb..b1e45b2874 100644 --- a/module/os/linux/zfs/arc_os.c +++ b/module/os/linux/zfs/arc_os.c @@ -42,7 +42,6 @@ #include #include #include -#ifdef _KERNEL #include #include #include @@ -50,7 +49,6 @@ #include #include #include -#endif #include #include #include 
@@ -59,7 +57,6 @@ #include #include -#ifdef _KERNEL /* * This is a limit on how many pages the ARC shrinker makes available for * eviction in response to one page allocation attempt. Note that in @@ -87,7 +84,6 @@ static int zfs_arc_shrinker_seeks = DEFAULT_SEEKS; #ifdef CONFIG_MEMORY_HOTPLUG static struct notifier_block arc_hotplug_callback_mem_nb; #endif -#endif /* * Return a default max arc size based on the amount of physical memory. @@ -105,7 +101,6 @@ arc_default_max(uint64_t min, uint64_t allmem) return (MAX(allmem * 5 / 8, size)); } -#ifdef _KERNEL /* * Return maximum amount of memory that we could possibly use. Reduced * to half of all memory in user space which is primarily used for testing. @@ -459,48 +454,6 @@ arc_unregister_hotplug(void) unregister_memory_notifier(&arc_hotplug_callback_mem_nb); #endif } -#else /* _KERNEL */ -int64_t -arc_available_memory(void) -{ - int64_t lowest = INT64_MAX; - - /* Every 100 calls, free a small amount */ - if (random_in_range(100) == 0) - lowest = -1024; - - return (lowest); -} - -int -arc_memory_throttle(spa_t *spa, uint64_t reserve, uint64_t txg) -{ - (void) spa, (void) reserve, (void) txg; - return (0); -} - -uint64_t -arc_all_memory(void) -{ - return (ptob(physmem) / 2); -} - -uint64_t -arc_free_memory(void) -{ - return (random_in_range(arc_all_memory() * 20 / 100)); -} - -void -arc_register_hotplug(void) -{ -} - -void -arc_unregister_hotplug(void) -{ -} -#endif /* _KERNEL */ ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, shrinker_limit, INT, ZMOD_RW, "Limit on number of pages that ARC shrinker can reclaim at once"); From 63b9206101d7f827318b6b59b0fdbecd986becbd Mon Sep 17 00:00:00 2001 From: Rob Norris Date: Wed, 28 Aug 2024 22:27:13 +1000 Subject: [PATCH 2/6] vdev_label_os: copy Linux implementation for userspace The no-op is fine for both. 
Signed-off-by: Rob Norris Sponsored-by: https://despairlabs.com/sponsor/ --- lib/libzpool/Makefile.am | 4 ++-- lib/libzpool/vdev_label_os.c | 45 ++++++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 2 deletions(-) create mode 100644 lib/libzpool/vdev_label_os.c diff --git a/lib/libzpool/Makefile.am b/lib/libzpool/Makefile.am index 5a89b60a59..90a0dede3a 100644 --- a/lib/libzpool/Makefile.am +++ b/lib/libzpool/Makefile.am @@ -15,7 +15,8 @@ dist_libzpool_la_SOURCES = \ %D%/arc_os.c \ %D%/kernel.c \ %D%/taskq.c \ - %D%/util.c + %D%/util.c \ + %D%/vdev_label_os.c nodist_libzpool_la_SOURCES = \ module/lua/lapi.c \ @@ -45,7 +46,6 @@ nodist_libzpool_la_SOURCES = \ \ module/os/linux/zfs/trace.c \ module/os/linux/zfs/vdev_file.c \ - module/os/linux/zfs/vdev_label_os.c \ module/os/linux/zfs/zfs_debug.c \ module/os/linux/zfs/zfs_racct.c \ module/os/linux/zfs/zfs_znode.c \ diff --git a/lib/libzpool/vdev_label_os.c b/lib/libzpool/vdev_label_os.c new file mode 100644 index 0000000000..3d965b89a9 --- /dev/null +++ b/lib/libzpool/vdev_label_os.c @@ -0,0 +1,45 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or https://opensource.org/licenses/CDDL-1.0. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2023 by iXsystems, Inc. 
+ */ + +#include +#include +#include +#include +#include + +/* + * Check if the reserved boot area is in-use. + * + * This function always returns 0, as there are no known external uses + * of the reserved area on Linux. + */ +int +vdev_check_boot_reserve(spa_t *spa, vdev_t *childvd) +{ + (void) spa; + (void) childvd; + + return (0); +} From 924961b2d87f4da3bf3d81ab448f67bd2173530d Mon Sep 17 00:00:00 2001 From: Rob Norris Date: Wed, 28 Aug 2024 22:27:27 +1000 Subject: [PATCH 3/6] libzpool: don't include trace.c It does nothing in userspace anyway. Signed-off-by: Rob Norris Sponsored-by: https://despairlabs.com/sponsor/ --- lib/libzpool/Makefile.am | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/libzpool/Makefile.am b/lib/libzpool/Makefile.am index 90a0dede3a..c28d3b4bdf 100644 --- a/lib/libzpool/Makefile.am +++ b/lib/libzpool/Makefile.am @@ -44,7 +44,6 @@ nodist_libzpool_la_SOURCES = \ module/lua/lvm.c \ module/lua/lzio.c \ \ - module/os/linux/zfs/trace.c \ module/os/linux/zfs/vdev_file.c \ module/os/linux/zfs/zfs_debug.c \ module/os/linux/zfs/zfs_racct.c \ From 8888f42e304012e6c5a8cd55a0817ee2c01b666f Mon Sep 17 00:00:00 2001 From: Rob Norris Date: Wed, 28 Aug 2024 22:28:16 +1000 Subject: [PATCH 4/6] zfs_racct: copy Linux implementation for userspace The no-op is fine for both. 
Signed-off-by: Rob Norris Sponsored-by: https://despairlabs.com/sponsor/ --- lib/libzpool/Makefile.am | 4 ++-- lib/libzpool/zfs_racct.c | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 2 deletions(-) create mode 100644 lib/libzpool/zfs_racct.c diff --git a/lib/libzpool/Makefile.am b/lib/libzpool/Makefile.am index c28d3b4bdf..cab63ac953 100644 --- a/lib/libzpool/Makefile.am +++ b/lib/libzpool/Makefile.am @@ -16,7 +16,8 @@ dist_libzpool_la_SOURCES = \ %D%/kernel.c \ %D%/taskq.c \ %D%/util.c \ - %D%/vdev_label_os.c + %D%/vdev_label_os.c \ + %D%/zfs_racct.c nodist_libzpool_la_SOURCES = \ module/lua/lapi.c \ @@ -46,7 +47,6 @@ nodist_libzpool_la_SOURCES = \ \ module/os/linux/zfs/vdev_file.c \ module/os/linux/zfs/zfs_debug.c \ - module/os/linux/zfs/zfs_racct.c \ module/os/linux/zfs/zfs_znode.c \ module/os/linux/zfs/zio_crypt.c \ \ diff --git a/lib/libzpool/zfs_racct.c b/lib/libzpool/zfs_racct.c new file mode 100644 index 0000000000..ce623ef9d1 --- /dev/null +++ b/lib/libzpool/zfs_racct.c @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2021 iXsystems, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include + +void +zfs_racct_read(uint64_t size, uint64_t iops) +{ + (void) size, (void) iops; +} + +void +zfs_racct_write(uint64_t size, uint64_t iops) +{ + (void) size, (void) iops; +} From d449a4faa8b6caeb7646459e419105fa7363cb16 Mon Sep 17 00:00:00 2001 From: Rob Norris Date: Wed, 28 Aug 2024 22:28:24 +1000 Subject: [PATCH 5/6] zfs_znode: lift common code to a single shared file For now, userspace has no znode implementation. Some of the property and path handling code is used there, though, and it is essentially the same on all platforms, so we only need a single copy of it. 
Signed-off-by: Rob Norris Sponsored-by: https://despairlabs.com/sponsor/ --- include/sys/zfs_znode.h | 2 + lib/libzpool/Makefile.am | 2 +- module/Kbuild.in | 3 +- module/Makefile.bsd | 3 +- .../zfs/{zfs_znode.c => zfs_znode_os.c} | 381 ----------------- .../linux/zfs/{zfs_znode.c => zfs_znode_os.c} | 364 ---------------- module/zfs/zfs_znode.c | 397 ++++++++++++++++++ 7 files changed, 404 insertions(+), 748 deletions(-) rename module/os/freebsd/zfs/{zfs_znode.c => zfs_znode_os.c} (84%) rename module/os/linux/zfs/{zfs_znode.c => zfs_znode_os.c} (86%) create mode 100644 module/zfs/zfs_znode.c diff --git a/include/sys/zfs_znode.h b/include/sys/zfs_znode.h index d71144807f..071bbf1203 100644 --- a/include/sys/zfs_znode.h +++ b/include/sys/zfs_znode.h @@ -158,6 +158,8 @@ extern "C" { #define ZFS_DIRENT_OBJ(de) BF64_GET(de, 0, 48) extern int zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len); +extern int zfs_obj_to_pobj(objset_t *osp, sa_handle_t *hdl, + sa_attr_type_t *sa_table, uint64_t *pobjp, int *is_xattrdir); extern int zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value); #ifdef _KERNEL diff --git a/lib/libzpool/Makefile.am b/lib/libzpool/Makefile.am index cab63ac953..27b8b9070b 100644 --- a/lib/libzpool/Makefile.am +++ b/lib/libzpool/Makefile.am @@ -47,7 +47,6 @@ nodist_libzpool_la_SOURCES = \ \ module/os/linux/zfs/vdev_file.c \ module/os/linux/zfs/zfs_debug.c \ - module/os/linux/zfs/zfs_znode.c \ module/os/linux/zfs/zio_crypt.c \ \ module/zcommon/cityhash.c \ @@ -182,6 +181,7 @@ nodist_libzpool_la_SOURCES = \ module/zfs/zfs_ratelimit.c \ module/zfs/zfs_rlock.c \ module/zfs/zfs_sa.c \ + module/zfs/zfs_znode.c \ module/zfs/zil.c \ module/zfs/zio.c \ module/zfs/zio_checksum.c \ diff --git a/module/Kbuild.in b/module/Kbuild.in index 0472a9348c..277044a126 100644 --- a/module/Kbuild.in +++ b/module/Kbuild.in @@ -423,6 +423,7 @@ ZFS_OBJS := \ zfs_rlock.o \ zfs_sa.o \ zfs_vnops.o \ + zfs_znode.o \ zil.o \ zio.o \ zio_checksum.o \ @@ -457,7 
+458,7 @@ ZFS_OBJS_OS := \ zfs_uio.o \ zfs_vfsops.o \ zfs_vnops_os.o \ - zfs_znode.o \ + zfs_znode_os.o \ zio_crypt.o \ zpl_ctldir.o \ zpl_export.o \ diff --git a/module/Makefile.bsd b/module/Makefile.bsd index 9161204c99..e7c75d3f1d 100644 --- a/module/Makefile.bsd +++ b/module/Makefile.bsd @@ -212,7 +212,7 @@ SRCS+= abd_os.c \ zfs_racct.c \ zfs_vfsops.c \ zfs_vnops_os.c \ - zfs_znode.c \ + zfs_znode_os.c \ zio_crypt.c \ zvol_os.c @@ -357,6 +357,7 @@ SRCS+= abd.c \ zfs_rlock.c \ zfs_sa.c \ zfs_vnops.c \ + zfs_znode.c \ zil.c \ zio.c \ zio_checksum.c \ diff --git a/module/os/freebsd/zfs/zfs_znode.c b/module/os/freebsd/zfs/zfs_znode_os.c similarity index 84% rename from module/os/freebsd/zfs/zfs_znode.c rename to module/os/freebsd/zfs/zfs_znode_os.c index e5c50874e1..ca247576a0 100644 --- a/module/os/freebsd/zfs/zfs_znode.c +++ b/module/os/freebsd/zfs/zfs_znode_os.c @@ -27,7 +27,6 @@ /* Portions Copyright 2007 Jeremy Teo */ /* Portions Copyright 2011 Martin Matuska */ -#ifdef _KERNEL #include #include #include @@ -52,8 +51,6 @@ #include #include #include -#endif /* _KERNEL */ - #include #include #include @@ -86,12 +83,6 @@ SYSCTL_INT(_debug_sizeof, OID_AUTO, znode, CTLFLAG_RD, #define ZNODE_STAT_ADD(stat) /* nothing */ #endif /* ZNODE_STATS */ -/* - * Functions needed for userland (ie: libzpool) are not put under - * #ifdef_KERNEL; the rest of the functions have dependencies - * (such as VFS logic) that will not compile easily in userland. 
- */ -#ifdef _KERNEL #if !defined(KMEM_DEBUG) #define _ZFS_USE_SMR static uma_zone_t znode_uma_zone; @@ -1787,376 +1778,7 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) mutex_destroy(&zfsvfs->z_hold_mtx[i]); kmem_free(zfsvfs, sizeof (zfsvfs_t)); } -#endif /* _KERNEL */ -static int -zfs_sa_setup(objset_t *osp, sa_attr_type_t **sa_table) -{ - uint64_t sa_obj = 0; - int error; - - error = zap_lookup(osp, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, &sa_obj); - if (error != 0 && error != ENOENT) - return (error); - - error = sa_setup(osp, sa_obj, zfs_attr_table, ZPL_END, sa_table); - return (error); -} - -static int -zfs_grab_sa_handle(objset_t *osp, uint64_t obj, sa_handle_t **hdlp, - dmu_buf_t **db, const void *tag) -{ - dmu_object_info_t doi; - int error; - - if ((error = sa_buf_hold(osp, obj, tag, db)) != 0) - return (error); - - dmu_object_info_from_db(*db, &doi); - if ((doi.doi_bonus_type != DMU_OT_SA && - doi.doi_bonus_type != DMU_OT_ZNODE) || - (doi.doi_bonus_type == DMU_OT_ZNODE && - doi.doi_bonus_size < sizeof (znode_phys_t))) { - sa_buf_rele(*db, tag); - return (SET_ERROR(ENOTSUP)); - } - - error = sa_handle_get(osp, obj, NULL, SA_HDL_PRIVATE, hdlp); - if (error != 0) { - sa_buf_rele(*db, tag); - return (error); - } - - return (0); -} - -static void -zfs_release_sa_handle(sa_handle_t *hdl, dmu_buf_t *db, const void *tag) -{ - sa_handle_destroy(hdl); - sa_buf_rele(db, tag); -} - -/* - * Given an object number, return its parent object number and whether - * or not the object is an extended attribute directory. 
- */ -static int -zfs_obj_to_pobj(objset_t *osp, sa_handle_t *hdl, sa_attr_type_t *sa_table, - uint64_t *pobjp, int *is_xattrdir) -{ - uint64_t parent; - uint64_t pflags; - uint64_t mode; - uint64_t parent_mode; - sa_bulk_attr_t bulk[3]; - sa_handle_t *sa_hdl; - dmu_buf_t *sa_db; - int count = 0; - int error; - - SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_PARENT], NULL, - &parent, sizeof (parent)); - SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_FLAGS], NULL, - &pflags, sizeof (pflags)); - SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL, - &mode, sizeof (mode)); - - if ((error = sa_bulk_lookup(hdl, bulk, count)) != 0) - return (error); - - /* - * When a link is removed its parent pointer is not changed and will - * be invalid. There are two cases where a link is removed but the - * file stays around, when it goes to the delete queue and when there - * are additional links. - */ - error = zfs_grab_sa_handle(osp, parent, &sa_hdl, &sa_db, FTAG); - if (error != 0) - return (error); - - error = sa_lookup(sa_hdl, ZPL_MODE, &parent_mode, sizeof (parent_mode)); - zfs_release_sa_handle(sa_hdl, sa_db, FTAG); - if (error != 0) - return (error); - - *is_xattrdir = ((pflags & ZFS_XATTR) != 0) && S_ISDIR(mode); - - /* - * Extended attributes can be applied to files, directories, etc. - * Otherwise the parent must be a directory. 
- */ - if (!*is_xattrdir && !S_ISDIR(parent_mode)) - return (SET_ERROR(EINVAL)); - - *pobjp = parent; - - return (0); -} - -/* - * Given an object number, return some zpl level statistics - */ -static int -zfs_obj_to_stats_impl(sa_handle_t *hdl, sa_attr_type_t *sa_table, - zfs_stat_t *sb) -{ - sa_bulk_attr_t bulk[4]; - int count = 0; - - SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL, - &sb->zs_mode, sizeof (sb->zs_mode)); - SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_GEN], NULL, - &sb->zs_gen, sizeof (sb->zs_gen)); - SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_LINKS], NULL, - &sb->zs_links, sizeof (sb->zs_links)); - SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_CTIME], NULL, - &sb->zs_ctime, sizeof (sb->zs_ctime)); - - return (sa_bulk_lookup(hdl, bulk, count)); -} - -static int -zfs_obj_to_path_impl(objset_t *osp, uint64_t obj, sa_handle_t *hdl, - sa_attr_type_t *sa_table, char *buf, int len) -{ - sa_handle_t *sa_hdl; - sa_handle_t *prevhdl = NULL; - dmu_buf_t *prevdb = NULL; - dmu_buf_t *sa_db = NULL; - char *path = buf + len - 1; - int error; - - *path = '\0'; - sa_hdl = hdl; - - uint64_t deleteq_obj; - VERIFY0(zap_lookup(osp, MASTER_NODE_OBJ, - ZFS_UNLINKED_SET, sizeof (uint64_t), 1, &deleteq_obj)); - error = zap_lookup_int(osp, deleteq_obj, obj); - if (error == 0) { - return (ESTALE); - } else if (error != ENOENT) { - return (error); - } - - for (;;) { - uint64_t pobj; - char component[MAXNAMELEN + 2]; - size_t complen; - int is_xattrdir; - - if (prevdb) { - ASSERT3P(prevhdl, !=, NULL); - zfs_release_sa_handle(prevhdl, prevdb, FTAG); - } - - if ((error = zfs_obj_to_pobj(osp, sa_hdl, sa_table, &pobj, - &is_xattrdir)) != 0) - break; - - if (pobj == obj) { - if (path[0] != '/') - *--path = '/'; - break; - } - - component[0] = '/'; - if (is_xattrdir) { - (void) sprintf(component + 1, ""); - } else { - error = zap_value_search(osp, pobj, obj, - ZFS_DIRENT_OBJ(-1ULL), component + 1); - if (error != 0) - break; - } - - complen = strlen(component); - path -= 
complen; - ASSERT3P(path, >=, buf); - memcpy(path, component, complen); - obj = pobj; - - if (sa_hdl != hdl) { - prevhdl = sa_hdl; - prevdb = sa_db; - } - error = zfs_grab_sa_handle(osp, obj, &sa_hdl, &sa_db, FTAG); - if (error != 0) { - sa_hdl = prevhdl; - sa_db = prevdb; - break; - } - } - - if (sa_hdl != NULL && sa_hdl != hdl) { - ASSERT3P(sa_db, !=, NULL); - zfs_release_sa_handle(sa_hdl, sa_db, FTAG); - } - - if (error == 0) - (void) memmove(buf, path, buf + len - path); - - return (error); -} - -int -zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len) -{ - sa_attr_type_t *sa_table; - sa_handle_t *hdl; - dmu_buf_t *db; - int error; - - error = zfs_sa_setup(osp, &sa_table); - if (error != 0) - return (error); - - error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG); - if (error != 0) - return (error); - - error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len); - - zfs_release_sa_handle(hdl, db, FTAG); - return (error); -} - -int -zfs_obj_to_stats(objset_t *osp, uint64_t obj, zfs_stat_t *sb, - char *buf, int len) -{ - char *path = buf + len - 1; - sa_attr_type_t *sa_table; - sa_handle_t *hdl; - dmu_buf_t *db; - int error; - - *path = '\0'; - - error = zfs_sa_setup(osp, &sa_table); - if (error != 0) - return (error); - - error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG); - if (error != 0) - return (error); - - error = zfs_obj_to_stats_impl(hdl, sa_table, sb); - if (error != 0) { - zfs_release_sa_handle(hdl, db, FTAG); - return (error); - } - - error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len); - - zfs_release_sa_handle(hdl, db, FTAG); - return (error); -} - -/* - * Read a property stored within the master node. - */ -int -zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value) -{ - uint64_t *cached_copy = NULL; - - /* - * Figure out where in the objset_t the cached copy would live, if it - * is available for the requested property. 
- */ - if (os != NULL) { - switch (prop) { - case ZFS_PROP_VERSION: - cached_copy = &os->os_version; - break; - case ZFS_PROP_NORMALIZE: - cached_copy = &os->os_normalization; - break; - case ZFS_PROP_UTF8ONLY: - cached_copy = &os->os_utf8only; - break; - case ZFS_PROP_CASE: - cached_copy = &os->os_casesensitivity; - break; - default: - break; - } - } - if (cached_copy != NULL && *cached_copy != OBJSET_PROP_UNINITIALIZED) { - *value = *cached_copy; - return (0); - } - - /* - * If the property wasn't cached, look up the file system's value for - * the property. For the version property, we look up a slightly - * different string. - */ - const char *pname; - int error = ENOENT; - if (prop == ZFS_PROP_VERSION) { - pname = ZPL_VERSION_STR; - } else { - pname = zfs_prop_to_name(prop); - } - - if (os != NULL) { - ASSERT3U(os->os_phys->os_type, ==, DMU_OST_ZFS); - error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value); - } - - if (error == ENOENT) { - /* No value set, use the default value */ - switch (prop) { - case ZFS_PROP_VERSION: - *value = ZPL_VERSION; - break; - case ZFS_PROP_NORMALIZE: - case ZFS_PROP_UTF8ONLY: - *value = 0; - break; - case ZFS_PROP_CASE: - *value = ZFS_CASE_SENSITIVE; - break; - case ZFS_PROP_ACLTYPE: - *value = ZFS_ACLTYPE_NFSV4; - break; - default: - return (error); - } - error = 0; - } - - /* - * If one of the methods for getting the property value above worked, - * copy it into the objset_t's cache. 
- */ - if (error == 0 && cached_copy != NULL) { - *cached_copy = *value; - } - - return (error); -} - - - -void -zfs_znode_update_vfs(znode_t *zp) -{ - vm_object_t object; - - if ((object = ZTOV(zp)->v_object) == NULL || - zp->z_size == object->un_pager.vnp.vnp_size) - return; - - vnode_pager_setsize(ZTOV(zp), zp->z_size); -} - - -#ifdef _KERNEL int zfs_znode_parent_and_name(znode_t *zp, znode_t **dzpp, char *buf) { @@ -2186,9 +1808,7 @@ zfs_znode_parent_and_name(znode_t *zp, znode_t **dzpp, char *buf) err = zfs_zget(zfsvfs, parent, dzpp); return (err); } -#endif /* _KERNEL */ -#ifdef _KERNEL int zfs_rlimit_fsize(off_t fsize) { @@ -2211,4 +1831,3 @@ zfs_rlimit_fsize(off_t fsize) return (EFBIG); } -#endif /* _KERNEL */ diff --git a/module/os/linux/zfs/zfs_znode.c b/module/os/linux/zfs/zfs_znode_os.c similarity index 86% rename from module/os/linux/zfs/zfs_znode.c rename to module/os/linux/zfs/zfs_znode_os.c index 265153e011..f72e205cd8 100644 --- a/module/os/linux/zfs/zfs_znode.c +++ b/module/os/linux/zfs/zfs_znode_os.c @@ -25,7 +25,6 @@ /* Portions Copyright 2007 Jeremy Teo */ -#ifdef _KERNEL #include #include #include @@ -49,8 +48,6 @@ #include #include #include -#endif /* _KERNEL */ - #include #include #include @@ -65,13 +62,6 @@ #include "zfs_prop.h" #include "zfs_comutil.h" -/* - * Functions needed for userland (ie: libzpool) are not put under - * #ifdef_KERNEL; the rest of the functions have dependencies - * (such as VFS logic) that will not compile easily in userland. 
- */ -#ifdef _KERNEL - static kmem_cache_t *znode_cache = NULL; static kmem_cache_t *znode_hold_cache = NULL; unsigned int zfs_object_mutex_size = ZFS_OBJ_MTX_SZ; @@ -2006,360 +1996,7 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) kmem_free(sb, sizeof (struct super_block)); kmem_free(zfsvfs, sizeof (zfsvfs_t)); } -#endif /* _KERNEL */ -static int -zfs_sa_setup(objset_t *osp, sa_attr_type_t **sa_table) -{ - uint64_t sa_obj = 0; - int error; - - error = zap_lookup(osp, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, &sa_obj); - if (error != 0 && error != ENOENT) - return (error); - - error = sa_setup(osp, sa_obj, zfs_attr_table, ZPL_END, sa_table); - return (error); -} - -static int -zfs_grab_sa_handle(objset_t *osp, uint64_t obj, sa_handle_t **hdlp, - dmu_buf_t **db, const void *tag) -{ - dmu_object_info_t doi; - int error; - - if ((error = sa_buf_hold(osp, obj, tag, db)) != 0) - return (error); - - dmu_object_info_from_db(*db, &doi); - if ((doi.doi_bonus_type != DMU_OT_SA && - doi.doi_bonus_type != DMU_OT_ZNODE) || - (doi.doi_bonus_type == DMU_OT_ZNODE && - doi.doi_bonus_size < sizeof (znode_phys_t))) { - sa_buf_rele(*db, tag); - return (SET_ERROR(ENOTSUP)); - } - - error = sa_handle_get(osp, obj, NULL, SA_HDL_PRIVATE, hdlp); - if (error != 0) { - sa_buf_rele(*db, tag); - return (error); - } - - return (0); -} - -static void -zfs_release_sa_handle(sa_handle_t *hdl, dmu_buf_t *db, const void *tag) -{ - sa_handle_destroy(hdl); - sa_buf_rele(db, tag); -} - -/* - * Given an object number, return its parent object number and whether - * or not the object is an extended attribute directory. 
- */ -static int -zfs_obj_to_pobj(objset_t *osp, sa_handle_t *hdl, sa_attr_type_t *sa_table, - uint64_t *pobjp, int *is_xattrdir) -{ - uint64_t parent; - uint64_t pflags; - uint64_t mode; - uint64_t parent_mode; - sa_bulk_attr_t bulk[3]; - sa_handle_t *sa_hdl; - dmu_buf_t *sa_db; - int count = 0; - int error; - - SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_PARENT], NULL, - &parent, sizeof (parent)); - SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_FLAGS], NULL, - &pflags, sizeof (pflags)); - SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL, - &mode, sizeof (mode)); - - if ((error = sa_bulk_lookup(hdl, bulk, count)) != 0) - return (error); - - /* - * When a link is removed its parent pointer is not changed and will - * be invalid. There are two cases where a link is removed but the - * file stays around, when it goes to the delete queue and when there - * are additional links. - */ - error = zfs_grab_sa_handle(osp, parent, &sa_hdl, &sa_db, FTAG); - if (error != 0) - return (error); - - error = sa_lookup(sa_hdl, ZPL_MODE, &parent_mode, sizeof (parent_mode)); - zfs_release_sa_handle(sa_hdl, sa_db, FTAG); - if (error != 0) - return (error); - - *is_xattrdir = ((pflags & ZFS_XATTR) != 0) && S_ISDIR(mode); - - /* - * Extended attributes can be applied to files, directories, etc. - * Otherwise the parent must be a directory. 
- */ - if (!*is_xattrdir && !S_ISDIR(parent_mode)) - return (SET_ERROR(EINVAL)); - - *pobjp = parent; - - return (0); -} - -/* - * Given an object number, return some zpl level statistics - */ -static int -zfs_obj_to_stats_impl(sa_handle_t *hdl, sa_attr_type_t *sa_table, - zfs_stat_t *sb) -{ - sa_bulk_attr_t bulk[4]; - int count = 0; - - SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL, - &sb->zs_mode, sizeof (sb->zs_mode)); - SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_GEN], NULL, - &sb->zs_gen, sizeof (sb->zs_gen)); - SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_LINKS], NULL, - &sb->zs_links, sizeof (sb->zs_links)); - SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_CTIME], NULL, - &sb->zs_ctime, sizeof (sb->zs_ctime)); - - return (sa_bulk_lookup(hdl, bulk, count)); -} - -static int -zfs_obj_to_path_impl(objset_t *osp, uint64_t obj, sa_handle_t *hdl, - sa_attr_type_t *sa_table, char *buf, int len) -{ - sa_handle_t *sa_hdl; - sa_handle_t *prevhdl = NULL; - dmu_buf_t *prevdb = NULL; - dmu_buf_t *sa_db = NULL; - char *path = buf + len - 1; - int error; - - *path = '\0'; - sa_hdl = hdl; - - uint64_t deleteq_obj; - VERIFY0(zap_lookup(osp, MASTER_NODE_OBJ, - ZFS_UNLINKED_SET, sizeof (uint64_t), 1, &deleteq_obj)); - error = zap_lookup_int(osp, deleteq_obj, obj); - if (error == 0) { - return (ESTALE); - } else if (error != ENOENT) { - return (error); - } - - for (;;) { - uint64_t pobj = 0; - char component[MAXNAMELEN + 2]; - size_t complen; - int is_xattrdir = 0; - - if (prevdb) { - ASSERT(prevhdl != NULL); - zfs_release_sa_handle(prevhdl, prevdb, FTAG); - } - - if ((error = zfs_obj_to_pobj(osp, sa_hdl, sa_table, &pobj, - &is_xattrdir)) != 0) - break; - - if (pobj == obj) { - if (path[0] != '/') - *--path = '/'; - break; - } - - component[0] = '/'; - if (is_xattrdir) { - strcpy(component + 1, ""); - } else { - error = zap_value_search(osp, pobj, obj, - ZFS_DIRENT_OBJ(-1ULL), component + 1); - if (error != 0) - break; - } - - complen = strlen(component); - path -= complen; - 
ASSERT(path >= buf); - memcpy(path, component, complen); - obj = pobj; - - if (sa_hdl != hdl) { - prevhdl = sa_hdl; - prevdb = sa_db; - } - error = zfs_grab_sa_handle(osp, obj, &sa_hdl, &sa_db, FTAG); - if (error != 0) { - sa_hdl = prevhdl; - sa_db = prevdb; - break; - } - } - - if (sa_hdl != NULL && sa_hdl != hdl) { - ASSERT(sa_db != NULL); - zfs_release_sa_handle(sa_hdl, sa_db, FTAG); - } - - if (error == 0) - (void) memmove(buf, path, buf + len - path); - - return (error); -} - -int -zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len) -{ - sa_attr_type_t *sa_table; - sa_handle_t *hdl; - dmu_buf_t *db; - int error; - - error = zfs_sa_setup(osp, &sa_table); - if (error != 0) - return (error); - - error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG); - if (error != 0) - return (error); - - error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len); - - zfs_release_sa_handle(hdl, db, FTAG); - return (error); -} - -int -zfs_obj_to_stats(objset_t *osp, uint64_t obj, zfs_stat_t *sb, - char *buf, int len) -{ - char *path = buf + len - 1; - sa_attr_type_t *sa_table; - sa_handle_t *hdl; - dmu_buf_t *db; - int error; - - *path = '\0'; - - error = zfs_sa_setup(osp, &sa_table); - if (error != 0) - return (error); - - error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG); - if (error != 0) - return (error); - - error = zfs_obj_to_stats_impl(hdl, sa_table, sb); - if (error != 0) { - zfs_release_sa_handle(hdl, db, FTAG); - return (error); - } - - error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len); - - zfs_release_sa_handle(hdl, db, FTAG); - return (error); -} - -/* - * Read a property stored within the master node. - */ -int -zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value) -{ - uint64_t *cached_copy = NULL; - - /* - * Figure out where in the objset_t the cached copy would live, if it - * is available for the requested property. 
- */ - if (os != NULL) { - switch (prop) { - case ZFS_PROP_VERSION: - cached_copy = &os->os_version; - break; - case ZFS_PROP_NORMALIZE: - cached_copy = &os->os_normalization; - break; - case ZFS_PROP_UTF8ONLY: - cached_copy = &os->os_utf8only; - break; - case ZFS_PROP_CASE: - cached_copy = &os->os_casesensitivity; - break; - default: - break; - } - } - if (cached_copy != NULL && *cached_copy != OBJSET_PROP_UNINITIALIZED) { - *value = *cached_copy; - return (0); - } - - /* - * If the property wasn't cached, look up the file system's value for - * the property. For the version property, we look up a slightly - * different string. - */ - const char *pname; - int error = ENOENT; - if (prop == ZFS_PROP_VERSION) - pname = ZPL_VERSION_STR; - else - pname = zfs_prop_to_name(prop); - - if (os != NULL) { - ASSERT3U(os->os_phys->os_type, ==, DMU_OST_ZFS); - error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value); - } - - if (error == ENOENT) { - /* No value set, use the default value */ - switch (prop) { - case ZFS_PROP_VERSION: - *value = ZPL_VERSION; - break; - case ZFS_PROP_NORMALIZE: - case ZFS_PROP_UTF8ONLY: - *value = 0; - break; - case ZFS_PROP_CASE: - *value = ZFS_CASE_SENSITIVE; - break; - case ZFS_PROP_ACLTYPE: - *value = ZFS_ACLTYPE_OFF; - break; - default: - return (error); - } - error = 0; - } - - /* - * If one of the methods for getting the property value above worked, - * copy it into the objset_t's cache. 
- */ - if (error == 0 && cached_copy != NULL) { - *cached_copy = *value; - } - - return (error); -} - -#if defined(_KERNEL) EXPORT_SYMBOL(zfs_create_fs); EXPORT_SYMBOL(zfs_obj_to_path); @@ -2369,4 +2006,3 @@ MODULE_PARM_DESC(zfs_object_mutex_size, "Size of znode hold array"); module_param(zfs_unlink_suspend_progress, int, 0644); MODULE_PARM_DESC(zfs_unlink_suspend_progress, "Set to prevent async unlinks " "(debug - leaks space into the unlinked set)"); -#endif diff --git a/module/zfs/zfs_znode.c b/module/zfs/zfs_znode.c new file mode 100644 index 0000000000..2e28587c8b --- /dev/null +++ b/module/zfs/zfs_znode.c @@ -0,0 +1,397 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or https://opensource.org/licenses/CDDL-1.0. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2018 by Delphix. All rights reserved. 
+ * Copyright (c) 2014 Integros [integros.com] + */ + +/* Portions Copyright 2007 Jeremy Teo */ +/* Portions Copyright 2011 Martin Matuska */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "zfs_prop.h" +#include "zfs_comutil.h" + +static int +zfs_sa_setup(objset_t *osp, sa_attr_type_t **sa_table) +{ + uint64_t sa_obj = 0; + int error; + + error = zap_lookup(osp, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, &sa_obj); + if (error != 0 && error != ENOENT) + return (error); + + error = sa_setup(osp, sa_obj, zfs_attr_table, ZPL_END, sa_table); + return (error); +} + +static int +zfs_grab_sa_handle(objset_t *osp, uint64_t obj, sa_handle_t **hdlp, + dmu_buf_t **db, const void *tag) +{ + dmu_object_info_t doi; + int error; + + if ((error = sa_buf_hold(osp, obj, tag, db)) != 0) + return (error); + + dmu_object_info_from_db(*db, &doi); + if ((doi.doi_bonus_type != DMU_OT_SA && + doi.doi_bonus_type != DMU_OT_ZNODE) || + (doi.doi_bonus_type == DMU_OT_ZNODE && + doi.doi_bonus_size < sizeof (znode_phys_t))) { + sa_buf_rele(*db, tag); + return (SET_ERROR(ENOTSUP)); + } + + error = sa_handle_get(osp, obj, NULL, SA_HDL_PRIVATE, hdlp); + if (error != 0) { + sa_buf_rele(*db, tag); + return (error); + } + + return (0); +} + +static void +zfs_release_sa_handle(sa_handle_t *hdl, dmu_buf_t *db, const void *tag) +{ + sa_handle_destroy(hdl); + sa_buf_rele(db, tag); +} + +/* + * Given an object number, return its parent object number and whether + * or not the object is an extended attribute directory. 
+ */ +int +zfs_obj_to_pobj(objset_t *osp, sa_handle_t *hdl, sa_attr_type_t *sa_table, + uint64_t *pobjp, int *is_xattrdir) +{ + uint64_t parent; + uint64_t pflags; + uint64_t mode; + uint64_t parent_mode; + sa_bulk_attr_t bulk[3]; + sa_handle_t *sa_hdl; + dmu_buf_t *sa_db; + int count = 0; + int error; + + SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_PARENT], NULL, + &parent, sizeof (parent)); + SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_FLAGS], NULL, + &pflags, sizeof (pflags)); + SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL, + &mode, sizeof (mode)); + + if ((error = sa_bulk_lookup(hdl, bulk, count)) != 0) + return (error); + + /* + * When a link is removed its parent pointer is not changed and will + * be invalid. There are two cases where a link is removed but the + * file stays around, when it goes to the delete queue and when there + * are additional links. + */ + error = zfs_grab_sa_handle(osp, parent, &sa_hdl, &sa_db, FTAG); + if (error != 0) + return (error); + + error = sa_lookup(sa_hdl, ZPL_MODE, &parent_mode, sizeof (parent_mode)); + zfs_release_sa_handle(sa_hdl, sa_db, FTAG); + if (error != 0) + return (error); + + *is_xattrdir = ((pflags & ZFS_XATTR) != 0) && S_ISDIR(mode); + + /* + * Extended attributes can be applied to files, directories, etc. + * Otherwise the parent must be a directory. 
+ */ + if (!*is_xattrdir && !S_ISDIR(parent_mode)) + return (SET_ERROR(EINVAL)); + + *pobjp = parent; + + return (0); +} + +/* + * Given an object number, return some zpl level statistics + */ +static int +zfs_obj_to_stats_impl(sa_handle_t *hdl, sa_attr_type_t *sa_table, + zfs_stat_t *sb) +{ + sa_bulk_attr_t bulk[4]; + int count = 0; + + SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL, + &sb->zs_mode, sizeof (sb->zs_mode)); + SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_GEN], NULL, + &sb->zs_gen, sizeof (sb->zs_gen)); + SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_LINKS], NULL, + &sb->zs_links, sizeof (sb->zs_links)); + SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_CTIME], NULL, + &sb->zs_ctime, sizeof (sb->zs_ctime)); + + return (sa_bulk_lookup(hdl, bulk, count)); +} + +static int +zfs_obj_to_path_impl(objset_t *osp, uint64_t obj, sa_handle_t *hdl, + sa_attr_type_t *sa_table, char *buf, int len) +{ + sa_handle_t *sa_hdl; + sa_handle_t *prevhdl = NULL; + dmu_buf_t *prevdb = NULL; + dmu_buf_t *sa_db = NULL; + char *path = buf + len - 1; + int error; + + *path = '\0'; + sa_hdl = hdl; + + uint64_t deleteq_obj; + VERIFY0(zap_lookup(osp, MASTER_NODE_OBJ, + ZFS_UNLINKED_SET, sizeof (uint64_t), 1, &deleteq_obj)); + error = zap_lookup_int(osp, deleteq_obj, obj); + if (error == 0) { + return (ESTALE); + } else if (error != ENOENT) { + return (error); + } + + for (;;) { + uint64_t pobj = 0; + char component[MAXNAMELEN + 2]; + size_t complen; + int is_xattrdir = 0; + + if (prevdb) { + ASSERT3P(prevhdl, !=, NULL); + zfs_release_sa_handle(prevhdl, prevdb, FTAG); + } + + if ((error = zfs_obj_to_pobj(osp, sa_hdl, sa_table, &pobj, + &is_xattrdir)) != 0) + break; + + if (pobj == obj) { + if (path[0] != '/') + *--path = '/'; + break; + } + + component[0] = '/'; + if (is_xattrdir) { + strcpy(component + 1, "<xattrdir>"); + } else { + error = zap_value_search(osp, pobj, obj, + ZFS_DIRENT_OBJ(-1ULL), component + 1); + if (error != 0) + break; + } + + complen = strlen(component); + path -=
complen; + ASSERT3P(path, >=, buf); + memcpy(path, component, complen); + obj = pobj; + + if (sa_hdl != hdl) { + prevhdl = sa_hdl; + prevdb = sa_db; + } + error = zfs_grab_sa_handle(osp, obj, &sa_hdl, &sa_db, FTAG); + if (error != 0) { + sa_hdl = prevhdl; + sa_db = prevdb; + break; + } + } + + if (sa_hdl != NULL && sa_hdl != hdl) { + ASSERT3P(sa_db, !=, NULL); + zfs_release_sa_handle(sa_hdl, sa_db, FTAG); + } + + if (error == 0) + (void) memmove(buf, path, buf + len - path); + + return (error); +} + +int +zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len) +{ + sa_attr_type_t *sa_table; + sa_handle_t *hdl; + dmu_buf_t *db; + int error; + + error = zfs_sa_setup(osp, &sa_table); + if (error != 0) + return (error); + + error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG); + if (error != 0) + return (error); + + error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len); + + zfs_release_sa_handle(hdl, db, FTAG); + return (error); +} + +int +zfs_obj_to_stats(objset_t *osp, uint64_t obj, zfs_stat_t *sb, + char *buf, int len) +{ + char *path = buf + len - 1; + sa_attr_type_t *sa_table; + sa_handle_t *hdl; + dmu_buf_t *db; + int error; + + *path = '\0'; + + error = zfs_sa_setup(osp, &sa_table); + if (error != 0) + return (error); + + error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG); + if (error != 0) + return (error); + + error = zfs_obj_to_stats_impl(hdl, sa_table, sb); + if (error != 0) { + zfs_release_sa_handle(hdl, db, FTAG); + return (error); + } + + error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len); + + zfs_release_sa_handle(hdl, db, FTAG); + return (error); +} + +/* + * Read a property stored within the master node. + */ +int +zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value) +{ + uint64_t *cached_copy = NULL; + + /* + * Figure out where in the objset_t the cached copy would live, if it + * is available for the requested property. 
+ */ + if (os != NULL) { + switch (prop) { + case ZFS_PROP_VERSION: + cached_copy = &os->os_version; + break; + case ZFS_PROP_NORMALIZE: + cached_copy = &os->os_normalization; + break; + case ZFS_PROP_UTF8ONLY: + cached_copy = &os->os_utf8only; + break; + case ZFS_PROP_CASE: + cached_copy = &os->os_casesensitivity; + break; + default: + break; + } + } + if (cached_copy != NULL && *cached_copy != OBJSET_PROP_UNINITIALIZED) { + *value = *cached_copy; + return (0); + } + + /* + * If the property wasn't cached, look up the file system's value for + * the property. For the version property, we look up a slightly + * different string. + */ + const char *pname; + int error = ENOENT; + if (prop == ZFS_PROP_VERSION) + pname = ZPL_VERSION_STR; + else + pname = zfs_prop_to_name(prop); + + if (os != NULL) { + ASSERT3U(os->os_phys->os_type, ==, DMU_OST_ZFS); + error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value); + } + + if (error == ENOENT) { + /* No value set, use the default value */ + switch (prop) { + case ZFS_PROP_VERSION: + *value = ZPL_VERSION; + break; + case ZFS_PROP_NORMALIZE: + case ZFS_PROP_UTF8ONLY: + *value = 0; + break; + case ZFS_PROP_CASE: + *value = ZFS_CASE_SENSITIVE; + break; + case ZFS_PROP_ACLTYPE: +#ifdef __FreeBSD__ + *value = ZFS_ACLTYPE_NFSV4; +#else + *value = ZFS_ACLTYPE_OFF; +#endif + break; + default: + return (error); + } + error = 0; + } + + /* + * If one of the methods for getting the property value above worked, + * copy it into the objset_t's cache. + */ + if (error == 0 && cached_copy != NULL) { + *cached_copy = *value; + } + + return (error); +} From 4e2f8ba14acc6e8d799d68d928be2c45106de9b5 Mon Sep 17 00:00:00 2001 From: Rob Norris Date: Wed, 28 Aug 2024 22:28:58 +1000 Subject: [PATCH 6/6] zfs_debug: specific variant for userspace Just nice and simple, with room to grow. 
Signed-off-by: Rob Norris Sponsored-by: https://despairlabs.com/sponsor/ --- lib/libzpool/Makefile.am | 4 +- lib/libzpool/vdev_label_os.c | 12 +++- lib/libzpool/zfs_debug.c | 106 ++++++++++++++++++++++++++++++ module/os/freebsd/zfs/zfs_debug.c | 38 +---------- module/os/linux/zfs/zfs_debug.c | 40 ----------- 5 files changed, 118 insertions(+), 82 deletions(-) create mode 100644 lib/libzpool/zfs_debug.c diff --git a/lib/libzpool/Makefile.am b/lib/libzpool/Makefile.am index 27b8b9070b..fd5eb6d957 100644 --- a/lib/libzpool/Makefile.am +++ b/lib/libzpool/Makefile.am @@ -17,7 +17,8 @@ dist_libzpool_la_SOURCES = \ %D%/taskq.c \ %D%/util.c \ %D%/vdev_label_os.c \ - %D%/zfs_racct.c + %D%/zfs_racct.c \ + %D%/zfs_debug.c nodist_libzpool_la_SOURCES = \ module/lua/lapi.c \ @@ -46,7 +47,6 @@ nodist_libzpool_la_SOURCES = \ module/lua/lzio.c \ \ module/os/linux/zfs/vdev_file.c \ - module/os/linux/zfs/zfs_debug.c \ module/os/linux/zfs/zio_crypt.c \ \ module/zcommon/cityhash.c \ diff --git a/lib/libzpool/vdev_label_os.c b/lib/libzpool/vdev_label_os.c index 3d965b89a9..c303cba6dc 100644 --- a/lib/libzpool/vdev_label_os.c +++ b/lib/libzpool/vdev_label_os.c @@ -30,10 +30,16 @@ #include /* - * Check if the reserved boot area is in-use. + * Check if the reserved boot area is in-use. This is called from + * spa_vdev_attach() when adding a device to a raidz vdev, to ensure that the + * reserved area is available as scratch space for raidz expansion. * - * This function always returns 0, as there are no known external uses - * of the reserved area on Linux. + * This function currently always returns 0. On Linux, there are no known + * external uses of the reserved area. On FreeBSD, the reserved boot area is + * used when booting to a ZFS root from an MBR partition. + * + * Currently nothing using libzpool can add a disk to a pool, so this does + * nothing. 
*/ int vdev_check_boot_reserve(spa_t *spa, vdev_t *childvd) diff --git a/lib/libzpool/zfs_debug.c b/lib/libzpool/zfs_debug.c new file mode 100644 index 0000000000..df49a9a33f --- /dev/null +++ b/lib/libzpool/zfs_debug.c @@ -0,0 +1,106 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or https://opensource.org/licenses/CDDL-1.0. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2014 by Delphix. All rights reserved. 
+ * Copyright (c) 2024, Rob Norris + */ + +#include + +typedef struct zfs_dbgmsg { + list_node_t zdm_node; + uint64_t zdm_timestamp; + uint_t zdm_size; + char zdm_msg[]; /* variable length allocation */ +} zfs_dbgmsg_t; + +static list_t zfs_dbgmsgs; +static kmutex_t zfs_dbgmsgs_lock; + +int zfs_dbgmsg_enable = B_TRUE; + +void +zfs_dbgmsg_init(void) +{ + list_create(&zfs_dbgmsgs, sizeof (zfs_dbgmsg_t), + offsetof(zfs_dbgmsg_t, zdm_node)); + mutex_init(&zfs_dbgmsgs_lock, NULL, MUTEX_DEFAULT, NULL); +} + +void +zfs_dbgmsg_fini(void) +{ + zfs_dbgmsg_t *zdm; + while ((zdm = list_remove_head(&zfs_dbgmsgs))) + umem_free(zdm, zdm->zdm_size); + mutex_destroy(&zfs_dbgmsgs_lock); +} + +void +__set_error(const char *file, const char *func, int line, int err) +{ + if (zfs_flags & ZFS_DEBUG_SET_ERROR) + __dprintf(B_FALSE, file, func, line, "error %lu", + (ulong_t)err); +} + +void +__zfs_dbgmsg(char *buf) +{ + uint_t size = sizeof (zfs_dbgmsg_t) + strlen(buf) + 1; + zfs_dbgmsg_t *zdm = umem_zalloc(size, KM_SLEEP); + zdm->zdm_size = size; + zdm->zdm_timestamp = gethrestime_sec(); + strcpy(zdm->zdm_msg, buf); + + mutex_enter(&zfs_dbgmsgs_lock); + list_insert_tail(&zfs_dbgmsgs, zdm); + mutex_exit(&zfs_dbgmsgs_lock); +} + +void +zfs_dbgmsg_print(int fd, const char *tag) +{ + ssize_t ret __attribute__((unused)); + + mutex_enter(&zfs_dbgmsgs_lock); + + /* + * We use write() in this function instead of printf() + * so it is safe to call from a signal handler. 
+ */ + ret = write(fd, "ZFS_DBGMSG(", 11); + ret = write(fd, tag, strlen(tag)); + ret = write(fd, ") START:\n", 9); + + for (zfs_dbgmsg_t *zdm = list_head(&zfs_dbgmsgs); zdm != NULL; + zdm = list_next(&zfs_dbgmsgs, zdm)) { + ret = write(fd, zdm->zdm_msg, strlen(zdm->zdm_msg)); + ret = write(fd, "\n", 1); + } + + ret = write(fd, "ZFS_DBGMSG(", 11); + ret = write(fd, tag, strlen(tag)); + ret = write(fd, ") END\n", 6); + + mutex_exit(&zfs_dbgmsgs_lock); +} diff --git a/module/os/freebsd/zfs/zfs_debug.c b/module/os/freebsd/zfs/zfs_debug.c index c4cebe1020..a7ddd3c2f5 100644 --- a/module/os/freebsd/zfs/zfs_debug.c +++ b/module/os/freebsd/zfs/zfs_debug.c @@ -140,15 +140,11 @@ zfs_dbgmsg_fini(void) { if (zfs_dbgmsg_kstat) kstat_delete(zfs_dbgmsg_kstat); - /* - * TODO - decide how to make this permanent - */ -#ifdef _KERNEL + mutex_enter(&zfs_dbgmsgs_lock); zfs_dbgmsg_purge(0); mutex_exit(&zfs_dbgmsgs_lock); mutex_destroy(&zfs_dbgmsgs_lock); -#endif } void @@ -184,7 +180,6 @@ __set_error(const char *file, const char *func, int line, int err) __dprintf(B_FALSE, file, func, line, "error %lu", (ulong_t)err); } -#ifdef _KERNEL void __dprintf(boolean_t dprint, const char *file, const char *func, int line, const char *fmt, ...) @@ -229,37 +224,6 @@ __dprintf(boolean_t dprint, const char *file, const char *func, kmem_free(buf, size); } -#else - -void -zfs_dbgmsg_print(int fd, const char *tag) -{ - ssize_t ret __attribute__((unused)); - - /* - * We use write() in this function instead of printf() - * so it is safe to call from a signal handler. 
- */ - ret = write(fd, "ZFS_DBGMSG(", 11); - ret = write(fd, tag, strlen(tag)); - ret = write(fd, ") START:\n", 9); - - mutex_enter(&zfs_dbgmsgs_lock); - - for (zfs_dbgmsg_t *zdm = list_head(&zfs_dbgmsgs); zdm != NULL; - zdm = list_next(&zfs_dbgmsgs, zdm)) - ret = write(fd, zdm->zdm_msg, strlen(zdm->zdm_msg)); - ret = write(fd, "\n", 1); - } - - ret = write(fd, "ZFS_DBGMSG(", 11); - ret = write(fd, tag, strlen(tag)); - ret = write(fd, ") END\n", 6); - - mutex_exit(&zfs_dbgmsgs_lock); -} -#endif /* _KERNEL */ - ZFS_MODULE_PARAM(zfs, zfs_, dbgmsg_enable, INT, ZMOD_RW, "Enable ZFS debug message log"); diff --git a/module/os/linux/zfs/zfs_debug.c b/module/os/linux/zfs/zfs_debug.c index 9ee40771fc..a017900d55 100644 --- a/module/os/linux/zfs/zfs_debug.c +++ b/module/os/linux/zfs/zfs_debug.c @@ -111,12 +111,7 @@ zfs_dbgmsg_fini(void) procfs_list_uninstall(&zfs_dbgmsgs); zfs_dbgmsg_purge(0); - /* - * TODO - decide how to make this permanent - */ -#ifdef _KERNEL procfs_list_destroy(&zfs_dbgmsgs); -#endif } void @@ -148,8 +143,6 @@ __zfs_dbgmsg(char *buf) mutex_exit(&zfs_dbgmsgs.pl_lock); } -#ifdef _KERNEL - void __dprintf(boolean_t dprint, const char *file, const char *func, int line, const char *fmt, ...) @@ -218,38 +211,6 @@ __dprintf(boolean_t dprint, const char *file, const char *func, kmem_free(buf, size); } -#else - -void -zfs_dbgmsg_print(int fd, const char *tag) -{ - ssize_t ret __attribute__((unused)); - - mutex_enter(&zfs_dbgmsgs.pl_lock); - - /* - * We use write() in this function instead of printf() - * so it is safe to call from a signal handler. 
- */ - ret = write(fd, "ZFS_DBGMSG(", 11); - ret = write(fd, tag, strlen(tag)); - ret = write(fd, ") START:\n", 9); - - for (zfs_dbgmsg_t *zdm = list_head(&zfs_dbgmsgs.pl_list); zdm != NULL; - zdm = list_next(&zfs_dbgmsgs.pl_list, zdm)) { - ret = write(fd, zdm->zdm_msg, strlen(zdm->zdm_msg)); - ret = write(fd, "\n", 1); - } - - ret = write(fd, "ZFS_DBGMSG(", 11); - ret = write(fd, tag, strlen(tag)); - ret = write(fd, ") END\n", 6); - - mutex_exit(&zfs_dbgmsgs.pl_lock); -} -#endif /* _KERNEL */ - -#ifdef _KERNEL module_param(zfs_dbgmsg_enable, int, 0644); MODULE_PARM_DESC(zfs_dbgmsg_enable, "Enable ZFS debug message log"); @@ -257,4 +218,3 @@ MODULE_PARM_DESC(zfs_dbgmsg_enable, "Enable ZFS debug message log"); module_param(zfs_dbgmsg_maxsize, uint, 0644); /* END CSTYLED */ MODULE_PARM_DESC(zfs_dbgmsg_maxsize, "Maximum ZFS debug log size"); -#endif