From eebfd28e9d712bcfb5a6d20d59be208645474066 Mon Sep 17 00:00:00 2001 From: Andrew Date: Fri, 1 Apr 2022 11:53:54 -0500 Subject: [PATCH] Linux optimize access checks when ACL is trivial Bypass check of ZFS aces if the ACL is trivial. When an ACL is trivial its permissions are represented by the mode without any loss of information. In this case, it is safe to convert the access request into equivalent mode and then pass desired mask and inode to generic_permission(). This has the added benefit of also checking whether entries in a POSIX ACL on the file grant the desired access. This commit also skips the ACL check on looking up the xattr dir since such restrictions don't exist in Linux kernel and it makes xattr lookup behavior inconsistent between SA and file-based xattrs. We also don't want to perform a POSIX ACL check while looking up the POSIX ACL if for some reason it is located in the xattr dir rather than an SA. Reviewed-by: Brian Behlendorf Co-authored-by: Ryan Moeller Signed-off-by: Andrew Walker Closes #13237 --- config/kernel-inode-permission.m4 | 29 +++++++++++++ config/kernel.m4 | 2 + module/os/linux/zfs/zfs_acl.c | 70 ++++++++++++++++++++++++++++++ module/os/linux/zfs/zfs_vnops_os.c | 2 +- 4 files changed, 102 insertions(+), 1 deletion(-) create mode 100644 config/kernel-inode-permission.m4 diff --git a/config/kernel-inode-permission.m4 b/config/kernel-inode-permission.m4 new file mode 100644 index 0000000000..ba9ff5d43d --- /dev/null +++ b/config/kernel-inode-permission.m4 @@ -0,0 +1,29 @@ +AC_DEFUN([ZFS_AC_KERNEL_SRC_PERMISSION], [ + dnl # + dnl # 5.12 API change that added the struct user_namespace* arg + dnl # to the front of this function type's arg list. + dnl # + ZFS_LINUX_TEST_SRC([permission_userns], [ + #include + #include + + int inode_permission(struct user_namespace *userns, + struct inode *inode, int mask) { return 0; } + + static const struct inode_operations + iops __attribute__ ((unused)) = { + .permission = inode_permission, + }; + ],[]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_PERMISSION], [ + AC_MSG_CHECKING([whether iops->permission() takes struct user_namespace*]) + ZFS_LINUX_TEST_RESULT([permission_userns], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_IOPS_PERMISSION_USERNS, 1, + [iops->permission() takes struct user_namespace*]) + ],[ + AC_MSG_RESULT(no) + ]) +]) diff --git a/config/kernel.m4 b/config/kernel.m4 index d1d3dede17..639d183771 100644 --- a/config/kernel.m4 +++ b/config/kernel.m4 @@ -82,6 +82,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [ ZFS_AC_KERNEL_SRC_MKDIR ZFS_AC_KERNEL_SRC_LOOKUP_FLAGS ZFS_AC_KERNEL_SRC_CREATE + ZFS_AC_KERNEL_SRC_PERMISSION ZFS_AC_KERNEL_SRC_GET_LINK ZFS_AC_KERNEL_SRC_PUT_LINK ZFS_AC_KERNEL_SRC_TMPFILE @@ -193,6 +194,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [ ZFS_AC_KERNEL_MKDIR ZFS_AC_KERNEL_LOOKUP_FLAGS ZFS_AC_KERNEL_CREATE + ZFS_AC_KERNEL_PERMISSION ZFS_AC_KERNEL_GET_LINK ZFS_AC_KERNEL_PUT_LINK ZFS_AC_KERNEL_TMPFILE diff --git a/module/os/linux/zfs/zfs_acl.c b/module/os/linux/zfs/zfs_acl.c index 351e4dad79..b70691ab31 100644 --- a/module/os/linux/zfs/zfs_acl.c +++ b/module/os/linux/zfs/zfs_acl.c @@ -863,6 +863,26 @@ zfs_unix_to_v4(uint32_t access_mask) return (new_mask); } + +static int +zfs_v4_to_unix(uint32_t access_mask, int *unmapped) +{ + int new_mask = 0; + + *unmapped = access_mask & + (ACE_WRITE_OWNER | ACE_WRITE_ACL | ACE_DELETE); + + if (access_mask & WRITE_MASK) + new_mask |= S_IWOTH; + if (access_mask & ACE_READ_DATA) + new_mask |= S_IROTH; + if (access_mask & ACE_EXECUTE) + new_mask |= S_IXOTH; + + return (new_mask); +} + + static void zfs_set_ace(zfs_acl_t *aclp, void *acep, uint32_t access_mask, uint16_t access_type, uint64_t fuid, uint16_t entry_type) @@ -2399,6 +2419,53 @@ zfs_has_access(znode_t *zp, cred_t *cr) return (B_TRUE); } +/* + * Simplified access check for case where ACL is known to not contain + * information beyond what is defined in the mode. In this case, we + * can pass along to the kernel / vfs generic_permission() check, which + * evaluates the mode and POSIX ACL. + * + * NFSv4 ACLs allow granting permissions that are usually relegated only + * to the file owner or superuser. Examples are ACE_WRITE_OWNER (chown), + * ACE_WRITE_ACL(chmod), and ACE_DELETE. ACE_DELETE requests must fail + * because with conventional posix permissions, right to delete file + * is determined by write bit on the parent dir. + * + * If unmappable perms are requested, then we must return EPERM + * and include those bits in the working_mode so that the caller of + * zfs_zaccess_common() can decide whether to perform additional + * policy / capability checks. EACCES is used in zfs_zaccess_aces_check() + * to indicate access check failed due to explicit DENY entry, and so + * we want to avoid that here. + */ +static int +zfs_zaccess_trivial(znode_t *zp, uint32_t *working_mode, cred_t *cr) +{ + int err, mask; + int unmapped = 0; + + ASSERT(zp->z_pflags & ZFS_ACL_TRIVIAL); + + mask = zfs_v4_to_unix(*working_mode, &unmapped); + if (mask == 0 || unmapped) { + *working_mode = unmapped; + return (unmapped ? SET_ERROR(EPERM) : 0); + } + +#if defined(HAVE_IOPS_PERMISSION_USERNS) + err = generic_permission(cr->user_ns, ZTOI(zp), mask); +#else + err = generic_permission(ZTOI(zp), mask); +#endif + if (err != 0) { + return (SET_ERROR(EPERM)); + } + + *working_mode = unmapped; + + return (0); +} + static int zfs_zaccess_common(znode_t *zp, uint32_t v4_mode, uint32_t *working_mode, boolean_t *check_privs, boolean_t skipaclchk, cred_t *cr) @@ -2450,6 +2517,9 @@ zfs_zaccess_common(znode_t *zp, uint32_t v4_mode, uint32_t *working_mode, return (SET_ERROR(EPERM)); } + if (zp->z_pflags & ZFS_ACL_TRIVIAL) + return (zfs_zaccess_trivial(zp, working_mode, cr)); + return (zfs_zaccess_aces_check(zp, working_mode, B_FALSE, cr)); } diff --git a/module/os/linux/zfs/zfs_vnops_os.c b/module/os/linux/zfs/zfs_vnops_os.c index ece7c373e8..b65728f0d4 100644 --- a/module/os/linux/zfs/zfs_vnops_os.c +++ b/module/os/linux/zfs/zfs_vnops_os.c @@ -474,7 +474,7 @@ zfs_lookup(znode_t *zdp, char *nm, znode_t **zpp, int flags, cred_t *cr, */ if ((error = zfs_zaccess(*zpp, ACE_EXECUTE, 0, - B_FALSE, cr))) { + B_TRUE, cr))) { zrele(*zpp); *zpp = NULL; }