From 8a4f60965141e7021cc81221bb4329511ad97ca7 Mon Sep 17 00:00:00 2001 From: Andrew Walker Date: Thu, 4 May 2023 12:58:47 -0400 Subject: [PATCH] Improve zpl_permission performance This function can be frequently called with MAY_EXEC|MAY_NOT_BLOCK during RCU path walk. Where possible we should try not to break out of it. In this case we check whether flag ZFS_NO_EXECS_DENIED is set and check mode (similar to fastexecute check in zfs_acl.c). Signed-off-by: Andrew Walker --- module/os/linux/zfs/zpl_xattr.c | 77 ++++++++++++++++++++++----------- 1 file changed, 52 insertions(+), 25 deletions(-) diff --git a/module/os/linux/zfs/zpl_xattr.c b/module/os/linux/zfs/zpl_xattr.c index 67f93ea8e8..138c1cb467 100644 --- a/module/os/linux/zfs/zpl_xattr.c +++ b/module/os/linux/zfs/zpl_xattr.c @@ -1488,6 +1488,14 @@ static xattr_handler_t zpl_xattr_acl_default_handler = { #endif /* CONFIG_FS_POSIX_ACL */ +/* + * zpl_permission() gets called by linux kernel whenever it checks + * inode_permission via inode->i_op->permission. The general preference + * is to defer to the standard in-kernel permission check (generic_permission) + * wherever possible. + * + * https://www.kernel.org/doc/Documentation/filesystems/vfs.txt + */ int #if defined(HAVE_IOPS_PERMISSION_USERNS) zpl_permission(struct user_namespace *userns, struct inode *ip, int mask) @@ -1532,23 +1540,40 @@ zpl_permission(struct inode *ip, int mask) } /* - * Avoid potentially blocking in RCU walk. + * Fast path for execute checks. Do not use zfs_fastaccesschk_execute + * since it may end up granting execute access in presence of explicit + * deny entry for user / group, and it also read the ZFS ACL + * (non-cached) which we wish to avoid in RCU. */ - if (mask & MAY_NOT_BLOCK) { - return (-ECHILD); - } - - cr = CRED(); - crhold(cr); - ret = -zfs_access(ITOZ(ip), to_check, V_ACE_MASK, cr); - if (ret != -EPERM && ret != -EACCES) { - crfree(cr); - return (ret); - } + if ((to_check == ACE_EXECUTE) && + (ITOZ(ip)->z_pflags & ZFS_NO_EXECS_DENIED)) + return (0); /* - * There are some situations in which capabilities - * may allow overriding the DACL. + * inode permission operation may be called in rcu-walk mode + * (mask & MAY_NOT_BLOCK). If in rcu-walk mode, the filesystem must + * check the permission without blocking or storing to the inode. + * + * If a situation is encountered that rcu-walk cannot handle, + * return -ECHILD and it will be called again in ref-walk mode. + */ + cr = CRED(); + crhold(cr); + + /* + * There are some situations in which capabilities may allow overriding + * the DACL. Skip reading ACL if requested permissions are fully + * satisfied by capabilities. + */ + + /* + * CAP_DAC_OVERRIDE may override RWX on directories, and RW on other + * files. Execute may also be overriden if at least one execute bit is + * set. This behavior is not formally documented, but is covered in + * commit messages and code comments in namei.c. + * + * CAP_DAC_READ_SEARCH may bypass file read permission checks and + * directory read and execute permission checks. */ if (S_ISDIR(ip->i_mode)) { #ifdef SB_NFSV4ACL @@ -1565,25 +1590,27 @@ zpl_permission(struct inode *ip, int mask) crfree(cr); return (0); } - crfree(cr); - return (ret); } - if (to_check == ACE_READ_DATA) { - if (capable(CAP_DAC_READ_SEARCH)) { - crfree(cr); - return (0); - } - } - - if (!(mask & MAY_EXEC) || - (zfs_fastaccesschk_execute(ITOZ(ip), cr) == 0)) { + if (!(mask & MAY_EXEC) || (ip->i_mode & S_IXUGO)) { if (capable(CAP_DAC_OVERRIDE)) { crfree(cr); return (0); } } + if ((to_check == ACE_READ_DATA) && + capable(CAP_DAC_READ_SEARCH)) { + crfree(cr); + return (0); + } + + if (mask & MAY_NOT_BLOCK) { + crfree(cr); + return (-ECHILD); + } + + ret = -zfs_access(ITOZ(ip), to_check, V_ACE_MASK, cr); crfree(cr); return (ret); }