From 987014903f9d36783547188b6ad00f01d9a076bd Mon Sep 17 00:00:00 2001 From: Chunwei Chen Date: Wed, 12 Oct 2016 17:30:46 -0700 Subject: [PATCH] Fix unlinked file cannot do xattr operations Currently, doing things like fsetxattr(2) on an unlinked file will result in ENODATA. There's two places that cause this: zfs_dirent_lock and zfs_zget. The fix in zfs_dirent_lock is pretty straightforward. In zfs_zget though, we need it to not return error when the zp is unlinked. This is a pretty big change in behavior, but skimming through all the callers, I don't think this change would cause any problem. Also there's nothing preventing z_unlinked from being set after the z_lock mutex is dropped before but before zfs_zget returns anyway. The rest of the stuff is to make sure we don't log xattr stuff when owner is unlinked. Signed-off-by: Chunwei Chen --- include/sys/zfs_znode.h | 1 + module/zfs/zfs_acl.c | 9 +------ module/zfs/zfs_dir.c | 7 +++--- module/zfs/zfs_log.c | 38 +++++++++++++++++++++++++--- module/zfs/zfs_znode.c | 55 +++++++++++++++++++++-------------------- 5 files changed, 68 insertions(+), 42 deletions(-) diff --git a/include/sys/zfs_znode.h b/include/sys/zfs_znode.h index a12675d6f5..8be7283a7c 100644 --- a/include/sys/zfs_znode.h +++ b/include/sys/zfs_znode.h @@ -194,6 +194,7 @@ typedef struct znode { zfs_acl_t *z_acl_cached; /* cached acl */ krwlock_t z_xattr_lock; /* xattr data lock */ nvlist_t *z_xattr_cached; /* cached xattrs */ + uint64_t z_xattr_parent; /* parent obj for this xattr */ list_node_t z_link_node; /* all znodes in fs link */ sa_handle_t *z_sa_hdl; /* handle to sa data */ boolean_t z_is_sa; /* are we native sa? */ diff --git a/module/zfs/zfs_acl.c b/module/zfs/zfs_acl.c index 7198c7ebff..defb8f4483 100644 --- a/module/zfs/zfs_acl.c +++ b/module/zfs/zfs_acl.c @@ -2492,15 +2492,8 @@ zfs_zaccess(znode_t *zp, int mode, int flags, boolean_t skipaclchk, cred_t *cr) * If attribute then validate against base file */ if (is_attr) { - uint64_t parent; - - if ((error = sa_lookup(zp->z_sa_hdl, - SA_ZPL_PARENT(ZTOZSB(zp)), &parent, - sizeof (parent))) != 0) - return (error); - if ((error = zfs_zget(ZTOZSB(zp), - parent, &xzp)) != 0) { + zp->z_xattr_parent, &xzp)) != 0) { return (error); } diff --git a/module/zfs/zfs_dir.c b/module/zfs/zfs_dir.c index 8eee626d98..c28b85c98f 100644 --- a/module/zfs/zfs_dir.c +++ b/module/zfs/zfs_dir.c @@ -240,7 +240,7 @@ zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp, mutex_enter(&dzp->z_lock); for (;;) { - if (dzp->z_unlinked) { + if (dzp->z_unlinked && !(flag & ZXATTR)) { mutex_exit(&dzp->z_lock); if (!(flag & ZHAVELOCK)) rw_exit(&dzp->z_name_lock); @@ -998,8 +998,9 @@ zfs_make_xattrdir(znode_t *zp, vattr_t *vap, struct inode **xipp, cred_t *cr) VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_XATTR(zsb), &xzp->z_id, sizeof (xzp->z_id), tx)); - (void) zfs_log_create(zsb->z_log, tx, TX_MKXATTR, zp, - xzp, "", NULL, acl_ids.z_fuidp, vap); + if (!zp->z_unlinked) + (void) zfs_log_create(zsb->z_log, tx, TX_MKXATTR, zp, + xzp, "", NULL, acl_ids.z_fuidp, vap); zfs_acl_ids_free(&acl_ids); dmu_tx_commit(tx); diff --git a/module/zfs/zfs_log.c b/module/zfs/zfs_log.c index 69efb3c161..8bb8b0a977 100644 --- a/module/zfs/zfs_log.c +++ b/module/zfs/zfs_log.c @@ -211,6 +211,34 @@ zfs_log_fuid_domains(zfs_fuid_info_t *fuidp, void *start) return (start); } +/* + * If zp is an xattr node, check whether the xattr owner is unlinked. + * We don't want to log anything if the owner is unlinked. + */ +static int +zfs_xattr_owner_unlinked(znode_t *zp) +{ + int unlinked = 0; + znode_t *dzp; + igrab(ZTOI(zp)); + /* + * if zp is XATTR node, keep walking up via z_xattr_parent until we + * get the owner + */ + while (zp->z_pflags & ZFS_XATTR) { + ASSERT3U(zp->z_xattr_parent, !=, 0); + if (zfs_zget(ZTOZSB(zp), zp->z_xattr_parent, &dzp) != 0) { + unlinked = 1; + break; + } + iput(ZTOI(zp)); + zp = dzp; + unlinked = zp->z_unlinked; + } + iput(ZTOI(zp)); + return (unlinked); +} + /* * Handles TX_CREATE, TX_CREATE_ATTR, TX_MKDIR, TX_MKDIR_ATTR and * TK_MKXATTR transactions. @@ -247,7 +275,7 @@ zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, size_t namesize = strlen(name) + 1; size_t fuidsz = 0; - if (zil_replaying(zilog, tx)) + if (zil_replaying(zilog, tx) || zfs_xattr_owner_unlinked(dzp)) return; /* @@ -352,7 +380,7 @@ zfs_log_remove(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, lr_remove_t *lr; size_t namesize = strlen(name) + 1; - if (zil_replaying(zilog, tx)) + if (zil_replaying(zilog, tx) || zfs_xattr_owner_unlinked(dzp)) return; itx = zil_itx_create(txtype, sizeof (*lr) + namesize); @@ -463,7 +491,8 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype, uintptr_t fsync_cnt; ssize_t immediate_write_sz; - if (zil_replaying(zilog, tx) || zp->z_unlinked) { + if (zil_replaying(zilog, tx) || zp->z_unlinked || + zfs_xattr_owner_unlinked(zp)) { if (callback != NULL) callback(callback_data); return; @@ -543,7 +572,8 @@ zfs_log_truncate(zilog_t *zilog, dmu_tx_t *tx, int txtype, itx_t *itx; lr_truncate_t *lr; - if (zil_replaying(zilog, tx) || zp->z_unlinked) + if (zil_replaying(zilog, tx) || zp->z_unlinked || + zfs_xattr_owner_unlinked(zp)) return; itx = zil_itx_create(txtype, sizeof (*lr)); diff --git a/module/zfs/zfs_znode.c b/module/zfs/zfs_znode.c index 11bb8675be..ebf512a84f 100644 --- a/module/zfs/zfs_znode.c +++ b/module/zfs/zfs_znode.c @@ -119,6 +119,7 @@ zfs_znode_cache_constructor(void *buf, void *arg, int kmflags) zp->z_dirlocks = NULL; zp->z_acl_cached = NULL; zp->z_xattr_cached = NULL; + zp->z_xattr_parent = 0; zp->z_moved = 0; return (0); } @@ -590,6 +591,10 @@ zfs_znode_alloc(zfs_sb_t *zsb, dmu_buf_t *db, int blksz, zfs_uid_write(ip, z_uid); zfs_gid_write(ip, z_gid); + /* Cache the xattr parent id */ + if (zp->z_pflags & ZFS_XATTR) + zp->z_xattr_parent = parent; + ZFS_TIME_DECODE(&ip->i_atime, atime); ZFS_TIME_DECODE(&ip->i_mtime, mtime); ZFS_TIME_DECODE(&ip->i_ctime, ctime); @@ -1060,34 +1065,30 @@ again: mutex_enter(&zp->z_lock); ASSERT3U(zp->z_id, ==, obj_num); - if (zp->z_unlinked) { - err = SET_ERROR(ENOENT); - } else { - /* - * If igrab() returns NULL the VFS has independently - * determined the inode should be evicted and has - * called iput_final() to start the eviction process. - * The SA handle is still valid but because the VFS - * requires that the eviction succeed we must drop - * our locks and references to allow the eviction to - * complete. The zfs_zget() may then be retried. - * - * This unlikely case could be optimized by registering - * a sops->drop_inode() callback. The callback would - * need to detect the active SA hold thereby informing - * the VFS that this inode should not be evicted. - */ - if (igrab(ZTOI(zp)) == NULL) { - mutex_exit(&zp->z_lock); - sa_buf_rele(db, NULL); - zfs_znode_hold_exit(zsb, zh); - /* inode might need this to finish evict */ - cond_resched(); - goto again; - } - *zpp = zp; - err = 0; + /* + * If igrab() returns NULL the VFS has independently + * determined the inode should be evicted and has + * called iput_final() to start the eviction process. + * The SA handle is still valid but because the VFS + * requires that the eviction succeed we must drop + * our locks and references to allow the eviction to + * complete. The zfs_zget() may then be retried. + * + * This unlikely case could be optimized by registering + * a sops->drop_inode() callback. The callback would + * need to detect the active SA hold thereby informing + * the VFS that this inode should not be evicted. + */ + if (igrab(ZTOI(zp)) == NULL) { + mutex_exit(&zp->z_lock); + sa_buf_rele(db, NULL); + zfs_znode_hold_exit(zsb, zh); + /* inode might need this to finish evict */ + cond_resched(); + goto again; } + *zpp = zp; + err = 0; mutex_exit(&zp->z_lock); sa_buf_rele(db, NULL); zfs_znode_hold_exit(zsb, zh);