Fix unlinked file cannot do xattr operations

Currently, doing things like fsetxattr(2) on an unlinked file will result in
ENODATA. There's two places that cause this: zfs_dirent_lock and zfs_zget.

The fix in zfs_dirent_lock is pretty straightforward. In zfs_zget though, we
need it to not return error when the zp is unlinked. This is a pretty big
change in behavior, but skimming through all the callers, I don't think this
change would cause any problem. Also there's nothing preventing z_unlinked
from being set after the z_lock mutex is dropped before but before zfs_zget
returns anyway.

The rest of the stuff is to make sure we don't log xattr stuff when owner is
unlinked.

Signed-off-by: Chunwei Chen <david.chen@osnexus.com>
This commit is contained in:
Chunwei Chen 2016-10-12 17:30:46 -07:00 committed by Brian Behlendorf
parent 7f547f85fe
commit 987014903f
5 changed files with 68 additions and 42 deletions

View File

@ -194,6 +194,7 @@ typedef struct znode {
zfs_acl_t *z_acl_cached; /* cached acl */ zfs_acl_t *z_acl_cached; /* cached acl */
krwlock_t z_xattr_lock; /* xattr data lock */ krwlock_t z_xattr_lock; /* xattr data lock */
nvlist_t *z_xattr_cached; /* cached xattrs */ nvlist_t *z_xattr_cached; /* cached xattrs */
uint64_t z_xattr_parent; /* parent obj for this xattr */
list_node_t z_link_node; /* all znodes in fs link */ list_node_t z_link_node; /* all znodes in fs link */
sa_handle_t *z_sa_hdl; /* handle to sa data */ sa_handle_t *z_sa_hdl; /* handle to sa data */
boolean_t z_is_sa; /* are we native sa? */ boolean_t z_is_sa; /* are we native sa? */

View File

@ -2492,15 +2492,8 @@ zfs_zaccess(znode_t *zp, int mode, int flags, boolean_t skipaclchk, cred_t *cr)
* If attribute then validate against base file * If attribute then validate against base file
*/ */
if (is_attr) { if (is_attr) {
uint64_t parent;
if ((error = sa_lookup(zp->z_sa_hdl,
SA_ZPL_PARENT(ZTOZSB(zp)), &parent,
sizeof (parent))) != 0)
return (error);
if ((error = zfs_zget(ZTOZSB(zp), if ((error = zfs_zget(ZTOZSB(zp),
parent, &xzp)) != 0) { zp->z_xattr_parent, &xzp)) != 0) {
return (error); return (error);
} }

View File

@ -240,7 +240,7 @@ zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp,
mutex_enter(&dzp->z_lock); mutex_enter(&dzp->z_lock);
for (;;) { for (;;) {
if (dzp->z_unlinked) { if (dzp->z_unlinked && !(flag & ZXATTR)) {
mutex_exit(&dzp->z_lock); mutex_exit(&dzp->z_lock);
if (!(flag & ZHAVELOCK)) if (!(flag & ZHAVELOCK))
rw_exit(&dzp->z_name_lock); rw_exit(&dzp->z_name_lock);
@ -998,8 +998,9 @@ zfs_make_xattrdir(znode_t *zp, vattr_t *vap, struct inode **xipp, cred_t *cr)
VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_XATTR(zsb), &xzp->z_id, VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_XATTR(zsb), &xzp->z_id,
sizeof (xzp->z_id), tx)); sizeof (xzp->z_id), tx));
(void) zfs_log_create(zsb->z_log, tx, TX_MKXATTR, zp, if (!zp->z_unlinked)
xzp, "", NULL, acl_ids.z_fuidp, vap); (void) zfs_log_create(zsb->z_log, tx, TX_MKXATTR, zp,
xzp, "", NULL, acl_ids.z_fuidp, vap);
zfs_acl_ids_free(&acl_ids); zfs_acl_ids_free(&acl_ids);
dmu_tx_commit(tx); dmu_tx_commit(tx);

View File

@ -211,6 +211,34 @@ zfs_log_fuid_domains(zfs_fuid_info_t *fuidp, void *start)
return (start); return (start);
} }
/*
* If zp is an xattr node, check whether the xattr owner is unlinked.
* We don't want to log anything if the owner is unlinked.
*/
static int
zfs_xattr_owner_unlinked(znode_t *zp)
{
int unlinked = 0;
znode_t *dzp;
igrab(ZTOI(zp));
/*
* if zp is XATTR node, keep walking up via z_xattr_parent until we
* get the owner
*/
while (zp->z_pflags & ZFS_XATTR) {
ASSERT3U(zp->z_xattr_parent, !=, 0);
if (zfs_zget(ZTOZSB(zp), zp->z_xattr_parent, &dzp) != 0) {
unlinked = 1;
break;
}
iput(ZTOI(zp));
zp = dzp;
unlinked = zp->z_unlinked;
}
iput(ZTOI(zp));
return (unlinked);
}
/* /*
* Handles TX_CREATE, TX_CREATE_ATTR, TX_MKDIR, TX_MKDIR_ATTR and * Handles TX_CREATE, TX_CREATE_ATTR, TX_MKDIR, TX_MKDIR_ATTR and
* TK_MKXATTR transactions. * TK_MKXATTR transactions.
@ -247,7 +275,7 @@ zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
size_t namesize = strlen(name) + 1; size_t namesize = strlen(name) + 1;
size_t fuidsz = 0; size_t fuidsz = 0;
if (zil_replaying(zilog, tx)) if (zil_replaying(zilog, tx) || zfs_xattr_owner_unlinked(dzp))
return; return;
/* /*
@ -352,7 +380,7 @@ zfs_log_remove(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
lr_remove_t *lr; lr_remove_t *lr;
size_t namesize = strlen(name) + 1; size_t namesize = strlen(name) + 1;
if (zil_replaying(zilog, tx)) if (zil_replaying(zilog, tx) || zfs_xattr_owner_unlinked(dzp))
return; return;
itx = zil_itx_create(txtype, sizeof (*lr) + namesize); itx = zil_itx_create(txtype, sizeof (*lr) + namesize);
@ -463,7 +491,8 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
uintptr_t fsync_cnt; uintptr_t fsync_cnt;
ssize_t immediate_write_sz; ssize_t immediate_write_sz;
if (zil_replaying(zilog, tx) || zp->z_unlinked) { if (zil_replaying(zilog, tx) || zp->z_unlinked ||
zfs_xattr_owner_unlinked(zp)) {
if (callback != NULL) if (callback != NULL)
callback(callback_data); callback(callback_data);
return; return;
@ -543,7 +572,8 @@ zfs_log_truncate(zilog_t *zilog, dmu_tx_t *tx, int txtype,
itx_t *itx; itx_t *itx;
lr_truncate_t *lr; lr_truncate_t *lr;
if (zil_replaying(zilog, tx) || zp->z_unlinked) if (zil_replaying(zilog, tx) || zp->z_unlinked ||
zfs_xattr_owner_unlinked(zp))
return; return;
itx = zil_itx_create(txtype, sizeof (*lr)); itx = zil_itx_create(txtype, sizeof (*lr));

View File

@ -119,6 +119,7 @@ zfs_znode_cache_constructor(void *buf, void *arg, int kmflags)
zp->z_dirlocks = NULL; zp->z_dirlocks = NULL;
zp->z_acl_cached = NULL; zp->z_acl_cached = NULL;
zp->z_xattr_cached = NULL; zp->z_xattr_cached = NULL;
zp->z_xattr_parent = 0;
zp->z_moved = 0; zp->z_moved = 0;
return (0); return (0);
} }
@ -590,6 +591,10 @@ zfs_znode_alloc(zfs_sb_t *zsb, dmu_buf_t *db, int blksz,
zfs_uid_write(ip, z_uid); zfs_uid_write(ip, z_uid);
zfs_gid_write(ip, z_gid); zfs_gid_write(ip, z_gid);
/* Cache the xattr parent id */
if (zp->z_pflags & ZFS_XATTR)
zp->z_xattr_parent = parent;
ZFS_TIME_DECODE(&ip->i_atime, atime); ZFS_TIME_DECODE(&ip->i_atime, atime);
ZFS_TIME_DECODE(&ip->i_mtime, mtime); ZFS_TIME_DECODE(&ip->i_mtime, mtime);
ZFS_TIME_DECODE(&ip->i_ctime, ctime); ZFS_TIME_DECODE(&ip->i_ctime, ctime);
@ -1060,34 +1065,30 @@ again:
mutex_enter(&zp->z_lock); mutex_enter(&zp->z_lock);
ASSERT3U(zp->z_id, ==, obj_num); ASSERT3U(zp->z_id, ==, obj_num);
if (zp->z_unlinked) { /*
err = SET_ERROR(ENOENT); * If igrab() returns NULL the VFS has independently
} else { * determined the inode should be evicted and has
/* * called iput_final() to start the eviction process.
* If igrab() returns NULL the VFS has independently * The SA handle is still valid but because the VFS
* determined the inode should be evicted and has * requires that the eviction succeed we must drop
* called iput_final() to start the eviction process. * our locks and references to allow the eviction to
* The SA handle is still valid but because the VFS * complete. The zfs_zget() may then be retried.
* requires that the eviction succeed we must drop *
* our locks and references to allow the eviction to * This unlikely case could be optimized by registering
* complete. The zfs_zget() may then be retried. * a sops->drop_inode() callback. The callback would
* * need to detect the active SA hold thereby informing
* This unlikely case could be optimized by registering * the VFS that this inode should not be evicted.
* a sops->drop_inode() callback. The callback would */
* need to detect the active SA hold thereby informing if (igrab(ZTOI(zp)) == NULL) {
* the VFS that this inode should not be evicted. mutex_exit(&zp->z_lock);
*/ sa_buf_rele(db, NULL);
if (igrab(ZTOI(zp)) == NULL) { zfs_znode_hold_exit(zsb, zh);
mutex_exit(&zp->z_lock); /* inode might need this to finish evict */
sa_buf_rele(db, NULL); cond_resched();
zfs_znode_hold_exit(zsb, zh); goto again;
/* inode might need this to finish evict */
cond_resched();
goto again;
}
*zpp = zp;
err = 0;
} }
*zpp = zp;
err = 0;
mutex_exit(&zp->z_lock); mutex_exit(&zp->z_lock);
sa_buf_rele(db, NULL); sa_buf_rele(db, NULL);
zfs_znode_hold_exit(zsb, zh); zfs_znode_hold_exit(zsb, zh);