Fix unlinked file cannot do xattr operations
Currently, doing things like fsetxattr(2) on an unlinked file will result in ENODATA. There's two places that cause this: zfs_dirent_lock and zfs_zget. The fix in zfs_dirent_lock is pretty straightforward. In zfs_zget though, we need it to not return error when the zp is unlinked. This is a pretty big change in behavior, but skimming through all the callers, I don't think this change would cause any problem. Also there's nothing preventing z_unlinked from being set after the z_lock mutex is dropped before but before zfs_zget returns anyway. The rest of the stuff is to make sure we don't log xattr stuff when owner is unlinked. Signed-off-by: Chunwei Chen <david.chen@osnexus.com>
This commit is contained in:
parent
7f547f85fe
commit
987014903f
|
@ -194,6 +194,7 @@ typedef struct znode {
|
||||||
zfs_acl_t *z_acl_cached; /* cached acl */
|
zfs_acl_t *z_acl_cached; /* cached acl */
|
||||||
krwlock_t z_xattr_lock; /* xattr data lock */
|
krwlock_t z_xattr_lock; /* xattr data lock */
|
||||||
nvlist_t *z_xattr_cached; /* cached xattrs */
|
nvlist_t *z_xattr_cached; /* cached xattrs */
|
||||||
|
uint64_t z_xattr_parent; /* parent obj for this xattr */
|
||||||
list_node_t z_link_node; /* all znodes in fs link */
|
list_node_t z_link_node; /* all znodes in fs link */
|
||||||
sa_handle_t *z_sa_hdl; /* handle to sa data */
|
sa_handle_t *z_sa_hdl; /* handle to sa data */
|
||||||
boolean_t z_is_sa; /* are we native sa? */
|
boolean_t z_is_sa; /* are we native sa? */
|
||||||
|
|
|
@ -2492,15 +2492,8 @@ zfs_zaccess(znode_t *zp, int mode, int flags, boolean_t skipaclchk, cred_t *cr)
|
||||||
* If attribute then validate against base file
|
* If attribute then validate against base file
|
||||||
*/
|
*/
|
||||||
if (is_attr) {
|
if (is_attr) {
|
||||||
uint64_t parent;
|
|
||||||
|
|
||||||
if ((error = sa_lookup(zp->z_sa_hdl,
|
|
||||||
SA_ZPL_PARENT(ZTOZSB(zp)), &parent,
|
|
||||||
sizeof (parent))) != 0)
|
|
||||||
return (error);
|
|
||||||
|
|
||||||
if ((error = zfs_zget(ZTOZSB(zp),
|
if ((error = zfs_zget(ZTOZSB(zp),
|
||||||
parent, &xzp)) != 0) {
|
zp->z_xattr_parent, &xzp)) != 0) {
|
||||||
return (error);
|
return (error);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -240,7 +240,7 @@ zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp,
|
||||||
|
|
||||||
mutex_enter(&dzp->z_lock);
|
mutex_enter(&dzp->z_lock);
|
||||||
for (;;) {
|
for (;;) {
|
||||||
if (dzp->z_unlinked) {
|
if (dzp->z_unlinked && !(flag & ZXATTR)) {
|
||||||
mutex_exit(&dzp->z_lock);
|
mutex_exit(&dzp->z_lock);
|
||||||
if (!(flag & ZHAVELOCK))
|
if (!(flag & ZHAVELOCK))
|
||||||
rw_exit(&dzp->z_name_lock);
|
rw_exit(&dzp->z_name_lock);
|
||||||
|
@ -998,8 +998,9 @@ zfs_make_xattrdir(znode_t *zp, vattr_t *vap, struct inode **xipp, cred_t *cr)
|
||||||
VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_XATTR(zsb), &xzp->z_id,
|
VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_XATTR(zsb), &xzp->z_id,
|
||||||
sizeof (xzp->z_id), tx));
|
sizeof (xzp->z_id), tx));
|
||||||
|
|
||||||
(void) zfs_log_create(zsb->z_log, tx, TX_MKXATTR, zp,
|
if (!zp->z_unlinked)
|
||||||
xzp, "", NULL, acl_ids.z_fuidp, vap);
|
(void) zfs_log_create(zsb->z_log, tx, TX_MKXATTR, zp,
|
||||||
|
xzp, "", NULL, acl_ids.z_fuidp, vap);
|
||||||
|
|
||||||
zfs_acl_ids_free(&acl_ids);
|
zfs_acl_ids_free(&acl_ids);
|
||||||
dmu_tx_commit(tx);
|
dmu_tx_commit(tx);
|
||||||
|
|
|
@ -211,6 +211,34 @@ zfs_log_fuid_domains(zfs_fuid_info_t *fuidp, void *start)
|
||||||
return (start);
|
return (start);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If zp is an xattr node, check whether the xattr owner is unlinked.
|
||||||
|
* We don't want to log anything if the owner is unlinked.
|
||||||
|
*/
|
||||||
|
static int
|
||||||
|
zfs_xattr_owner_unlinked(znode_t *zp)
|
||||||
|
{
|
||||||
|
int unlinked = 0;
|
||||||
|
znode_t *dzp;
|
||||||
|
igrab(ZTOI(zp));
|
||||||
|
/*
|
||||||
|
* if zp is XATTR node, keep walking up via z_xattr_parent until we
|
||||||
|
* get the owner
|
||||||
|
*/
|
||||||
|
while (zp->z_pflags & ZFS_XATTR) {
|
||||||
|
ASSERT3U(zp->z_xattr_parent, !=, 0);
|
||||||
|
if (zfs_zget(ZTOZSB(zp), zp->z_xattr_parent, &dzp) != 0) {
|
||||||
|
unlinked = 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
iput(ZTOI(zp));
|
||||||
|
zp = dzp;
|
||||||
|
unlinked = zp->z_unlinked;
|
||||||
|
}
|
||||||
|
iput(ZTOI(zp));
|
||||||
|
return (unlinked);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Handles TX_CREATE, TX_CREATE_ATTR, TX_MKDIR, TX_MKDIR_ATTR and
|
* Handles TX_CREATE, TX_CREATE_ATTR, TX_MKDIR, TX_MKDIR_ATTR and
|
||||||
* TK_MKXATTR transactions.
|
* TK_MKXATTR transactions.
|
||||||
|
@ -247,7 +275,7 @@ zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
|
||||||
size_t namesize = strlen(name) + 1;
|
size_t namesize = strlen(name) + 1;
|
||||||
size_t fuidsz = 0;
|
size_t fuidsz = 0;
|
||||||
|
|
||||||
if (zil_replaying(zilog, tx))
|
if (zil_replaying(zilog, tx) || zfs_xattr_owner_unlinked(dzp))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -352,7 +380,7 @@ zfs_log_remove(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
|
||||||
lr_remove_t *lr;
|
lr_remove_t *lr;
|
||||||
size_t namesize = strlen(name) + 1;
|
size_t namesize = strlen(name) + 1;
|
||||||
|
|
||||||
if (zil_replaying(zilog, tx))
|
if (zil_replaying(zilog, tx) || zfs_xattr_owner_unlinked(dzp))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
itx = zil_itx_create(txtype, sizeof (*lr) + namesize);
|
itx = zil_itx_create(txtype, sizeof (*lr) + namesize);
|
||||||
|
@ -463,7 +491,8 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
|
||||||
uintptr_t fsync_cnt;
|
uintptr_t fsync_cnt;
|
||||||
ssize_t immediate_write_sz;
|
ssize_t immediate_write_sz;
|
||||||
|
|
||||||
if (zil_replaying(zilog, tx) || zp->z_unlinked) {
|
if (zil_replaying(zilog, tx) || zp->z_unlinked ||
|
||||||
|
zfs_xattr_owner_unlinked(zp)) {
|
||||||
if (callback != NULL)
|
if (callback != NULL)
|
||||||
callback(callback_data);
|
callback(callback_data);
|
||||||
return;
|
return;
|
||||||
|
@ -543,7 +572,8 @@ zfs_log_truncate(zilog_t *zilog, dmu_tx_t *tx, int txtype,
|
||||||
itx_t *itx;
|
itx_t *itx;
|
||||||
lr_truncate_t *lr;
|
lr_truncate_t *lr;
|
||||||
|
|
||||||
if (zil_replaying(zilog, tx) || zp->z_unlinked)
|
if (zil_replaying(zilog, tx) || zp->z_unlinked ||
|
||||||
|
zfs_xattr_owner_unlinked(zp))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
itx = zil_itx_create(txtype, sizeof (*lr));
|
itx = zil_itx_create(txtype, sizeof (*lr));
|
||||||
|
|
|
@ -119,6 +119,7 @@ zfs_znode_cache_constructor(void *buf, void *arg, int kmflags)
|
||||||
zp->z_dirlocks = NULL;
|
zp->z_dirlocks = NULL;
|
||||||
zp->z_acl_cached = NULL;
|
zp->z_acl_cached = NULL;
|
||||||
zp->z_xattr_cached = NULL;
|
zp->z_xattr_cached = NULL;
|
||||||
|
zp->z_xattr_parent = 0;
|
||||||
zp->z_moved = 0;
|
zp->z_moved = 0;
|
||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
|
@ -590,6 +591,10 @@ zfs_znode_alloc(zfs_sb_t *zsb, dmu_buf_t *db, int blksz,
|
||||||
zfs_uid_write(ip, z_uid);
|
zfs_uid_write(ip, z_uid);
|
||||||
zfs_gid_write(ip, z_gid);
|
zfs_gid_write(ip, z_gid);
|
||||||
|
|
||||||
|
/* Cache the xattr parent id */
|
||||||
|
if (zp->z_pflags & ZFS_XATTR)
|
||||||
|
zp->z_xattr_parent = parent;
|
||||||
|
|
||||||
ZFS_TIME_DECODE(&ip->i_atime, atime);
|
ZFS_TIME_DECODE(&ip->i_atime, atime);
|
||||||
ZFS_TIME_DECODE(&ip->i_mtime, mtime);
|
ZFS_TIME_DECODE(&ip->i_mtime, mtime);
|
||||||
ZFS_TIME_DECODE(&ip->i_ctime, ctime);
|
ZFS_TIME_DECODE(&ip->i_ctime, ctime);
|
||||||
|
@ -1060,34 +1065,30 @@ again:
|
||||||
|
|
||||||
mutex_enter(&zp->z_lock);
|
mutex_enter(&zp->z_lock);
|
||||||
ASSERT3U(zp->z_id, ==, obj_num);
|
ASSERT3U(zp->z_id, ==, obj_num);
|
||||||
if (zp->z_unlinked) {
|
/*
|
||||||
err = SET_ERROR(ENOENT);
|
* If igrab() returns NULL the VFS has independently
|
||||||
} else {
|
* determined the inode should be evicted and has
|
||||||
/*
|
* called iput_final() to start the eviction process.
|
||||||
* If igrab() returns NULL the VFS has independently
|
* The SA handle is still valid but because the VFS
|
||||||
* determined the inode should be evicted and has
|
* requires that the eviction succeed we must drop
|
||||||
* called iput_final() to start the eviction process.
|
* our locks and references to allow the eviction to
|
||||||
* The SA handle is still valid but because the VFS
|
* complete. The zfs_zget() may then be retried.
|
||||||
* requires that the eviction succeed we must drop
|
*
|
||||||
* our locks and references to allow the eviction to
|
* This unlikely case could be optimized by registering
|
||||||
* complete. The zfs_zget() may then be retried.
|
* a sops->drop_inode() callback. The callback would
|
||||||
*
|
* need to detect the active SA hold thereby informing
|
||||||
* This unlikely case could be optimized by registering
|
* the VFS that this inode should not be evicted.
|
||||||
* a sops->drop_inode() callback. The callback would
|
*/
|
||||||
* need to detect the active SA hold thereby informing
|
if (igrab(ZTOI(zp)) == NULL) {
|
||||||
* the VFS that this inode should not be evicted.
|
mutex_exit(&zp->z_lock);
|
||||||
*/
|
sa_buf_rele(db, NULL);
|
||||||
if (igrab(ZTOI(zp)) == NULL) {
|
zfs_znode_hold_exit(zsb, zh);
|
||||||
mutex_exit(&zp->z_lock);
|
/* inode might need this to finish evict */
|
||||||
sa_buf_rele(db, NULL);
|
cond_resched();
|
||||||
zfs_znode_hold_exit(zsb, zh);
|
goto again;
|
||||||
/* inode might need this to finish evict */
|
|
||||||
cond_resched();
|
|
||||||
goto again;
|
|
||||||
}
|
|
||||||
*zpp = zp;
|
|
||||||
err = 0;
|
|
||||||
}
|
}
|
||||||
|
*zpp = zp;
|
||||||
|
err = 0;
|
||||||
mutex_exit(&zp->z_lock);
|
mutex_exit(&zp->z_lock);
|
||||||
sa_buf_rele(db, NULL);
|
sa_buf_rele(db, NULL);
|
||||||
zfs_znode_hold_exit(zsb, zh);
|
zfs_znode_hold_exit(zsb, zh);
|
||||||
|
|
Loading…
Reference in New Issue