Add support for O_TMPFILE

Linux 3.11 added O_TMPFILE to open(2), which allows creating an unlinked file
on a supporting filesystem. It is basically open(2) and unlink(2) done atomically.

The filesystem support is added through i_op->tmpfile. We basically copy the
create operation, except that we drop the link- and name-related work and add
the new node to the unlinked set.

We also add support for linkat(2) to link a tmpfile. However, since all previous
file operations on the tmpfile skip the ZIL, we force a txg_wait_synced to make
sure we are sync safe.

Signed-off-by: Chunwei Chen <david.chen@osnexus.com>
This commit is contained in:
Chunwei Chen 2016-01-26 12:29:46 -08:00 committed by Brian Behlendorf
parent 987014903f
commit ace1eae84c
18 changed files with 644 additions and 8 deletions

23
config/kernel-tmpfile.m4 Normal file
View File

@ -0,0 +1,23 @@
dnl #
dnl # 3.11 API change
dnl # Add support for i_op->tmpfile
dnl #
dnl # The probe compiles a struct inode_operations initializer that assigns
dnl # a (struct inode *, struct dentry *, umode_t) callback to .tmpfile.
dnl # The branch that reports "yes" also defines HAVE_TMPFILE to 1; the
dnl # branch that reports "no" defines nothing.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_TMPFILE], [
AC_MSG_CHECKING([whether i_op->tmpfile() exists])
ZFS_LINUX_TRY_COMPILE([
#include <linux/fs.h>
int tmpfile(struct inode *inode, struct dentry *dentry,
umode_t mode) { return 0; }
static struct inode_operations
iops __attribute__ ((unused)) = {
.tmpfile = tmpfile,
};
],[
],[
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_TMPFILE, 1,
[i_op->tmpfile() exists])
],[
AC_MSG_RESULT(no)
])
])

View File

@ -69,6 +69,7 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [
ZFS_AC_KERNEL_CREATE_NAMEIDATA
ZFS_AC_KERNEL_GET_LINK
ZFS_AC_KERNEL_PUT_LINK
ZFS_AC_KERNEL_TMPFILE
ZFS_AC_KERNEL_TRUNCATE_RANGE
ZFS_AC_KERNEL_AUTOMOUNT
ZFS_AC_KERNEL_ENCODE_FH_WITH_INODE

View File

@ -272,6 +272,7 @@ AC_CONFIG_FILES([
tests/zfs-tests/tests/functional/snapused/Makefile
tests/zfs-tests/tests/functional/sparse/Makefile
tests/zfs-tests/tests/functional/threadsappend/Makefile
tests/zfs-tests/tests/functional/tmpfile/Makefile
tests/zfs-tests/tests/functional/truncate/Makefile
tests/zfs-tests/tests/functional/userquota/Makefile
tests/zfs-tests/tests/functional/upgrade/Makefile

View File

@ -47,6 +47,7 @@ extern "C" {
/* mknode flags */
#define IS_ROOT_NODE 0x01 /* create a root node */
#define IS_XATTR 0x02 /* create an extended attribute node */
#define IS_TMPFILE 0x04 /* create a tmpfile */
extern int zfs_dirent_lock(zfs_dirlock_t **, znode_t *, char *, znode_t **,
int, int *, pathname_t *);

View File

@ -47,6 +47,8 @@ extern int zfs_lookup(struct inode *dip, char *nm, struct inode **ipp,
int flags, cred_t *cr, int *direntflags, pathname_t *realpnp);
extern int zfs_create(struct inode *dip, char *name, vattr_t *vap, int excl,
int mode, struct inode **ipp, cred_t *cr, int flag, vsecattr_t *vsecp);
extern int zfs_tmpfile(struct inode *dip, vattr_t *vap, int excl,
int mode, struct inode **ipp, cred_t *cr, int flag, vsecattr_t *vsecp);
extern int zfs_remove(struct inode *dip, char *name, cred_t *cr, int flags);
extern int zfs_mkdir(struct inode *dip, char *dirname, vattr_t *vap,
struct inode **ipp, cred_t *cr, int flags, vsecattr_t *vsecp);

View File

@ -1509,6 +1509,123 @@ out:
}
EXPORT_SYMBOL(zfs_create);
/*
 * Create an unnamed file (a "tmpfile") under the directory dip.
 *
 * The new znode is created through zfs_mknode() with IS_TMPFILE, marked
 * z_unlinked and passed to zfs_unlinked_add(); no directory entry is
 * created for it in this function.
 *
 *	dip	- inode of the directory used for the access check and
 *		  handed to zfs_acl_ids_create()/zfs_mknode() as parent.
 *	vap	- attributes for the new file.
 *	excl	- not referenced in the body.
 *	mode	- not referenced in the body (vap->va_mode is used instead).
 *	ipp	- out: inode of the new file on success. It is set to NULL
 *		  once the `top' label is reached; the two early returns
 *		  before that label leave it untouched.
 *	cr	- credentials of the caller.
 *	flag	- not referenced in the body.
 *	vsecp	- optional security attributes, forwarded to
 *		  zfs_acl_ids_create().
 *
 * Returns 0 on success. Returns EINVAL when the filesystem does not use
 * FUIDs but vsecp or an ephemeral uid/gid is supplied, EDQUOT when
 * zfs_acl_ids_overquota() reports over quota, or the error returned by
 * secpolicy_xvattr(), zfs_zaccess(), zfs_acl_ids_create() or
 * dmu_tx_assign().
 */
/* ARGSUSED */
int
zfs_tmpfile(struct inode *dip, vattr_t *vap, int excl,
int mode, struct inode **ipp, cred_t *cr, int flag, vsecattr_t *vsecp)
{
znode_t *zp = NULL, *dzp = ITOZ(dip);
zfs_sb_t *zsb = ITOZSB(dip);
objset_t *os;
dmu_tx_t *tx;
int error;
uid_t uid;
gid_t gid;
zfs_acl_ids_t acl_ids;
boolean_t fuid_dirtied;
boolean_t have_acl = B_FALSE;
boolean_t waited = B_FALSE;
/*
 * If we have an ephemeral id, ACL, or XVATTR then
 * make sure file system is at proper version
 */
gid = crgetgid(cr);
uid = crgetuid(cr);
if (zsb->z_use_fuids == B_FALSE &&
(vsecp || IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
return (SET_ERROR(EINVAL));
ZFS_ENTER(zsb);
ZFS_VERIFY_ZP(dzp);
os = zsb->z_os;
if (vap->va_mask & ATTR_XVATTR) {
if ((error = secpolicy_xvattr((xvattr_t *)vap,
crgetuid(cr), cr, vap->va_mode)) != 0) {
ZFS_EXIT(zsb);
return (error);
}
}
/* Retry point: re-entered after dmu_tx_assign() returns ERESTART. */
top:
*ipp = NULL;
/*
 * Check that the caller may add a file to the parent directory.
 * On a retry pass acl_ids is still held from the previous pass
 * (have_acl is B_TRUE), so it must be freed before bailing out.
 */
if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr))) {
if (have_acl)
zfs_acl_ids_free(&acl_ids);
goto out;
}
/* Build the ACL ids only once; retry passes reuse them. */
if (!have_acl && (error = zfs_acl_ids_create(dzp, 0, vap,
cr, vsecp, &acl_ids)) != 0)
goto out;
have_acl = B_TRUE;
if (zfs_acl_ids_overquota(zsb, &acl_ids)) {
zfs_acl_ids_free(&acl_ids);
error = SET_ERROR(EDQUOT);
goto out;
}
tx = dmu_tx_create(os);
/*
 * Transaction holds: SA creation sized for the ACL plus the base
 * attributes, and the unlinked-set ZAP object (z_unlinkedobj) in
 * place of the directory ZAP hold a named create would need.
 */
dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
ZFS_SA_BASE_ATTR_SIZE);
dmu_tx_hold_zap(tx, zsb->z_unlinkedobj, FALSE, NULL);
fuid_dirtied = zsb->z_fuid_dirty;
if (fuid_dirtied)
zfs_fuid_txhold(zsb, tx);
if (!zsb->z_use_sa &&
acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
0, acl_ids.z_aclp->z_acl_bytes);
}
error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
if (error) {
if (error == ERESTART) {
/* Wait, drop this tx and start over, keeping acl_ids. */
waited = B_TRUE;
dmu_tx_wait(tx);
dmu_tx_abort(tx);
goto top;
}
zfs_acl_ids_free(&acl_ids);
dmu_tx_abort(tx);
ZFS_EXIT(zsb);
return (error);
}
zfs_mknode(dzp, vap, tx, cr, IS_TMPFILE, &zp, &acl_ids);
if (fuid_dirtied)
zfs_fuid_sync(zsb, tx);
/* Add to unlinked set */
zp->z_unlinked = 1;
zfs_unlinked_add(zp, tx);
zfs_acl_ids_free(&acl_ids);
dmu_tx_commit(tx);
out:
/*
 * Every `goto out' above is taken before zfs_mknode() runs, so zp is
 * still NULL whenever error is non-zero here; the iput() is defensive.
 */
if (error) {
if (zp)
iput(ZTOI(zp));
} else {
zfs_inode_update(dzp);
zfs_inode_update(zp);
*ipp = ZTOI(zp);
}
ZFS_EXIT(zsb);
return (error);
}
/*
* Remove an entry from a directory.
*
@ -3802,7 +3919,11 @@ zfs_link(struct inode *tdip, struct inode *sip, char *name, cred_t *cr,
uint64_t parent;
uid_t owner;
boolean_t waited = B_FALSE;
boolean_t is_tmpfile = 0;
uint64_t txg;
#ifdef HAVE_TMPFILE
is_tmpfile = (sip->i_nlink == 0 && (sip->i_state & I_LINKABLE));
#endif
ASSERT(S_ISDIR(tdip->i_mode));
ZFS_ENTER(zsb);
@ -3885,6 +4006,9 @@ top:
tx = dmu_tx_create(zsb->z_os);
dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
if (is_tmpfile)
dmu_tx_hold_zap(tx, zsb->z_unlinkedobj, FALSE, NULL);
zfs_sa_upgrade_txholds(tx, szp);
zfs_sa_upgrade_txholds(tx, dzp);
error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
@ -3900,23 +4024,43 @@ top:
ZFS_EXIT(zsb);
return (error);
}
/* unmark z_unlinked so zfs_link_create will not reject */
if (is_tmpfile)
szp->z_unlinked = 0;
error = zfs_link_create(dl, szp, tx, 0);
if (error == 0) {
uint64_t txtype = TX_LINK;
/*
* The tmpfile was created in z_unlinkedobj, so remove it from there.
* Also, we don't log to the ZIL, because all previous file
* operations on the tmpfile were ignored by the ZIL. Instead we
* always wait for the txg to sync to make sure all previous
* operations are sync safe.
*/
if (is_tmpfile) {
VERIFY(zap_remove_int(zsb->z_os, zsb->z_unlinkedobj,
szp->z_id, tx) == 0);
} else {
if (flags & FIGNORECASE)
txtype |= TX_CI;
zfs_log_link(zilog, tx, txtype, dzp, szp, name);
}
} else if (is_tmpfile) {
/* restore z_unlinked since linking failed */
szp->z_unlinked = 1;
}
txg = dmu_tx_get_txg(tx);
dmu_tx_commit(tx);
zfs_dirent_unlock(dl);
if (zsb->z_os->os_sync == ZFS_SYNC_ALWAYS)
if (!is_tmpfile && zsb->z_os->os_sync == ZFS_SYNC_ALWAYS)
zil_commit(zilog, 0);
if (is_tmpfile)
txg_wait_synced(dmu_objset_pool(zsb->z_os), txg);
zfs_inode_update(dzp);
zfs_inode_update(szp);
ZFS_EXIT(zsb);

View File

@ -764,7 +764,7 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
links = 2;
} else {
size = 0;
links = 1;
links = (flag & IS_TMPFILE) ? 0 : 1;
}
if (S_ISBLK(vap->va_mode) || S_ISCHR(vap->va_mode))

View File

@ -214,6 +214,45 @@ zpl_mknod(struct inode *dir, struct dentry *dentry, zpl_umode_t mode,
return (error);
}
#ifdef HAVE_TMPFILE
/*
 * inode_operations ->tmpfile() handler.
 *
 * Builds a vattr for the requested mode, asks zfs_tmpfile() to create the
 * unnamed file under dir, and attaches the resulting inode to dentry with
 * d_tmpfile(). Security xattrs and ACLs are then initialized.
 *
 * Returns 0 on success or a negative errno (zfs_tmpfile()'s result is
 * negated; the xattr/ACL helpers' results are returned as-is).
 */
static int
zpl_tmpfile(struct inode *dir, struct dentry *dentry, zpl_umode_t mode)
{
	fstrans_cookie_t fstrans;
	struct inode *tmp_ip;
	cred_t *cred = CRED();
	vattr_t *attrs;
	int error;

	crhold(cred);
	attrs = kmem_zalloc(sizeof (vattr_t), KM_SLEEP);
	zpl_vap_init(attrs, dir, mode, cred);

	fstrans = spl_fstrans_mark();
	error = -zfs_tmpfile(dir, attrs, 0, mode, &tmp_ip, cred, 0, NULL);
	if (error != 0)
		goto done;

	/* d_tmpfile() performs a drop_nlink(), so raise nlink to 1 first. */
	set_nlink(tmp_ip, 1);
	d_tmpfile(dentry, tmp_ip);

	/*
	 * A failure from either call below needs no unwinding here: the
	 * file is already on the unlinked set.
	 */
	error = zpl_xattr_security_init(tmp_ip, dir, &dentry->d_name);
	if (error == 0)
		error = zpl_init_acl(tmp_ip, dir);

done:
	spl_fstrans_unmark(fstrans);
	kmem_free(attrs, sizeof (vattr_t));
	crfree(cred);

	ASSERT3S(error, <=, 0);

	return (error);
}
#endif
static int
zpl_unlink(struct inode *dir, struct dentry *dentry)
{
@ -700,6 +739,9 @@ const struct inode_operations zpl_dir_inode_operations = {
.rename = zpl_rename2,
#else
.rename = zpl_rename,
#endif
#ifdef HAVE_TMPFILE
.tmpfile = zpl_tmpfile,
#endif
.setattr = zpl_setattr,
.getattr = zpl_getattr,

View File

@ -586,6 +586,9 @@ tests = ['sparse_001_pos']
#[tests/functional/threadsappend]
#tests = ['threadsappend_001_pos']
[tests/functional/tmpfile]
tests = ['tmpfile_001_pos', 'tmpfile_002_pos', 'tmpfile_003_pos']
[tests/functional/truncate]
tests = ['truncate_001_pos', 'truncate_002_pos']

View File

@ -50,6 +50,7 @@ SUBDIRS = \
snapused \
sparse \
threadsappend \
tmpfile \
truncate \
upgrade \
userquota \

View File

@ -0,0 +1,3 @@
# Test binaries built from this directory's Makefile.am (pkgexec_PROGRAMS).
/tmpfile_test
/tmpfile_001_pos
/tmpfile_002_pos
/tmpfile_003_pos

View File

@ -0,0 +1,15 @@
include $(top_srcdir)/config/Rules.am

# Scripts are installed under the tmpfile test directory.
pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/tmpfile
dist_pkgdata_SCRIPTS = \
	cleanup.ksh \
	setup.ksh

# The compiled test programs go to the same directory as the scripts.
pkgexecdir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/tmpfile
pkgexec_PROGRAMS = \
	tmpfile_test \
	tmpfile_001_pos \
	tmpfile_002_pos \
	tmpfile_003_pos

tmpfile_test_SOURCES = tmpfile_test.c
tmpfile_001_pos_SOURCES = tmpfile_001_pos.c
tmpfile_002_pos_SOURCES = tmpfile_002_pos.c
tmpfile_003_pos_SOURCES = tmpfile_003_pos.c

View File

@ -0,0 +1,34 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
#
# Copyright (c) 2013 by Delphix. All rights reserved.
#
# Load the shared test-suite library (presumably where default_cleanup is
# defined -- confirm against libtest.shlib).
. $STF_SUITE/include/libtest.shlib
# Run the suite's stock cleanup routine for this test group.
default_cleanup

View File

@ -0,0 +1,39 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
#
# Copyright (c) 2013 by Delphix. All rights reserved.
#
. $STF_SUITE/include/libtest.shlib

# Probe O_TMPFILE support against /tmp before doing any setup; a non-zero
# exit from tmpfile_test marks the whole group as unsupported.
$STF_SUITE/tests/functional/tmpfile/tmpfile_test /tmp || \
	log_unsupported "The kernel doesn't support O_TMPFILE."

# Use the first whitespace-separated entry of $DISKS for the test pool.
DISK=${DISKS%% *}
default_setup $DISK

View File

@ -0,0 +1,109 @@
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/xattr.h>
#include <fcntl.h>
#include <errno.h>
#include <string.h>
#include <time.h>
/* backward compat in case it's not defined */
#ifndef O_TMPFILE
#define O_TMPFILE (020000000|O_DIRECTORY)
#endif
/*
* DESCRIPTION:
* Verify we can create tmpfile.
*
* STRATEGY:
* 1. open(2) with O_TMPFILE.
* 2. write(2) random data to it, then read(2) and compare.
* 3. fsetxattr(2) random data, then fgetxattr(2) and compare.
* 4. Verify the above operations run successfully.
*
*/
#define BSZ 64
/*
 * Fill the first len bytes of buf with pseudo-random values.
 *
 * The generator is re-seeded from the current time on every call.
 * A len of zero or less leaves buf untouched.
 */
void
fill_random(char *buf, int len)
{
	int pos = 0;

	srand(time(NULL));
	while (pos < len) {
		buf[pos] = (char)rand();
		pos++;
	}
}
/*
 * Verify that an O_TMPFILE file in $TESTDIR supports data and xattr I/O.
 *
 * BSZ random bytes are written and read back with pread(2), then stored
 * and fetched as the "user.test" extended attribute; both round trips must
 * return exactly the bytes that were written.
 *
 * Exit status: 0 on success, 1 if TESTDIR is unset, otherwise the number
 * of the failing step: 2 open, 3 write, 4 pread, 5 data mismatch,
 * 6 fsetxattr, 7 fgetxattr, 8 xattr mismatch.
 */
int
main(int argc, char *argv[])
{
	size_t i;
	int fd;
	ssize_t nbytes;
	char buf1[BSZ], buf2[BSZ] = {0};
	char *penv[] = {"TESTDIR"};

	(void) fprintf(stdout, "Verify O_TMPFILE is working properly.\n");

	/*
	 * Get the environment variable values.
	 */
	for (i = 0; i < sizeof (penv) / sizeof (char *); i++) {
		if ((penv[i] = getenv(penv[i])) == NULL) {
			(void) fprintf(stderr, "getenv(penv[%d])\n", (int)i);
			exit(1);
		}
	}

	fill_random(buf1, BSZ);

	fd = open(penv[0], O_RDWR|O_TMPFILE, 0666);
	if (fd < 0) {
		perror("open");
		exit(2);
	}

	/*
	 * A short transfer is a failure of that step: report it there
	 * rather than letting it surface later as "data corrupted".
	 */
	nbytes = write(fd, buf1, BSZ);
	if (nbytes != BSZ) {
		if (nbytes < 0)
			perror("write");
		else
			fprintf(stderr, "short write: %zd of %d bytes\n",
			    nbytes, BSZ);
		close(fd);
		exit(3);
	}

	nbytes = pread(fd, buf2, BSZ, 0);
	if (nbytes != BSZ) {
		if (nbytes < 0)
			perror("pread");
		else
			fprintf(stderr, "short pread: %zd of %d bytes\n",
			    nbytes, BSZ);
		close(fd);
		exit(4);
	}

	if (memcmp(buf1, buf2, BSZ) != 0) {
		fprintf(stderr, "data corrupted\n");
		close(fd);
		exit(5);
	}

	/* Clear the read buffer so stale file data cannot mask a bad xattr. */
	memset(buf2, 0, BSZ);

	if (fsetxattr(fd, "user.test", buf1, BSZ, 0) < 0) {
		perror("fsetxattr");
		close(fd);
		exit(6);
	}

	nbytes = fgetxattr(fd, "user.test", buf2, BSZ);
	if (nbytes != BSZ) {
		if (nbytes < 0)
			perror("fgetxattr");
		else
			fprintf(stderr, "short fgetxattr: %zd of %d bytes\n",
			    nbytes, BSZ);
		close(fd);
		exit(7);
	}

	if (memcmp(buf1, buf2, BSZ) != 0) {
		fprintf(stderr, "xattr corrupted\n");
		close(fd);
		exit(8);
	}

	close(fd);

	return (0);
}

View File

@ -0,0 +1,98 @@
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>
#include <string.h>
/* backward compat in case it's not defined */
#ifndef O_TMPFILE
#define O_TMPFILE (020000000|O_DIRECTORY)
#endif
/*
* DESCRIPTION:
* Verify we can link tmpfile.
*
* STRATEGY:
* 1. open(2) with O_TMPFILE.
* 2. linkat(2).
* 3. freeze the pool, export and re-import the pool.
* 4. stat(2) the path to verify it has been created.
*
*/
/*
 * Verify that a file opened with O_TMPFILE in $TESTDIR can be given a name
 * with linkat(2) and that the name survives freezing, exporting and
 * re-importing $TESTPOOL.
 *
 * Exit status: 0 on success, 1 if TESTDIR or TESTFILE0 is unset, 2 if
 * open fails, 3 if linkat fails, 4 if any zpool command fails, 5 if the
 * linked path cannot be stat'ed after the re-import.
 */
int
main(int argc, char *argv[])
{
	char *penv[] = {"TESTDIR", "TESTFILE0"};
	char src[1024], dst[1024];
	struct stat st;
	int i, fd, status;

	(void) fprintf(stdout, "Verify O_TMPFILE file can be linked.\n");

	/* Replace each variable name in penv[] with its value. */
	for (i = 0; i < sizeof (penv) / sizeof (char *); i++) {
		penv[i] = getenv(penv[i]);
		if (penv[i] != NULL)
			continue;
		(void) fprintf(stderr, "getenv(penv[%d])\n", i);
		exit(1);
	}

	fd = open(penv[0], O_RDWR|O_TMPFILE, 0666);
	if (fd < 0) {
		perror("open");
		exit(2);
	}

	/* The unnamed file is reachable through its /proc fd symlink. */
	snprintf(src, sizeof (src), "/proc/self/fd/%d", fd);
	snprintf(dst, sizeof (dst), "%s/%s", penv[0], penv[1]);

	if (linkat(AT_FDCWD, src, AT_FDCWD, dst, AT_SYMLINK_FOLLOW) < 0) {
		perror("linkat");
		close(fd);
		exit(3);
	}

	status = system("sudo zpool freeze $TESTPOOL");
	if (status != 0) {
		if (status == -1)
			perror("system \"zpool freeze\"");
		else
			fprintf(stderr, "zpool freeze exits with %d\n",
			    WEXITSTATUS(status));
		exit(4);
	}

	close(fd);

	status = system("sudo zpool export $TESTPOOL");
	if (status != 0) {
		if (status == -1)
			perror("system \"zpool export\"");
		else
			fprintf(stderr, "zpool export exits with %d\n",
			    WEXITSTATUS(status));
		exit(4);
	}

	status = system("sudo zpool import $TESTPOOL");
	if (status != 0) {
		if (status == -1)
			perror("system \"zpool import\"");
		else
			fprintf(stderr, "zpool import exits with %d\n",
			    WEXITSTATUS(status));
		exit(4);
	}

	/* The link must still exist after the pool comes back. */
	if (stat(dst, &st) < 0) {
		perror("stat");
		unlink(dst);
		exit(5);
	}

	unlink(dst);
	return (0);
}

View File

@ -0,0 +1,68 @@
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>
#include <string.h>
/* backward compat in case it's not defined */
#ifndef O_TMPFILE
#define O_TMPFILE (020000000|O_DIRECTORY)
#endif
/*
* DESCRIPTION:
* Verify O_EXCL tmpfile cannot be linked.
*
* STRATEGY:
* 1. open(2) with O_TMPFILE|O_EXCL.
* 2. linkat(2).
* 3. stat(2) the path to verify it wasn't created.
*
*/
/*
 * Verify that a file opened with O_TMPFILE|O_EXCL in $TESTDIR cannot be
 * given a name: linkat(2) must fail and the target path must not exist.
 *
 * Exit status: 0 on success, 1 if TESTDIR or TESTFILE0 is unset, 2 if
 * open fails, 3 if linkat unexpectedly succeeds, 4 if the target path
 * can be stat'ed afterwards.
 */
int
main(int argc, char *argv[])
{
	char *penv[] = {"TESTDIR", "TESTFILE0"};
	char src[1024], dst[1024];
	struct stat st;
	int i, fd;

	(void) fprintf(stdout, "Verify O_EXCL tmpfile cannot be linked.\n");

	/* Replace each variable name in penv[] with its value. */
	for (i = 0; i < sizeof (penv) / sizeof (char *); i++) {
		penv[i] = getenv(penv[i]);
		if (penv[i] != NULL)
			continue;
		(void) fprintf(stderr, "getenv(penv[%d])\n", i);
		exit(1);
	}

	fd = open(penv[0], O_RDWR|O_TMPFILE|O_EXCL, 0666);
	if (fd < 0) {
		perror("open");
		exit(2);
	}

	/* The unnamed file is reachable through its /proc fd symlink. */
	snprintf(src, sizeof (src), "/proc/self/fd/%d", fd);
	snprintf(dst, sizeof (dst), "%s/%s", penv[0], penv[1]);

	/* Success here is the failure case for this test. */
	if (linkat(AT_FDCWD, src, AT_FDCWD, dst, AT_SYMLINK_FOLLOW) == 0) {
		fprintf(stderr, "linkat returns successfully\n");
		close(fd);
		exit(3);
	}

	/* Likewise, the target name must not have come into existence. */
	if (stat(dst, &st) == 0) {
		fprintf(stderr, "stat returns successfully\n");
		close(fd);
		exit(4);
	}

	close(fd);
	return (0);
}

View File

@ -0,0 +1,52 @@
#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>
/* backward compat in case it's not defined */
#ifndef O_TMPFILE
#define O_TMPFILE (020000000|O_DIRECTORY)
#endif
/*
* DESCRIPTION:
* Check if the kernel supports O_TMPFILE.
*/
/*
 * Probe whether open(2) with O_TMPFILE is understood by the kernel, using
 * the directory named by argv[1].
 *
 * Exit status: 0 if the open succeeds or fails with anything other than
 * EISDIR, 1 if it fails with EISDIR, 2 on a usage error or when argv[1]
 * cannot be stat'ed or is not a directory.
 */
int
main(int argc, char *argv[])
{
	struct stat st;
	int fd;

	if (argc < 2) {
		fprintf(stderr, "Usage: %s dir\n", argv[0]);
		return (2);
	}

	if (stat(argv[1], &st) < 0) {
		perror("stat");
		return (2);
	}

	if (!S_ISDIR(st.st_mode)) {
		fprintf(stderr, "\"%s\" is not a directory\n", argv[1]);
		return (2);
	}

	fd = open(argv[1], O_TMPFILE | O_WRONLY, 0666);
	if (fd >= 0) {
		close(fd);
		return (0);
	}

	/*
	 * EISDIR is the only errno treated as "no kernel support".
	 * EOPNOTSUPP, for instance, means the kernel knows O_TMPFILE but
	 * the filesystem at argv[1] does not, so it is only reported.
	 */
	if (errno == EISDIR) {
		fprintf(stderr, "kernel doesn't support O_TMPFILE\n");
		return (1);
	}

	perror("open");
	return (0);
}