zfs/module/nvpair/nvpair_alloc_spl.c

96 lines
2.6 KiB
C
Raw Normal View History

/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at * usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2004 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include <sys/nvpair.h>
#include <sys/kmem.h>
static void *
nv_alloc_sleep_spl(nv_alloc_t *nva, size_t size)
{
Implement SA based xattrs The current ZFS implementation stores xattrs on disk using a hidden directory. In this directory a file name represents the xattr name and the file contexts are the xattr binary data. This approach is very flexible and allows for arbitrarily large xattrs. However, it also suffers from a significant performance penalty. Accessing a single xattr can requires up to three disk seeks. 1) Lookup the dnode object. 2) Lookup the dnodes's xattr directory object. 3) Lookup the xattr object in the directory. To avoid this performance penalty Linux filesystems such as ext3 and xfs try to store the xattr as part of the inode on disk. When the xattr is to large to store in the inode then a single external block is allocated for them. In practice most xattrs are small and this approach works well. The addition of System Attributes (SA) to zfs provides us a clean way to make this optimization. When the dataset property 'xattr=sa' is set then xattrs will be preferentially stored as System Attributes. This allows tiny xattrs (~100 bytes) to be stored with the dnode and up to 64k of xattrs to be stored in the spill block. If additional xattr space is required, which is unlikely under Linux, they will be stored using the traditional directory approach. This optimization results in roughly a 3x performance improvement when accessing xattrs which brings zfs roughly to parity with ext4 and xfs (see table below). When multiple xattrs are stored per-file the performance improvements are even greater because all of the xattrs stored in the spill block will be cached. However, by default SA based xattrs are disabled in the Linux port to maximize compatibility with other implementations. If you do enable SA based xattrs then they will not be visible on platforms which do not support this feature. ---------------------------------------------------------------------- Time in seconds to get/set one xattr of N bytes on 100,000 files ------+--------------------------------+------------------------------ | setxattr | getxattr bytes | ext4 xfs zfs-dir zfs-sa | ext4 xfs zfs-dir zfs-sa ------+--------------------------------+------------------------------ 1 | 2.33 31.88 21.50 4.57 | 2.35 2.64 6.29 2.43 32 | 2.79 30.68 21.98 4.60 | 2.44 2.59 6.78 2.48 256 | 3.25 31.99 21.36 5.92 | 2.32 2.71 6.22 3.14 1024 | 3.30 32.61 22.83 8.45 | 2.40 2.79 6.24 3.27 4096 | 3.57 317.46 22.52 10.73 | 2.78 28.62 6.90 3.94 16384 | n/a 2342.39 34.30 19.20 | n/a 45.44 145.90 7.55 65536 | n/a 2941.39 128.15 131.32* | n/a 141.92 256.85 262.12* Legend: * ext4 - Stock RHEL6.1 ext4 mounted with '-o user_xattr'. * xfs - Stock RHEL6.1 xfs mounted with default options. * zfs-dir - Directory based xattrs only. * zfs-sa - Prefer SAs but spill in to directories as needed, a trailing * indicates overflow in to directories occured. NOTE: Ext4 supports 4096 bytes of xattr name/value pairs per file. NOTE: XFS and ZFS have no limit on xattr name/value pairs per file. NOTE: Linux limits individual name/value pairs to 65536 bytes. NOTE: All setattr/getattr's were done after dropping the cache. NOTE: All tests were run against a single hard drive. Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Issue #443
2011-10-24 23:55:20 +00:00
return (kmem_alloc(size, KM_SLEEP | KM_NODEBUG));
}
static void *
nv_alloc_pushpage_spl(nv_alloc_t *nva, size_t size)
{
return (kmem_alloc(size, KM_PUSHPAGE | KM_NODEBUG));
}
static void *
nv_alloc_nosleep_spl(nv_alloc_t *nva, size_t size)
{
return (kmem_alloc(size, KM_NOSLEEP));
}
static void
nv_free_spl(nv_alloc_t *nva, void *buf, size_t size)
{
kmem_free(buf, size);
}
const nv_alloc_ops_t spl_sleep_ops_def = {
NULL, /* nv_ao_init() */
NULL, /* nv_ao_fini() */
nv_alloc_sleep_spl, /* nv_ao_alloc() */
nv_free_spl, /* nv_ao_free() */
NULL /* nv_ao_reset() */
};
const nv_alloc_ops_t spl_pushpage_ops_def = {
NULL, /* nv_ao_init() */
NULL, /* nv_ao_fini() */
nv_alloc_pushpage_spl, /* nv_ao_alloc() */
nv_free_spl, /* nv_ao_free() */
NULL /* nv_ao_reset() */
};
const nv_alloc_ops_t spl_nosleep_ops_def = {
NULL, /* nv_ao_init() */
NULL, /* nv_ao_fini() */
nv_alloc_nosleep_spl, /* nv_ao_alloc() */
nv_free_spl, /* nv_ao_free() */
NULL /* nv_ao_reset() */
};
nv_alloc_t nv_alloc_sleep_def = {
&spl_sleep_ops_def,
NULL
};
nv_alloc_t nv_alloc_pushpage_def = {
&spl_pushpage_ops_def,
NULL
};
nv_alloc_t nv_alloc_nosleep_def = {
&spl_nosleep_ops_def,
NULL
};
nv_alloc_t *nv_alloc_sleep = &nv_alloc_sleep_def;
nv_alloc_t *nv_alloc_pushpage = &nv_alloc_pushpage_def;
nv_alloc_t *nv_alloc_nosleep = &nv_alloc_nosleep_def;