Illumos #3035 LZ4 compression support in ZFS and GRUB

3035 LZ4 compression support in ZFS and GRUB

Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: Christopher Siden <christopher.siden@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Approved by: Christopher Siden <csiden@delphix.com>

References:
  illumos/illumos-gate@a6f561b4ae
  https://www.illumos.org/issues/3035
  http://wiki.illumos.org/display/illumos/LZ4+Compression+In+ZFS

This patch has been slightly modified from the upstream Illumos
version to be compatible with Linux.  Due to the very limited
stack space in the kernel, an lz4 workspace kmem cache is used.
Since we are using gcc we are also able to take advantage of the
gcc optimized __builtin_ctz functions.

Support for GRUB has been dropped from this patch.  That code
is available but those changes will need to be made to the upstream
GRUB package.

Lastly, several hunks of dead code were dropped for clarity.  They
include the functions real_LZ4_uncompress(), LZ4_compressBound()
and the Visual Studio specific hunks wrapped in _MSC_VER.

Ported-by: Eric Dillmann <eric@jave.fr>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #1217
This commit is contained in:
Eric Dillmann 2013-01-23 10:54:30 +01:00 committed by Brian Behlendorf
parent ff5b1c8065
commit 9759c60f1a
13 changed files with 1188 additions and 2 deletions

View File

@ -25,6 +25,7 @@
/* /*
* Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved. * Copyright (c) 2012 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
*/ */
#ifndef _ZIO_H #ifndef _ZIO_H
@ -108,6 +109,7 @@ enum zio_compress {
ZIO_COMPRESS_GZIP_8, ZIO_COMPRESS_GZIP_8,
ZIO_COMPRESS_GZIP_9, ZIO_COMPRESS_GZIP_9,
ZIO_COMPRESS_ZLE, ZIO_COMPRESS_ZLE,
ZIO_COMPRESS_LZ4,
ZIO_COMPRESS_FUNCTIONS ZIO_COMPRESS_FUNCTIONS
}; };
@ -116,6 +118,7 @@ enum zio_compress {
#define BOOTFS_COMPRESS_VALID(compress) \ #define BOOTFS_COMPRESS_VALID(compress) \
((compress) == ZIO_COMPRESS_LZJB || \ ((compress) == ZIO_COMPRESS_LZJB || \
(compress) == ZIO_COMPRESS_LZ4 || \
((compress) == ZIO_COMPRESS_ON && \ ((compress) == ZIO_COMPRESS_ON && \
ZIO_COMPRESS_ON_VALUE == ZIO_COMPRESS_LZJB) || \ ZIO_COMPRESS_ON_VALUE == ZIO_COMPRESS_LZJB) || \
(compress) == ZIO_COMPRESS_OFF) (compress) == ZIO_COMPRESS_OFF)

View File

@ -53,6 +53,12 @@ typedef struct zio_compress_info {
extern zio_compress_info_t zio_compress_table[ZIO_COMPRESS_FUNCTIONS]; extern zio_compress_info_t zio_compress_table[ZIO_COMPRESS_FUNCTIONS];
/*
* lz4 compression init & free
*/
extern void lz4_init(void);
extern void lz4_fini(void);
/* /*
* Compression routines. * Compression routines.
*/ */
@ -68,6 +74,10 @@ extern size_t zle_compress(void *src, void *dst, size_t s_len, size_t d_len,
int level); int level);
extern int zle_decompress(void *src, void *dst, size_t s_len, size_t d_len, extern int zle_decompress(void *src, void *dst, size_t s_len, size_t d_len,
int level); int level);
extern size_t lz4_compress(void *src, void *dst, size_t s_len, size_t d_len,
int level);
extern int lz4_decompress(void *src, void *dst, size_t s_len, size_t d_len,
int level);
/* /*
* Compress and decompress data if necessary. * Compress and decompress data if necessary.

View File

@ -21,6 +21,7 @@
/* /*
* Copyright (c) 2012 by Delphix. All rights reserved. * Copyright (c) 2012 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
*/ */
#ifndef _ZFEATURE_COMMON_H #ifndef _ZFEATURE_COMMON_H
@ -52,6 +53,7 @@ typedef int (zfeature_func_t)(zfeature_info_t *fi, void *arg);
typedef enum spa_feature { typedef enum spa_feature {
SPA_FEATURE_ASYNC_DESTROY, SPA_FEATURE_ASYNC_DESTROY,
SPA_FEATURE_EMPTY_BPOBJ, SPA_FEATURE_EMPTY_BPOBJ,
SPA_FEATURE_LZ4_COMPRESS,
SPA_FEATURES SPA_FEATURES
} spa_feature_t; } spa_feature_t;

View File

@ -48,6 +48,7 @@ libzpool_la_SOURCES = \
$(top_srcdir)/module/zfs/fm.c \ $(top_srcdir)/module/zfs/fm.c \
$(top_srcdir)/module/zfs/gzip.c \ $(top_srcdir)/module/zfs/gzip.c \
$(top_srcdir)/module/zfs/lzjb.c \ $(top_srcdir)/module/zfs/lzjb.c \
$(top_srcdir)/module/zfs/lz4.c \
$(top_srcdir)/module/zfs/metaslab.c \ $(top_srcdir)/module/zfs/metaslab.c \
$(top_srcdir)/module/zfs/refcount.c \ $(top_srcdir)/module/zfs/refcount.c \
$(top_srcdir)/module/zfs/rrwlock.c \ $(top_srcdir)/module/zfs/rrwlock.c \

View File

@ -1,5 +1,6 @@
'\" te '\" te
.\" Copyright (c) 2012 by Delphix. All rights reserved. .\" Copyright (c) 2012 by Delphix. All rights reserved.
.\" Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
.\" The contents of this file are subject to the terms of the Common Development .\" The contents of this file are subject to the terms of the Common Development
.\" and Distribution License (the "License"). You may not use this file except .\" and Distribution License (the "License"). You may not use this file except
.\" in compliance with the License. You can obtain a copy of the license at .\" in compliance with the License. You can obtain a copy of the license at
@ -197,5 +198,38 @@ This feature is \fBactive\fR while there are any filesystems, volumes,
or snapshots which were created after enabling this feature. or snapshots which were created after enabling this feature.
.RE .RE
.sp
.ne 2
.na
\fB\fBlz4_compress\fR\fR
.ad
.RS 4n
.TS
l l .
GUID org.illumos:lz4_compress
READ\-ONLY COMPATIBLE no
DEPENDENCIES none
.TE
\fBlz4\fR is a high-performance real-time compression algorithm that
features significantly faster compression and decompression as well as a
higher compression ratio than the older \fBlzjb\fR compression.
Typically, \fBlz4\fR compression is approximately 50% faster on
compressible data and 200% faster on incompressible data than
\fBlzjb\fR. It is also approximately 80% faster on decompression, while
giving approximately 10% better compression ratio.
When the \fBlz4_compress\fR feature is set to \fBenabled\fR, the
administrator can turn on \fBlz4\fR compression on any dataset on the
pool using the \fBzfs\fR(1M) command. Please note that doing so will
immediately activate the \fBlz4_compress\fR feature on the underlying
pool (even before any data is written). Since this feature is not
read-only compatible, this operation will render the pool unimportable
on systems without support for the \fBlz4_compress\fR feature. At the
moment, this operation cannot be reversed. Booting off of
\fBlz4\fR-compressed root pools is supported.
.RE
.SH "SEE ALSO" .SH "SEE ALSO"
\fBzpool\fR(1M) \fBzpool\fR(1M)

View File

@ -25,6 +25,7 @@
.\" Copyright (c) 2012 by Delphix. All rights reserved. .\" Copyright (c) 2012 by Delphix. All rights reserved.
.\" Copyright (c) 2012, Joyent, Inc. All rights reserved. .\" Copyright (c) 2012, Joyent, Inc. All rights reserved.
.\" Copyright 2012 Nexenta Systems, Inc. All Rights Reserved. .\" Copyright 2012 Nexenta Systems, Inc. All Rights Reserved.
.\" Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
.\" .\"
.TH zfs 8 "Jan 10, 2013" "ZFS pool 28, filesystem 5" "System Administration Commands" .TH zfs 8 "Jan 10, 2013" "ZFS pool 28, filesystem 5" "System Administration Commands"
.SH NAME .SH NAME
@ -731,7 +732,7 @@ Changing this property affects only newly-written data.
.ne 2 .ne 2
.mk .mk
.na .na
\fBcompression\fR=\fBon\fR | \fBoff\fR | \fBlzjb\fR | \fBgzip\fR | \fBgzip-\fR\fIN\fR | \fBzle\fR \fBcompression\fR=\fBon\fR | \fBoff\fR | \fBlzjb\fR | \fBgzip\fR | \fBgzip-\fR\fIN\fR | \fBzle\fR | \fBlz4\fR
.ad .ad
.sp .6 .sp .6
.RS 4n .RS 4n
@ -741,6 +742,14 @@ The \fBgzip\fR compression algorithm uses the same compression as the \fBgzip\fR
.sp .sp
The \fBzle\fR (zero-length encoding) compression algorithm is a fast and simple algorithm to eliminate runs of zeroes. The \fBzle\fR (zero-length encoding) compression algorithm is a fast and simple algorithm to eliminate runs of zeroes.
.sp .sp
The \fBlz4\fR compression algorithm is a high-performance replacement
for the \fBlzjb\fR algorithm. It features significantly faster
compression and decompression, as well as a moderately higher
compression ratio than \fBlzjb\fR, but can only be used on pools with
the \fBlz4_compress\fR feature set to \fIenabled\fR. See
\fBzpool-features\fR(5) for details on ZFS feature flags and the
\fBlz4_compress\fR feature.
.sp
This property can also be referred to by its shortened column name \fBcompress\fR. Changing this property affects only newly-written data. This property can also be referred to by its shortened column name \fBcompress\fR. Changing this property affects only newly-written data.
.RE .RE

View File

@ -96,6 +96,7 @@ zfs_prop_init(void)
{ "gzip-8", ZIO_COMPRESS_GZIP_8 }, { "gzip-8", ZIO_COMPRESS_GZIP_8 },
{ "gzip-9", ZIO_COMPRESS_GZIP_9 }, { "gzip-9", ZIO_COMPRESS_GZIP_9 },
{ "zle", ZIO_COMPRESS_ZLE }, { "zle", ZIO_COMPRESS_ZLE },
{ "lz4", ZIO_COMPRESS_LZ4 },
{ NULL } { NULL }
}; };
@ -211,7 +212,7 @@ zfs_prop_init(void)
zprop_register_index(ZFS_PROP_COMPRESSION, "compression", zprop_register_index(ZFS_PROP_COMPRESSION, "compression",
ZIO_COMPRESS_DEFAULT, PROP_INHERIT, ZIO_COMPRESS_DEFAULT, PROP_INHERIT,
ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
"on | off | lzjb | gzip | gzip-[1-9] | zle", "COMPRESS", "on | off | lzjb | gzip | gzip-[1-9] | zle | lz4", "COMPRESS",
compress_table); compress_table);
zprop_register_index(ZFS_PROP_SNAPDIR, "snapdir", ZFS_SNAPDIR_HIDDEN, zprop_register_index(ZFS_PROP_SNAPDIR, "snapdir", ZFS_SNAPDIR_HIDDEN,
PROP_INHERIT, ZFS_TYPE_FILESYSTEM, PROP_INHERIT, ZFS_TYPE_FILESYSTEM,

View File

@ -32,6 +32,7 @@ $(MODULE)-objs += @top_srcdir@/module/zfs/dsl_synctask.o
$(MODULE)-objs += @top_srcdir@/module/zfs/fm.o $(MODULE)-objs += @top_srcdir@/module/zfs/fm.o
$(MODULE)-objs += @top_srcdir@/module/zfs/gzip.o $(MODULE)-objs += @top_srcdir@/module/zfs/gzip.o
$(MODULE)-objs += @top_srcdir@/module/zfs/lzjb.o $(MODULE)-objs += @top_srcdir@/module/zfs/lzjb.o
$(MODULE)-objs += @top_srcdir@/module/zfs/lz4.o
$(MODULE)-objs += @top_srcdir@/module/zfs/metaslab.o $(MODULE)-objs += @top_srcdir@/module/zfs/metaslab.o
$(MODULE)-objs += @top_srcdir@/module/zfs/refcount.o $(MODULE)-objs += @top_srcdir@/module/zfs/refcount.o
$(MODULE)-objs += @top_srcdir@/module/zfs/rrwlock.o $(MODULE)-objs += @top_srcdir@/module/zfs/rrwlock.o

1004
module/zfs/lz4.c Normal file

File diff suppressed because it is too large Load Diff

View File

@ -21,6 +21,7 @@
/* /*
* Copyright (c) 2012 by Delphix. All rights reserved. * Copyright (c) 2012 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
*/ */
#ifdef _KERNEL #ifdef _KERNEL
@ -160,4 +161,7 @@ zpool_feature_init(void)
zfeature_register(SPA_FEATURE_EMPTY_BPOBJ, zfeature_register(SPA_FEATURE_EMPTY_BPOBJ,
"com.delphix:empty_bpobj", "empty_bpobj", "com.delphix:empty_bpobj", "empty_bpobj",
"Snapshots use less space.", B_TRUE, B_FALSE, NULL); "Snapshots use less space.", B_TRUE, B_FALSE, NULL);
zfeature_register(SPA_FEATURE_LZ4_COMPRESS,
"org.illumos:lz4_compress", "lz4_compress",
"LZ4 compression algorithm support.", B_FALSE, B_FALSE, NULL);
} }

View File

@ -27,6 +27,7 @@
* Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved. * Copyright (c) 2012 by Delphix. All rights reserved.
* Copyright (c) 2012, Joyent, Inc. All rights reserved. * Copyright (c) 2012, Joyent, Inc. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
*/ */
#include <sys/types.h> #include <sys/types.h>
@ -75,6 +76,8 @@
#include <sys/dmu_objset.h> #include <sys/dmu_objset.h>
#include <sys/fm/util.h> #include <sys/fm/util.h>
#include <sys/zfeature.h>
#include <linux/miscdevice.h> #include <linux/miscdevice.h>
#include "zfs_namecheck.h" #include "zfs_namecheck.h"
@ -128,6 +131,12 @@ static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *,
boolean_t *); boolean_t *);
int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t **); int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t **);
static int zfs_prop_activate_feature(dsl_pool_t *dp, zfeature_info_t *feature);
static int zfs_prop_activate_feature_check(void *arg1, void *arg2,
dmu_tx_t *tx);
static void zfs_prop_activate_feature_sync(void *arg1, void *arg2,
dmu_tx_t *tx);
static void static void
history_str_free(char *buf) history_str_free(char *buf)
{ {
@ -2196,6 +2205,40 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source,
} }
break; break;
} }
case ZFS_PROP_COMPRESSION:
{
if (intval == ZIO_COMPRESS_LZ4) {
zfeature_info_t *feature =
&spa_feature_table[SPA_FEATURE_LZ4_COMPRESS];
spa_t *spa;
dsl_pool_t *dp;
if ((err = spa_open(dsname, &spa, FTAG)) != 0)
return (err);
dp = spa->spa_dsl_pool;
/*
* Setting the LZ4 compression algorithm activates
* the feature.
*/
if (!spa_feature_is_active(spa, feature)) {
if ((err = zfs_prop_activate_feature(dp,
feature)) != 0) {
spa_close(spa, FTAG);
return (err);
}
}
spa_close(spa, FTAG);
}
/*
* We still want the default set action to be performed in the
* caller, we only performed zfeature settings here.
*/
err = -1;
break;
}
default: default:
err = -1; err = -1;
@ -3416,6 +3459,22 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
SPA_VERSION_ZLE_COMPRESSION)) SPA_VERSION_ZLE_COMPRESSION))
return (ENOTSUP); return (ENOTSUP);
if (intval == ZIO_COMPRESS_LZ4) {
zfeature_info_t *feature =
&spa_feature_table[
SPA_FEATURE_LZ4_COMPRESS];
spa_t *spa;
if ((err = spa_open(dsname, &spa, FTAG)) != 0)
return (err);
if (!spa_feature_is_enabled(spa, feature)) {
spa_close(spa, FTAG);
return (ENOTSUP);
}
spa_close(spa, FTAG);
}
/* /*
* If this is a bootable dataset then * If this is a bootable dataset then
* verify that the compression algorithm * verify that the compression algorithm
@ -3461,6 +3520,56 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
return (zfs_secpolicy_setprop(dsname, prop, pair, CRED())); return (zfs_secpolicy_setprop(dsname, prop, pair, CRED()));
} }
/*
 * Mark a pool feature as active because a property was set that requires
 * it.  The work is handed to a sync task (check + sync callbacks below)
 * which operates on the pool's spa and the given feature descriptor.
 *
 * Returns 0 on success, or the error reported by the sync task.
 */
static int
zfs_prop_activate_feature(dsl_pool_t *dp, zfeature_info_t *feature)
{
	int rc = dsl_sync_task_do(dp, zfs_prop_activate_feature_check,
	    zfs_prop_activate_feature_sync, dp->dp_spa, feature, 2);

	/*
	 * EBUSY means the feature was already active by the time the
	 * check callback ran; that is not a failure for the caller.
	 */
	return ((rc == EBUSY) ? 0 : rc);
}
/*
 * Sync task check callback: guards against racing activations so that the
 * feature's reference count is not bumped more than once.
 *
 * arg1 is the spa_t, arg2 the zfeature_info_t.  Returns EBUSY when the
 * feature is already active and 0 otherwise.
 */
/*ARGSUSED*/
static int
zfs_prop_activate_feature_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	spa_t *pool = arg1;
	zfeature_info_t *feat = arg2;

	if (spa_feature_is_active(pool, feat))
		return (EBUSY);

	return (0);
}
/*
 * Sync task callback for zfs_prop_activate_feature: increments the
 * feature (arg2, a zfeature_info_t) on the pool (arg1, a spa_t) within
 * the supplied transaction.
 */
static void
zfs_prop_activate_feature_sync(void *arg1, void *arg2, dmu_tx_t *tx)
{
	spa_feature_incr((spa_t *)arg1, (zfeature_info_t *)arg2, tx);
}
/* /*
* Removes properties from the given props list that fail permission checks * Removes properties from the given props list that fail permission checks
* needed to clear them and to restore them in case of a receive error. For each * needed to clear them and to restore them in case of a receive error. For each

View File

@ -210,6 +210,8 @@ zio_init(void)
zfs_mg_alloc_failures = MAX((3 * max_ncpus / 2), 8); zfs_mg_alloc_failures = MAX((3 * max_ncpus / 2), 8);
zio_inject_init(); zio_inject_init();
lz4_init();
} }
void void
@ -238,6 +240,8 @@ zio_fini(void)
kmem_cache_destroy(zio_cache); kmem_cache_destroy(zio_cache);
zio_inject_fini(); zio_inject_fini();
lz4_fini();
} }
/* /*

View File

@ -23,6 +23,9 @@
* Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms. * Use is subject to license terms.
*/ */
/*
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
*/
#include <sys/zfs_context.h> #include <sys/zfs_context.h>
#include <sys/compress.h> #include <sys/compress.h>
@ -50,6 +53,7 @@ zio_compress_info_t zio_compress_table[ZIO_COMPRESS_FUNCTIONS] = {
{gzip_compress, gzip_decompress, 8, "gzip-8"}, {gzip_compress, gzip_decompress, 8, "gzip-8"},
{gzip_compress, gzip_decompress, 9, "gzip-9"}, {gzip_compress, gzip_decompress, 9, "gzip-9"},
{zle_compress, zle_decompress, 64, "zle"}, {zle_compress, zle_decompress, 64, "zle"},
{lz4_compress, lz4_decompress, 0, "lz4"},
}; };
enum zio_compress enum zio_compress