From cf63739191b6cac629d053930a4aea592bca3819 Mon Sep 17 00:00:00 2001 From: Tom Caputi Date: Fri, 9 Mar 2018 16:37:15 -0500 Subject: [PATCH] QAT support for AES-GCM This patch adds support for acceleration of AES-GCM encryption with Intel Quick Assist Technology. Reviewed-by: Brian Behlendorf Signed-off-by: Chengfeix Zhu Signed-off-by: Weigang Li Signed-off-by: Tom Caputi Closes #7282 --- man/man5/zfs-module-parameters.5 | 6 +- module/zfs/Makefile.in | 2 + module/zfs/gzip.c | 6 +- module/zfs/qat.c | 102 +++++++++ module/zfs/qat.h | 176 +++++++++++++++ module/zfs/qat_compress.c | 223 +++++-------------- module/zfs/qat_compress.h | 48 ---- module/zfs/qat_crypt.c | 371 +++++++++++++++++++++++++++++++ module/zfs/spa_misc.c | 2 +- module/zfs/zio_crypt.c | 54 ++++- 10 files changed, 758 insertions(+), 232 deletions(-) create mode 100644 module/zfs/qat.c create mode 100644 module/zfs/qat.h delete mode 100644 module/zfs/qat_compress.h create mode 100644 module/zfs/qat_crypt.c diff --git a/man/man5/zfs-module-parameters.5 b/man/man5/zfs-module-parameters.5 index b7d32e069f..a4b6b4f3ae 100644 --- a/man/man5/zfs-module-parameters.5 +++ b/man/man5/zfs-module-parameters.5 @@ -2352,9 +2352,9 @@ Default value: \fB1\fR. \fBzfs_qat_disable\fR (int) .ad .RS 12n -This tunable disables qat hardware acceleration for gzip compression. -It is available only if qat acceleration is compiled in and qat driver -is present. +This tunable disables qat hardware acceleration for gzip compression and +AES-GCM encryption. It is available only if qat acceleration is compiled in +and the qat driver is present. .sp Use \fB1\fR for yes and \fB0\fR for no (default). 
.RE diff --git a/module/zfs/Makefile.in b/module/zfs/Makefile.in index 084c1ac233..fe0d5b523c 100644 --- a/module/zfs/Makefile.in +++ b/module/zfs/Makefile.in @@ -132,7 +132,9 @@ $(MODULE)-objs += zrlock.o $(MODULE)-objs += zvol.o $(MODULE)-objs += dsl_destroy.o $(MODULE)-objs += dsl_userhold.o +$(MODULE)-objs += qat.o $(MODULE)-objs += qat_compress.o +$(MODULE)-objs += qat_crypt.o # Suppress incorrect warnings from versions of objtool which are not # aware of x86 EVEX prefix instructions used for AVX512. diff --git a/module/zfs/gzip.c b/module/zfs/gzip.c index 6c8fdd308a..6e4db718c2 100644 --- a/module/zfs/gzip.c +++ b/module/zfs/gzip.c @@ -28,7 +28,7 @@ #include #include -#include "qat_compress.h" +#include "qat.h" #ifdef _KERNEL @@ -58,7 +58,7 @@ gzip_compress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n) ASSERT(d_len <= s_len); /* check if hardware accelerator can be used */ - if (qat_use_accel(s_len)) { + if (qat_dc_use_accel(s_len)) { if (qat_compress(QAT_COMPRESS, s_start, s_len, d_start, d_len, &dstlen) == CPA_STATUS_SUCCESS) return ((size_t)dstlen); @@ -85,7 +85,7 @@ gzip_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n) ASSERT(d_len >= s_len); /* check if hardware accelerator can be used */ - if (qat_use_accel(d_len)) { + if (qat_dc_use_accel(d_len)) { if (qat_compress(QAT_DECOMPRESS, s_start, s_len, d_start, d_len, &dstlen) == CPA_STATUS_SUCCESS) return (0); diff --git a/module/zfs/qat.c b/module/zfs/qat.c new file mode 100644 index 0000000000..4dc34f1e62 --- /dev/null +++ b/module/zfs/qat.c @@ -0,0 +1,102 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +#if defined(_KERNEL) && defined(HAVE_QAT) +#include +#include "qat.h" + +qat_stats_t qat_stats = { + { "comp_requests", KSTAT_DATA_UINT64 }, + { "comp_total_in_bytes", KSTAT_DATA_UINT64 }, + { "comp_total_out_bytes", KSTAT_DATA_UINT64 }, + { "decomp_requests", KSTAT_DATA_UINT64 }, + { "decomp_total_in_bytes", KSTAT_DATA_UINT64 }, + { "decomp_total_out_bytes", KSTAT_DATA_UINT64 }, + { "dc_fails", KSTAT_DATA_UINT64 }, + { "encrypt_requests", KSTAT_DATA_UINT64 }, + { "encrypt_total_in_bytes", KSTAT_DATA_UINT64 }, + { "encrypt_total_out_bytes", KSTAT_DATA_UINT64 }, + { "decrypt_requests", KSTAT_DATA_UINT64 }, + { "decrypt_total_in_bytes", KSTAT_DATA_UINT64 }, + { "decrypt_total_out_bytes", KSTAT_DATA_UINT64 }, + { "crypt_fails", KSTAT_DATA_UINT64 }, +}; + +static kstat_t *qat_ksp = NULL; +int zfs_qat_disable = 0; + +CpaStatus +qat_mem_alloc_contig(void **pp_mem_addr, Cpa32U size_bytes) +{ + *pp_mem_addr = kmalloc(size_bytes, GFP_KERNEL); + if (*pp_mem_addr == NULL) + return (CPA_STATUS_RESOURCE); + return (CPA_STATUS_SUCCESS); +} + +void +qat_mem_free_contig(void **pp_mem_addr) +{ + if (*pp_mem_addr != NULL) { + kfree(*pp_mem_addr); + *pp_mem_addr = NULL; + } +} + +int +qat_init(void) +{ + int ret; + + ret = qat_dc_init(); + if (ret != 0) + return (ret); + + ret = qat_crypt_init(); + if (ret != 0) { + qat_dc_fini(); + return (ret); + } + + qat_ksp = kstat_create("zfs", 0, "qat", "misc", + KSTAT_TYPE_NAMED, sizeof (qat_stats) / sizeof (kstat_named_t), + KSTAT_FLAG_VIRTUAL); + if 
(qat_ksp != NULL) { + qat_ksp->ks_data = &qat_stats; + kstat_install(qat_ksp); + } + + return (0); +} + +void +qat_fini(void) +{ + if (qat_ksp != NULL) { + kstat_delete(qat_ksp); + qat_ksp = NULL; + } + + qat_crypt_fini(); + qat_dc_fini(); +} + +#endif diff --git a/module/zfs/qat.h b/module/zfs/qat.h new file mode 100644 index 0000000000..44f9cb532f --- /dev/null +++ b/module/zfs/qat.h @@ -0,0 +1,176 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +#ifndef _SYS_QAT_H +#define _SYS_QAT_H + +typedef enum qat_compress_dir { + QAT_DECOMPRESS = 0, + QAT_COMPRESS = 1, +} qat_compress_dir_t; + +typedef enum qat_encrypt_dir { + QAT_DECRYPT = 0, + QAT_ENCRYPT = 1, +} qat_encrypt_dir_t; + + +#if defined(_KERNEL) && defined(HAVE_QAT) +#include +#include +#include "cpa.h" +#include "dc/cpa_dc.h" +#include "lac/cpa_cy_sym.h" + +/* + * Timeout - no response from hardware after 0.5 seconds + */ +#define QAT_TIMEOUT_MS 500 + +/* + * The minimal and maximal buffer size, which are not restricted + * in the QAT hardware, but with the input buffer size between 4KB + * and 128KB, the hardware can provide the optimal performance. 
+ */ +#define QAT_MIN_BUF_SIZE (4*1024) +#define QAT_MAX_BUF_SIZE (128*1024) + +/* + * Used for qat kstat. + */ +typedef struct qat_stats { + /* + * Number of jobs submitted to qat compression engine. + */ + kstat_named_t comp_requests; + /* + * Total bytes sent to qat compression engine. + */ + kstat_named_t comp_total_in_bytes; + /* + * Total bytes output from qat compression engine. + */ + kstat_named_t comp_total_out_bytes; + /* + * Number of jobs submitted to qat de-compression engine. + */ + kstat_named_t decomp_requests; + /* + * Total bytes sent to qat de-compression engine. + */ + kstat_named_t decomp_total_in_bytes; + /* + * Total bytes output from qat de-compression engine. + */ + kstat_named_t decomp_total_out_bytes; + /* + * Number of fails in the qat compression / decompression engine. + * Note: when qat fail happens, it doesn't mean a critical hardware + * issue. Sometimes it is because the output buffer is not big enough. + * The compression job will be transferred to gzip software + * implementation, so the functionality of ZFS is not impacted. + */ + kstat_named_t dc_fails; + + /* + * Number of jobs submitted to qat encryption engine. + */ + kstat_named_t encrypt_requests; + /* + * Total bytes sent to qat encryption engine. + */ + kstat_named_t encrypt_total_in_bytes; + /* + * Total bytes output from qat encryption engine. + */ + kstat_named_t encrypt_total_out_bytes; + /* + * Number of jobs submitted to qat decryption engine. + */ + kstat_named_t decrypt_requests; + /* + * Total bytes sent to qat decryption engine. + */ + kstat_named_t decrypt_total_in_bytes; + /* + * Total bytes output from qat decryption engine. + */ + kstat_named_t decrypt_total_out_bytes; + /* + * Number of fails in the qat encryption / decryption engine. + * Note: when qat fail happens, it doesn't mean a critical hardware + * issue. Sometimes it is because the output buffer is not big enough. 
+ * The encryption job will be transferred to the software + * implementation, so the functionality of ZFS is not impacted. + */ + kstat_named_t crypt_fails; +} qat_stats_t; + +#define QAT_STAT_INCR(stat, val) \ + atomic_add_64(&qat_stats.stat.value.ui64, (val)) +#define QAT_STAT_BUMP(stat) \ + QAT_STAT_INCR(stat, 1) + +extern qat_stats_t qat_stats; +extern int zfs_qat_disable; + +/* inlined for performance */ +static inline struct page * +qat_mem_to_page(void *addr) +{ + if (!is_vmalloc_addr(addr)) + return (virt_to_page(addr)); + + return (vmalloc_to_page(addr)); +} + +CpaStatus qat_mem_alloc_contig(void **pp_mem_addr, Cpa32U size_bytes); +void qat_mem_free_contig(void **pp_mem_addr); +#define QAT_PHYS_CONTIG_ALLOC(pp_mem_addr, size_bytes) \ + qat_mem_alloc_contig((void *)(pp_mem_addr), (size_bytes)) +#define QAT_PHYS_CONTIG_FREE(p_mem_addr) \ + qat_mem_free_contig((void *)&(p_mem_addr)) + +extern int qat_dc_init(void); +extern void qat_dc_fini(void); +extern int qat_crypt_init(void); +extern void qat_crypt_fini(void); +extern int qat_init(void); +extern void qat_fini(void); + +extern boolean_t qat_dc_use_accel(size_t s_len); +extern boolean_t qat_crypt_use_accel(size_t s_len); +extern int qat_compress(qat_compress_dir_t dir, char *src, int src_len, + char *dst, int dst_len, size_t *c_len); +extern int qat_crypt(qat_encrypt_dir_t dir, uint8_t *src_buf, uint8_t *dst_buf, + uint8_t *aad_buf, uint32_t aad_len, uint8_t *iv_buf, uint8_t *digest_buf, + crypto_key_t *key, uint64_t crypt, uint32_t enc_len); +#else +#define CPA_STATUS_SUCCESS 0 +#define qat_init() +#define qat_fini() +#define qat_dc_use_accel(s_len) 0 +#define qat_crypt_use_accel(s_len) 0 +#define qat_compress(dir, s, sl, d, dl, cl) 0 +#define qat_crypt(dir, s, d, a, al, i, db, k, c, el) 0 +#endif + +#endif /* _SYS_QAT_H */ diff --git a/module/zfs/qat_compress.c b/module/zfs/qat_compress.c index 62655f56db..3d756b53d2 100644 --- a/module/zfs/qat_compress.c +++ b/module/zfs/qat_compress.c @@ -25,12 +25,7 @@ 
#include #include #include -#include "qat_compress.h" - -/* - * Timeout - no response from hardware after 0.5 seconds - */ -#define TIMEOUT_MS 500 +#include "qat.h" /* * Max instances in QAT device, each instance is a channel to submit @@ -38,7 +33,7 @@ * and session arrays, the actual number of instances are defined in * the QAT driver's configure file. */ -#define MAX_INSTANCES 48 +#define QAT_DC_MAX_INSTANCES 48 /* * ZLIB head and foot size @@ -46,89 +41,20 @@ #define ZLIB_HEAD_SZ 2 #define ZLIB_FOOT_SZ 4 -/* - * The minimal and maximal buffer size, which are not restricted - * in the QAT hardware, but with the input buffer size between 4KB - * and 128KB, the hardware can provide the optimal performance. - */ -#define QAT_MIN_BUF_SIZE (4*1024) -#define QAT_MAX_BUF_SIZE (128*1024) - -/* - * Used for qat kstat. - */ -typedef struct qat_stats { - /* - * Number of jobs submitted to qat compression engine. - */ - kstat_named_t comp_requests; - /* - * Total bytes sent to qat compression engine. - */ - kstat_named_t comp_total_in_bytes; - /* - * Total bytes output from qat compression engine. - */ - kstat_named_t comp_total_out_bytes; - /* - * Number of jobs submitted to qat de-compression engine. - */ - kstat_named_t decomp_requests; - /* - * Total bytes sent to qat de-compression engine. - */ - kstat_named_t decomp_total_in_bytes; - /* - * Total bytes output from qat de-compression engine. - */ - kstat_named_t decomp_total_out_bytes; - /* - * Number of fails in qat engine. - * Note: when qat fail happens, it doesn't mean a critical hardware - * issue, sometimes it is because the output buffer is not big enough, - * and the compression job will be transfered to gzip software again, - * so the functionality of ZFS is not impacted. 
- */ - kstat_named_t dc_fails; -} qat_stats_t; - -qat_stats_t qat_stats = { - { "comp_reqests", KSTAT_DATA_UINT64 }, - { "comp_total_in_bytes", KSTAT_DATA_UINT64 }, - { "comp_total_out_bytes", KSTAT_DATA_UINT64 }, - { "decomp_reqests", KSTAT_DATA_UINT64 }, - { "decomp_total_in_bytes", KSTAT_DATA_UINT64 }, - { "decomp_total_out_bytes", KSTAT_DATA_UINT64 }, - { "dc_fails", KSTAT_DATA_UINT64 }, -}; - -static kstat_t *qat_ksp; -static CpaInstanceHandle dc_inst_handles[MAX_INSTANCES]; -static CpaDcSessionHandle session_handles[MAX_INSTANCES]; -static CpaBufferList **buffer_array[MAX_INSTANCES]; +static CpaInstanceHandle dc_inst_handles[QAT_DC_MAX_INSTANCES]; +static CpaDcSessionHandle session_handles[QAT_DC_MAX_INSTANCES]; +static CpaBufferList **buffer_array[QAT_DC_MAX_INSTANCES]; static Cpa16U num_inst = 0; static Cpa32U inst_num = 0; -static boolean_t qat_init_done = B_FALSE; -int zfs_qat_disable = 0; +static boolean_t qat_dc_init_done = B_FALSE; -#define QAT_STAT_INCR(stat, val) \ - atomic_add_64(&qat_stats.stat.value.ui64, (val)); -#define QAT_STAT_BUMP(stat) \ - QAT_STAT_INCR(stat, 1); - -#define PHYS_CONTIG_ALLOC(pp_mem_addr, size_bytes) \ - mem_alloc_contig((void *)(pp_mem_addr), (size_bytes)) - -#define PHYS_CONTIG_FREE(p_mem_addr) \ - mem_free_contig((void *)&(p_mem_addr)) - -static inline struct page * -mem_to_page(void *addr) +boolean_t +qat_dc_use_accel(size_t s_len) { - if (!is_vmalloc_addr(addr)) - return (virt_to_page(addr)); - - return (vmalloc_to_page(addr)); + return (!zfs_qat_disable && + qat_dc_init_done && + s_len >= QAT_MIN_BUF_SIZE && + s_len <= QAT_MAX_BUF_SIZE); } static void @@ -138,26 +64,8 @@ qat_dc_callback(void *p_callback, CpaStatus status) complete((struct completion *)p_callback); } -static inline CpaStatus -mem_alloc_contig(void **pp_mem_addr, Cpa32U size_bytes) -{ - *pp_mem_addr = kmalloc(size_bytes, GFP_KERNEL); - if (*pp_mem_addr == NULL) - return (CPA_STATUS_RESOURCE); - return (CPA_STATUS_SUCCESS); -} - -static inline void 
-mem_free_contig(void **pp_mem_addr) -{ - if (*pp_mem_addr != NULL) { - kfree(*pp_mem_addr); - *pp_mem_addr = NULL; - } -} - static void -qat_clean(void) +qat_dc_clean(void) { Cpa16U buff_num = 0; Cpa16U num_inter_buff_lists = 0; @@ -165,7 +73,7 @@ qat_clean(void) for (i = 0; i < num_inst; i++) { cpaDcStopInstance(dc_inst_handles[i]); - PHYS_CONTIG_FREE(session_handles[i]); + QAT_PHYS_CONTIG_FREE(session_handles[i]); /* free intermediate buffers */ if (buffer_array[i] != NULL) { cpaDcGetNumIntermediateBuffers( @@ -175,24 +83,24 @@ qat_clean(void) CpaBufferList *buffer_inter = buffer_array[i][buff_num]; if (buffer_inter->pBuffers) { - PHYS_CONTIG_FREE( + QAT_PHYS_CONTIG_FREE( buffer_inter->pBuffers->pData); - PHYS_CONTIG_FREE( + QAT_PHYS_CONTIG_FREE( buffer_inter->pBuffers); } - PHYS_CONTIG_FREE( + QAT_PHYS_CONTIG_FREE( buffer_inter->pPrivateMetaData); - PHYS_CONTIG_FREE(buffer_inter); + QAT_PHYS_CONTIG_FREE(buffer_inter); } } } num_inst = 0; - qat_init_done = B_FALSE; + qat_dc_init_done = B_FALSE; } int -qat_init(void) +qat_dc_init(void) { CpaStatus status = CPA_STATUS_SUCCESS; Cpa32U sess_size = 0; @@ -204,11 +112,15 @@ qat_init(void) Cpa16U i; status = cpaDcGetNumInstances(&num_inst); - if (status != CPA_STATUS_SUCCESS || num_inst == 0) + if (status != CPA_STATUS_SUCCESS) return (-1); - if (num_inst > MAX_INSTANCES) - num_inst = MAX_INSTANCES; + /* if the user has configured no QAT compression units just return */ + if (num_inst == 0) + return (0); + + if (num_inst > QAT_DC_MAX_INSTANCES) + num_inst = QAT_DC_MAX_INSTANCES; status = cpaDcGetInstances(num_inst, &dc_inst_handles[0]); if (status != CPA_STATUS_SUCCESS) @@ -226,25 +138,25 @@ qat_init(void) dc_inst_handles[i], &num_inter_buff_lists); if (status == CPA_STATUS_SUCCESS && num_inter_buff_lists != 0) - status = PHYS_CONTIG_ALLOC(&buffer_array[i], + status = QAT_PHYS_CONTIG_ALLOC(&buffer_array[i], num_inter_buff_lists * sizeof (CpaBufferList *)); for (buff_num = 0; buff_num < num_inter_buff_lists; buff_num++) 
{ if (status == CPA_STATUS_SUCCESS) - status = PHYS_CONTIG_ALLOC( + status = QAT_PHYS_CONTIG_ALLOC( &buffer_array[i][buff_num], sizeof (CpaBufferList)); if (status == CPA_STATUS_SUCCESS) - status = PHYS_CONTIG_ALLOC( + status = QAT_PHYS_CONTIG_ALLOC( &buffer_array[i][buff_num]-> pPrivateMetaData, buff_meta_size); if (status == CPA_STATUS_SUCCESS) - status = PHYS_CONTIG_ALLOC( + status = QAT_PHYS_CONTIG_ALLOC( &buffer_array[i][buff_num]->pBuffers, sizeof (CpaFlatBuffer)); @@ -255,7 +167,7 @@ qat_init(void) * output buffer, which is 2x max buffer * size here. */ - status = PHYS_CONTIG_ALLOC( + status = QAT_PHYS_CONTIG_ALLOC( &buffer_array[i][buff_num]->pBuffers-> pData, 2 * QAT_MAX_BUF_SIZE); if (status != CPA_STATUS_SUCCESS) @@ -284,7 +196,7 @@ qat_init(void) if (status != CPA_STATUS_SUCCESS) goto fail; - PHYS_CONTIG_ALLOC(&session_handles[i], sess_size); + QAT_PHYS_CONTIG_ALLOC(&session_handles[i], sess_size); if (session_handles[i] == NULL) goto fail; @@ -295,39 +207,20 @@ qat_init(void) goto fail; } - qat_ksp = kstat_create("zfs", 0, "qat", "misc", - KSTAT_TYPE_NAMED, sizeof (qat_stats) / sizeof (kstat_named_t), - KSTAT_FLAG_VIRTUAL); - if (qat_ksp != NULL) { - qat_ksp->ks_data = &qat_stats; - kstat_install(qat_ksp); - } - - qat_init_done = B_TRUE; + qat_dc_init_done = B_TRUE; return (0); fail: - qat_clean(); + qat_dc_clean(); return (-1); } void -qat_fini(void) +qat_dc_fini(void) { - qat_clean(); + if (!qat_dc_init_done) + return; - if (qat_ksp != NULL) { - kstat_delete(qat_ksp); - qat_ksp = NULL; - } -} - -boolean_t -qat_use_accel(size_t s_len) -{ - return (!zfs_qat_disable && - qat_init_done && - s_len >= QAT_MIN_BUF_SIZE && - s_len <= QAT_MAX_BUF_SIZE); + qat_dc_clean(); } int @@ -364,11 +257,11 @@ qat_compress(qat_compress_dir_t dir, char *src, int src_len, Cpa32U dst_buffer_list_mem_size = sizeof (CpaBufferList) + (num_dst_buf * sizeof (CpaFlatBuffer)); - if (PHYS_CONTIG_ALLOC(&in_pages, + if (QAT_PHYS_CONTIG_ALLOC(&in_pages, num_src_buf * sizeof (struct 
page *)) != CPA_STATUS_SUCCESS) goto fail; - if (PHYS_CONTIG_ALLOC(&out_pages, + if (QAT_PHYS_CONTIG_ALLOC(&out_pages, num_dst_buf * sizeof (struct page *)) != CPA_STATUS_SUCCESS) goto fail; @@ -378,18 +271,18 @@ qat_compress(qat_compress_dir_t dir, char *src, int src_len, cpaDcBufferListGetMetaSize(dc_inst_handle, num_src_buf, &buffer_meta_size); - if (PHYS_CONTIG_ALLOC(&buffer_meta_src, buffer_meta_size) != + if (QAT_PHYS_CONTIG_ALLOC(&buffer_meta_src, buffer_meta_size) != CPA_STATUS_SUCCESS) goto fail; cpaDcBufferListGetMetaSize(dc_inst_handle, num_dst_buf, &buffer_meta_size); - if (PHYS_CONTIG_ALLOC(&buffer_meta_dst, buffer_meta_size) != + if (QAT_PHYS_CONTIG_ALLOC(&buffer_meta_dst, buffer_meta_size) != CPA_STATUS_SUCCESS) goto fail; /* build source buffer list */ - if (PHYS_CONTIG_ALLOC(&buf_list_src, src_buffer_list_mem_size) != + if (QAT_PHYS_CONTIG_ALLOC(&buf_list_src, src_buffer_list_mem_size) != CPA_STATUS_SUCCESS) goto fail; @@ -398,7 +291,7 @@ qat_compress(qat_compress_dir_t dir, char *src, int src_len, buf_list_src->pBuffers = flat_buf_src; /* always point to first one */ /* build destination buffer list */ - if (PHYS_CONTIG_ALLOC(&buf_list_dst, dst_buffer_list_mem_size) != + if (QAT_PHYS_CONTIG_ALLOC(&buf_list_dst, dst_buffer_list_mem_size) != CPA_STATUS_SUCCESS) goto fail; @@ -412,7 +305,7 @@ qat_compress(qat_compress_dir_t dir, char *src, int src_len, data = src; page_num = 0; while (bytes_left > 0) { - in_page = mem_to_page(data); + in_page = qat_mem_to_page(data); in_pages[page_num] = in_page; flat_buf_src->pData = kmap(in_page); flat_buf_src->dataLenInBytes = @@ -431,7 +324,7 @@ qat_compress(qat_compress_dir_t dir, char *src, int src_len, data = dst; page_num = 0; while (bytes_left > 0) { - out_page = mem_to_page(data); + out_page = qat_mem_to_page(data); flat_buf_dst->pData = kmap(out_page); out_pages[page_num] = out_page; flat_buf_dst->dataLenInBytes = @@ -465,7 +358,7 @@ qat_compress(qat_compress_dir_t dir, char *src, int src_len, /* we now 
wait until the completion of the operation. */ if (!wait_for_completion_interruptible_timeout(&complete, - TIMEOUT_MS)) { + msecs_to_jiffies(QAT_TIMEOUT_MS))) { /* wants jiffies */ status = CPA_STATUS_FAIL; goto fail; } @@ -508,7 +401,8 @@ qat_compress(qat_compress_dir_t dir, char *src, int src_len, ret = 0; - } else if (dir == QAT_DECOMPRESS) { + } else { + ASSERT3U(dir, ==, QAT_DECOMPRESS); QAT_STAT_BUMP(decomp_requests); QAT_STAT_INCR(decomp_total_in_bytes, src_len); @@ -529,7 +423,7 @@ qat_compress(qat_compress_dir_t dir, char *src, int src_len, /* we now wait until the completion of the operation. */ if (!wait_for_completion_interruptible_timeout(&complete, - TIMEOUT_MS)) { + msecs_to_jiffies(QAT_TIMEOUT_MS))) { /* wants jiffies */ status = CPA_STATUS_FAIL; goto fail; } @@ -557,7 +451,7 @@ fail: page_num++) { kunmap(in_pages[page_num]); } - PHYS_CONTIG_FREE(in_pages); + QAT_PHYS_CONTIG_FREE(in_pages); } if (out_pages) { @@ -566,18 +460,15 @@ fail: page_num++) { kunmap(out_pages[page_num]); } - PHYS_CONTIG_FREE(out_pages); + QAT_PHYS_CONTIG_FREE(out_pages); } - PHYS_CONTIG_FREE(buffer_meta_src); - PHYS_CONTIG_FREE(buffer_meta_dst); - PHYS_CONTIG_FREE(buf_list_src); - PHYS_CONTIG_FREE(buf_list_dst); + QAT_PHYS_CONTIG_FREE(buffer_meta_src); + QAT_PHYS_CONTIG_FREE(buffer_meta_dst); + QAT_PHYS_CONTIG_FREE(buf_list_src); + QAT_PHYS_CONTIG_FREE(buf_list_dst); return (ret); } -module_param(zfs_qat_disable, int, 0644); -MODULE_PARM_DESC(zfs_qat_disable, "Disable QAT compression"); - #endif diff --git a/module/zfs/qat_compress.h b/module/zfs/qat_compress.h deleted file mode 100644 index ff074646fd..0000000000 --- a/module/zfs/qat_compress.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. 
- * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. 
- * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -#ifndef _SYS_QAT_COMPRESS_H -#define _SYS_QAT_COMPRESS_H - -#if defined(_KERNEL) && defined(HAVE_QAT) -#include -#include "cpa.h" -#include "dc/cpa_dc.h" - -typedef enum qat_compress_dir { - QAT_COMPRESS = 0, - QAT_DECOMPRESS = 1, -} qat_compress_dir_t; - -extern int qat_init(void); -extern void qat_fini(void); -extern boolean_t qat_use_accel(size_t s_len); -extern int qat_compress(qat_compress_dir_t dir, char *src, int src_len, - char *dst, int dst_len, size_t *c_len); -#else -#define CPA_STATUS_SUCCESS 0 -#define qat_init() -#define qat_fini() -#define qat_use_accel(s_len) 0 -#define qat_compress(dir, s, sl, d, dl, cl) 0 -#endif - -#endif /* _SYS_QAT_COMPRESS_H */ diff --git a/module/zfs/qat_crypt.c b/module/zfs/qat_crypt.c new file mode 100644 index 0000000000..d850d9ce8f --- /dev/null +++ b/module/zfs/qat_crypt.c @@ -0,0 +1,371 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +#if defined(_KERNEL) && defined(HAVE_QAT) +#include +#include +#include +#include +#include +#include +#include "lac/cpa_cy_im.h" +#include "qat.h" + +/* + * Max instances in QAT device, each instance is a channel to submit + * jobs to QAT hardware, this is only for pre-allocating instance, + * and session arrays, the actual number of instances are defined in + * the QAT driver's configure file. + */ +#define QAT_CRYPT_MAX_INSTANCES 48 + +#define MAX_PAGE_NUM 1024 + +static boolean_t qat_crypt_init_done = B_FALSE; +static Cpa32U inst_num = 0; /* bumped via atomic_inc_32_nv() */ +static Cpa16U num_inst = 0; +static CpaInstanceHandle cy_inst_handles[QAT_CRYPT_MAX_INSTANCES]; + +typedef struct cy_callback { + CpaBoolean verify_result; + struct completion complete; +} cy_callback_t; + +static void +symcallback(void *p_callback, CpaStatus status, const CpaCySymOp operation, + void *op_data, CpaBufferList *buf_list_dst, CpaBoolean verify) +{ + cy_callback_t *cb = p_callback; + + if (cb != NULL) { + /* indicate that the function has been called */ + cb->verify_result = verify; + complete(&cb->complete); + } +} + +boolean_t +qat_crypt_use_accel(size_t s_len) +{ + return (!zfs_qat_disable && + qat_crypt_init_done && + s_len >= QAT_MIN_BUF_SIZE && + s_len <= QAT_MAX_BUF_SIZE); +} + +void +qat_crypt_clean(void) +{ + for (Cpa32U i = 0; i < num_inst; i++) + cpaCyStopInstance(cy_inst_handles[i]); + + num_inst = 0; + qat_crypt_init_done = B_FALSE; +} + +int +qat_crypt_init(void) +{ + Cpa32U i; + CpaStatus status = CPA_STATUS_FAIL; + + status = cpaCyGetNumInstances(&num_inst); + if (status != CPA_STATUS_SUCCESS) + return (-1); + + /* if the user has configured no QAT encryption units just return */ + if (num_inst == 0) + return (0); + + if (num_inst > 
QAT_CRYPT_MAX_INSTANCES) + 
num_inst = QAT_CRYPT_MAX_INSTANCES; + + status = cpaCyGetInstances(num_inst, &cy_inst_handles[0]); + if (status != CPA_STATUS_SUCCESS) + return (-1); + + for (i = 0; i < num_inst; i++) { + status = cpaCySetAddressTranslation(cy_inst_handles[i], + (void *)virt_to_phys); + if (status != CPA_STATUS_SUCCESS) + goto error; + + status = cpaCyStartInstance(cy_inst_handles[i]); + if (status != CPA_STATUS_SUCCESS) + goto error; + } + + qat_crypt_init_done = B_TRUE; + return (0); + +error: + qat_crypt_clean(); + return (-1); +} + +void +qat_crypt_fini(void) +{ + if (!qat_crypt_init_done) + return; + + qat_crypt_clean(); +} + +static CpaStatus +init_cy_session_ctx(qat_encrypt_dir_t dir, CpaInstanceHandle inst_handle, + CpaCySymSessionCtx **cy_session_ctx, crypto_key_t *key, + Cpa64U crypt, Cpa32U aad_len) +{ + CpaStatus status = CPA_STATUS_SUCCESS; + Cpa32U ctx_size; + Cpa32U ciper_algorithm; + Cpa32U hash_algorithm; + CpaCySymSessionSetupData sd = { 0 }; + + if (zio_crypt_table[crypt].ci_crypt_type == ZC_TYPE_CCM) { + return (CPA_STATUS_FAIL); + } else { + ciper_algorithm = CPA_CY_SYM_CIPHER_AES_GCM; + hash_algorithm = CPA_CY_SYM_HASH_AES_GCM; + } + + sd.cipherSetupData.cipherAlgorithm = ciper_algorithm; + sd.cipherSetupData.pCipherKey = key->ck_data; + sd.cipherSetupData.cipherKeyLenInBytes = key->ck_length / 8; + sd.hashSetupData.hashAlgorithm = hash_algorithm; + sd.hashSetupData.hashMode = CPA_CY_SYM_HASH_MODE_AUTH; + sd.hashSetupData.digestResultLenInBytes = ZIO_DATA_MAC_LEN; + sd.hashSetupData.authModeSetupData.aadLenInBytes = aad_len; + sd.sessionPriority = CPA_CY_PRIORITY_NORMAL; + sd.symOperation = CPA_CY_SYM_OP_ALGORITHM_CHAINING; + sd.digestIsAppended = CPA_FALSE; + sd.verifyDigest = CPA_FALSE; + + if (dir == QAT_ENCRYPT) { + sd.cipherSetupData.cipherDirection = + CPA_CY_SYM_CIPHER_DIRECTION_ENCRYPT; + sd.algChainOrder = + CPA_CY_SYM_ALG_CHAIN_ORDER_HASH_THEN_CIPHER; + } else { + ASSERT3U(dir, ==, QAT_DECRYPT); + sd.cipherSetupData.cipherDirection = + 
CPA_CY_SYM_CIPHER_DIRECTION_DECRYPT;
+		sd.algChainOrder =
+		    CPA_CY_SYM_ALG_CHAIN_ORDER_CIPHER_THEN_HASH;
+	}
+
+	status = cpaCySymSessionCtxGetSize(inst_handle, &sd, &ctx_size);
+	if (status != CPA_STATUS_SUCCESS)
+		return (status);
+
+	status = QAT_PHYS_CONTIG_ALLOC(cy_session_ctx, ctx_size);
+	if (status != CPA_STATUS_SUCCESS)
+		return (status);
+
+	status = cpaCySymInitSession(inst_handle, symcallback, &sd,
+	    *cy_session_ctx);
+	if (status != CPA_STATUS_SUCCESS) {
+		QAT_PHYS_CONTIG_FREE(*cy_session_ctx);
+		return (status);
+	}
+
+	return (CPA_STATUS_SUCCESS);
+}
+
+/*
+ * Allocate the driver-private metadata for the source and destination
+ * buffer lists of a request made up of nr_bufs flat buffers.
+ *
+ * The metadata size is queried from the instance with
+ * cpaCyBufferListGetMetaSize(). When src and dst are the same list only
+ * one allocation is made. On failure the metadata of both lists is
+ * released before the error status is returned.
+ *
+ * NOTE(review): the error path also releases dst->pPrivateMetaData when
+ * the failure happened before it was allocated here, so callers are
+ * expected to pass lists whose pPrivateMetaData starts out as NULL (the
+ * caller in this file zero-initializes both lists).
+ */
+static CpaStatus
+init_cy_buffer_lists(CpaInstanceHandle inst_handle, uint32_t nr_bufs,
+    CpaBufferList *src, CpaBufferList *dst)
+{
+	CpaStatus status = CPA_STATUS_SUCCESS;
+	Cpa32U meta_size = 0;
+
+	status = cpaCyBufferListGetMetaSize(inst_handle, nr_bufs, &meta_size);
+	if (status != CPA_STATUS_SUCCESS)
+		return (status);
+
+	src->numBuffers = nr_bufs;
+	status = QAT_PHYS_CONTIG_ALLOC(&src->pPrivateMetaData, meta_size);
+	if (status != CPA_STATUS_SUCCESS)
+		goto error;
+
+	if (src != dst) {
+		dst->numBuffers = nr_bufs;
+		status = QAT_PHYS_CONTIG_ALLOC(&dst->pPrivateMetaData,
+		    meta_size);
+		if (status != CPA_STATUS_SUCCESS)
+			goto error;
+	}
+
+	return (CPA_STATUS_SUCCESS);
+
+error:
+	QAT_PHYS_CONTIG_FREE(src->pPrivateMetaData);
+	if (src != dst)
+		QAT_PHYS_CONTIG_FREE(dst->pPrivateMetaData);
+
+	return (status);
+}
+
+/*
+ * Encrypt or decrypt enc_len bytes from src_buf into dst_buf on a QAT
+ * crypto instance.
+ *
+ *	dir		QAT_ENCRYPT or QAT_DECRYPT; selects which statistics
+ *			are updated and is passed on to the session setup
+ *	src_buf		input data, enc_len bytes
+ *	dst_buf		output data, enc_len bytes
+ *	aad_buf		additional authenticated data handed to the operation
+ *	aad_len		length of aad_buf, passed on to the session setup
+ *	iv_buf		IV, ZIO_DATA_IV_LEN bytes
+ *	digest_buf	buffer handed to the operation as its digest result
+ *	key, crypt	key and zio_crypt_table[] index for the session setup
+ *	enc_len		number of bytes to process; must fit in MAX_PAGE_NUM
+ *			pages
+ *
+ * The instance is picked round-robin from cy_inst_handles[]. Both buffers
+ * are walked in PAGE_CACHE_SIZE steps, each step is mapped with kmap() and
+ * described by one flat buffer, and the request is submitted with
+ * cpaCySymPerformOp(). The function then blocks until the request's
+ * completion is signalled.
+ *
+ * Returns CPA_STATUS_SUCCESS on success. Any other CpaStatus means the
+ * hardware path did not produce a result and the caller may fall back to
+ * the software implementation.
+ */
+int
+qat_crypt(qat_encrypt_dir_t dir, uint8_t *src_buf, uint8_t *dst_buf,
+    uint8_t *aad_buf, uint32_t aad_len, uint8_t *iv_buf, uint8_t *digest_buf,
+    crypto_key_t *key, uint64_t crypt, uint32_t enc_len)
+{
+	CpaStatus status = CPA_STATUS_SUCCESS;
+	Cpa16U i;
+	CpaInstanceHandle cy_inst_handle;
+	Cpa16U nr_bufs;
+	Cpa32U nr_pages;
+	Cpa32U bytes_left = 0;
+	Cpa8S *in = NULL;
+	Cpa8S *out = NULL;
+	CpaCySymSessionCtx *cy_session_ctx = NULL;
+	cy_callback_t cb;
+	CpaCySymOpData op_data = { 0 };
+	CpaBufferList src_buffer_list = { 0 };
+	CpaBufferList dst_buffer_list = { 0 };
+	CpaFlatBuffer *flat_src_buf_array = NULL;
+	CpaFlatBuffer *flat_src_buf = NULL;
+	CpaFlatBuffer *flat_dst_buf_array = NULL;
+	CpaFlatBuffer *flat_dst_buf = NULL;
+	struct page *in_pages[MAX_PAGE_NUM];
+	struct page *out_pages[MAX_PAGE_NUM];
+	Cpa32S page_num = 0;
+
+	if (dir == QAT_ENCRYPT) {
+		QAT_STAT_BUMP(encrypt_requests);
+		QAT_STAT_INCR(encrypt_total_in_bytes, enc_len);
+	} else {
+		QAT_STAT_BUMP(decrypt_requests);
+		QAT_STAT_INCR(decrypt_total_in_bytes, enc_len);
+	}
+
+	/*
+	 * in_pages[] and out_pages[] live on the stack and hold at most
+	 * MAX_PAGE_NUM entries, so refuse anything larger before any
+	 * resource is acquired. The count is computed in 32 bits first so
+	 * that it cannot be truncated by the 16-bit nr_bufs.
+	 */
+	nr_pages = enc_len / PAGE_CACHE_SIZE +
+	    (enc_len % PAGE_CACHE_SIZE == 0 ? 0 : 1);
+	if (nr_pages > MAX_PAGE_NUM)
+		return (CPA_STATUS_FAIL);
+	nr_bufs = (Cpa16U)nr_pages;
+
+	/* spread requests over the available instances round-robin */
+	i = atomic_inc_32_nv(&inst_num) % num_inst;
+	cy_inst_handle = cy_inst_handles[i];
+
+	status = init_cy_session_ctx(dir, cy_inst_handle, &cy_session_ctx, key,
+	    crypt, aad_len);
+	if (status != CPA_STATUS_SUCCESS)
+		return (status);
+
+	status = init_cy_buffer_lists(cy_inst_handle, nr_bufs, &src_buffer_list,
+	    &dst_buffer_list);
+	if (status != CPA_STATUS_SUCCESS)
+		goto fail;
+
+	status = QAT_PHYS_CONTIG_ALLOC(&flat_src_buf_array,
+	    nr_bufs * sizeof (CpaFlatBuffer));
+	if (status != CPA_STATUS_SUCCESS)
+		goto fail;
+	status = QAT_PHYS_CONTIG_ALLOC(&flat_dst_buf_array,
+	    nr_bufs * sizeof (CpaFlatBuffer));
+	if (status != CPA_STATUS_SUCCESS)
+		goto fail;
+
+	/* describe both buffers as one flat buffer per page-sized step */
+	bytes_left = enc_len;
+	in = src_buf;
+	out = dst_buf;
+	flat_src_buf = flat_src_buf_array;
+	flat_dst_buf = flat_dst_buf_array;
+	while (bytes_left > 0) {
+		in_pages[page_num] = qat_mem_to_page(in);
+		out_pages[page_num] = qat_mem_to_page(out);
+		flat_src_buf->pData = kmap(in_pages[page_num]);
+		flat_dst_buf->pData = kmap(out_pages[page_num]);
+		flat_src_buf->dataLenInBytes = min((long)PAGE_CACHE_SIZE,
+		    (long)bytes_left);
+		flat_dst_buf->dataLenInBytes = min((long)PAGE_CACHE_SIZE,
+		    (long)bytes_left);
+		in += flat_src_buf->dataLenInBytes;
+		out += flat_dst_buf->dataLenInBytes;
+		bytes_left -= flat_src_buf->dataLenInBytes;
+		flat_src_buf++;
+		flat_dst_buf++;
+		page_num++;
+	}
+	src_buffer_list.pBuffers = flat_src_buf_array;
+	dst_buffer_list.pBuffers = flat_dst_buf_array;
+
+	op_data.sessionCtx = cy_session_ctx;
+	op_data.packetType = CPA_CY_SYM_PACKET_TYPE_FULL;
+	op_data.pIv = iv_buf;
+	op_data.ivLenInBytes = ZIO_DATA_IV_LEN;
+	op_data.cryptoStartSrcOffsetInBytes = 0;
+	op_data.messageLenToCipherInBytes = enc_len;
+	op_data.hashStartSrcOffsetInBytes = 0;
+	op_data.messageLenToHashInBytes = 0;
+	op_data.pDigestResult = digest_buf;
+	op_data.pAdditionalAuthData = aad_buf;
+
+	cb.verify_result = CPA_FALSE;
+	init_completion(&cb.complete);
+	status = cpaCySymPerformOp(cy_inst_handle, &cb, &op_data,
+	    &src_buffer_list, &dst_buffer_list, NULL);
+	if (status != CPA_STATUS_SUCCESS)
+		goto fail;
+
+	/*
+	 * Wait without a timeout and without being interruptible. Once the
+	 * request has been submitted it refers to cb (which lives on this
+	 * stack frame), to the mapped pages and to the buffers released
+	 * below, so none of them may go away before the completion has been
+	 * signalled. An interruptible wait returns a negative value when a
+	 * signal arrives, and a timed wait expects its limit in jiffies
+	 * rather than milliseconds; giving up early in either case would
+	 * tear the request down while it may still be in flight.
+	 */
+	wait_for_completion(&cb.complete);
+
+	/* cb.verify_result is still CPA_FALSE unless it was set for us */
+	if (cb.verify_result == CPA_FALSE) {
+		status = CPA_STATUS_FAIL;
+		goto fail;
+	}
+
+	if (dir == QAT_ENCRYPT)
+		QAT_STAT_INCR(encrypt_total_out_bytes, enc_len);
+	else
+		QAT_STAT_INCR(decrypt_total_out_bytes, enc_len);
+
+fail:
+	/* don't count CCM as a failure since it's not supported */
+	if (status != CPA_STATUS_SUCCESS &&
+	    zio_crypt_table[crypt].ci_crypt_type != ZC_TYPE_CCM)
+		QAT_STAT_BUMP(crypt_fails);
+
+	/* page_num counts exactly the pages that were mapped above */
+	for (i = 0; i < page_num; i ++) {
+		kunmap(in_pages[i]);
+		kunmap(out_pages[i]);
+	}
+
+	cpaCySymRemoveSession(cy_inst_handle, cy_session_ctx);
+	QAT_PHYS_CONTIG_FREE(src_buffer_list.pPrivateMetaData);
+	QAT_PHYS_CONTIG_FREE(dst_buffer_list.pPrivateMetaData);
+	QAT_PHYS_CONTIG_FREE(cy_session_ctx);
+	QAT_PHYS_CONTIG_FREE(flat_src_buf_array);
+	QAT_PHYS_CONTIG_FREE(flat_dst_buf_array);
+
+	return (status);
+}
+
+module_param(zfs_qat_disable, int, 0644);
+MODULE_PARM_DESC(zfs_qat_disable, "Disable QAT acceleration");
+
+#endif
diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c
index e742af2550..c67bacbbb0 100644
---
a/module/zfs/spa_misc.c +++ b/module/zfs/spa_misc.c @@ -56,7 +56,7 @@ #include #include "zfs_prop.h" #include -#include "qat_compress.h" +#include "qat.h" /* * SPA locking diff --git a/module/zfs/zio_crypt.c b/module/zfs/zio_crypt.c index d0b39a3f20..741d64ad5f 100644 --- a/module/zfs/zio_crypt.c +++ b/module/zfs/zio_crypt.c @@ -26,6 +26,7 @@ #include #include #include +#include "qat.h" /* * This file is responsible for handling all of the details of generating @@ -1875,16 +1876,6 @@ zio_do_crypt_data(boolean_t encrypt, zio_crypt_key_t *key, uint8_t *salt, crypto_ctx_template_t tmpl; uint8_t *authbuf = NULL; - bzero(&puio, sizeof (uio_t)); - bzero(&cuio, sizeof (uio_t)); - - /* create uios for encryption */ - ret = zio_crypt_init_uios(encrypt, key->zk_version, ot, plainbuf, - cipherbuf, datalen, byteswap, mac, &puio, &cuio, &enc_len, - &authbuf, &auth_len, no_crypt); - if (ret != 0) - return (ret); - /* * If the needed key is the current one, just use it. Otherwise we * need to generate a temporary one from the given salt + master key. @@ -1914,7 +1905,48 @@ zio_do_crypt_data(boolean_t encrypt, zio_crypt_key_t *key, uint8_t *salt, tmpl = NULL; } - /* perform the encryption / decryption */ + /* + * Attempt to use QAT acceleration if we can. We currently don't + * do this for metadnode and ZIL blocks, since they have a much + * more involved buffer layout and the qat_crypt() function only + * works in-place. + */ + if (qat_crypt_use_accel(datalen) && + ot != DMU_OT_INTENT_LOG && ot != DMU_OT_DNODE) { + uint8_t *srcbuf, *dstbuf; + + if (encrypt) { + srcbuf = plainbuf; + dstbuf = cipherbuf; + } else { + srcbuf = cipherbuf; + dstbuf = plainbuf; + } + + ret = qat_crypt((encrypt) ? 
QAT_ENCRYPT : QAT_DECRYPT, srcbuf, + dstbuf, NULL, 0, iv, mac, ckey, key->zk_crypt, datalen); + if (ret == CPA_STATUS_SUCCESS) { + if (locked) { + rw_exit(&key->zk_salt_lock); + locked = B_FALSE; + } + + return (0); + } + /* If the hardware implementation fails fall back to software */ + } + + bzero(&puio, sizeof (uio_t)); + bzero(&cuio, sizeof (uio_t)); + + /* create uios for encryption */ + ret = zio_crypt_init_uios(encrypt, key->zk_version, ot, plainbuf, + cipherbuf, datalen, byteswap, mac, &puio, &cuio, &enc_len, + &authbuf, &auth_len, no_crypt); + if (ret != 0) + goto error; + + /* perform the encryption / decryption in software */ ret = zio_do_crypt_uio(encrypt, key->zk_crypt, ckey, tmpl, iv, enc_len, &puio, &cuio, authbuf, auth_len); if (ret != 0)