Revert "Tune zio buffer caches and their alignments"

This reverts commit bd7a02c251, which
can trigger a pre-existing, though unlikely, bio alignment issue on Linux.
The reverted change itself is good, but the underlying issue it exposes
needs to be resolved before it can be re-applied.

Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Issue #15533
This commit is contained in:
Brian Behlendorf 2023-11-27 13:49:20 -08:00
parent 55dd24c4cc
commit 89fcb8c6f9
1 changed file with 49 additions and 38 deletions

View File

@ -158,22 +158,23 @@ zio_init(void)
zio_link_cache = kmem_cache_create("zio_link_cache", zio_link_cache = kmem_cache_create("zio_link_cache",
sizeof (zio_link_t), 0, NULL, NULL, NULL, NULL, NULL, 0); sizeof (zio_link_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
/*
* For small buffers, we want a cache for each multiple of
* SPA_MINBLOCKSIZE. For larger buffers, we want a cache
* for each quarter-power of 2.
*/
for (c = 0; c < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT; c++) { for (c = 0; c < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT; c++) {
size_t size = (c + 1) << SPA_MINBLOCKSHIFT; size_t size = (c + 1) << SPA_MINBLOCKSHIFT;
size_t align, cflags, data_cflags;
char name[32];
/*
* Create cache for each half-power of 2 size, starting from
* SPA_MINBLOCKSIZE. It should give us memory space efficiency
* of ~7/8, sufficient for transient allocations mostly using
* these caches.
*/
size_t p2 = size; size_t p2 = size;
size_t align = 0;
size_t data_cflags, cflags;
data_cflags = KMC_NODEBUG;
cflags = (zio_exclude_metadata || size > zio_buf_debug_limit) ?
KMC_NODEBUG : 0;
while (!ISP2(p2)) while (!ISP2(p2))
p2 &= p2 - 1; p2 &= p2 - 1;
if (!IS_P2ALIGNED(size, p2 / 2))
continue;
#ifndef _KERNEL #ifndef _KERNEL
/* /*
@ -184,16 +185,24 @@ zio_init(void)
*/ */
if (arc_watch && !IS_P2ALIGNED(size, PAGESIZE)) if (arc_watch && !IS_P2ALIGNED(size, PAGESIZE))
continue; continue;
/*
* Here's the problem - on 4K native devices in userland on
* Linux using O_DIRECT, buffers must be 4K aligned or I/O
* will fail with EINVAL, causing zdb (and others) to coredump.
* Since userland probably doesn't need optimized buffer caches,
* we just force 4K alignment on everything.
*/
align = 8 * SPA_MINBLOCKSIZE;
#else
if (size < PAGESIZE) {
align = SPA_MINBLOCKSIZE;
} else if (IS_P2ALIGNED(size, p2 >> 2)) {
align = PAGESIZE;
}
#endif #endif
if (IS_P2ALIGNED(size, PAGESIZE)) if (align != 0) {
align = PAGESIZE; char name[36];
else
align = 1 << (highbit64(size ^ (size - 1)) - 1);
cflags = (zio_exclude_metadata || size > zio_buf_debug_limit) ?
KMC_NODEBUG : 0;
data_cflags = KMC_NODEBUG;
if (cflags == data_cflags) { if (cflags == data_cflags) {
/* /*
* Resulting kmem caches would be identical. * Resulting kmem caches would be identical.
@ -201,20 +210,22 @@ zio_init(void)
*/ */
(void) snprintf(name, sizeof (name), (void) snprintf(name, sizeof (name),
"zio_buf_comb_%lu", (ulong_t)size); "zio_buf_comb_%lu", (ulong_t)size);
zio_buf_cache[c] = kmem_cache_create(name, size, align, zio_buf_cache[c] = kmem_cache_create(name,
NULL, NULL, NULL, NULL, NULL, cflags); size, align, NULL, NULL, NULL, NULL, NULL,
cflags);
zio_data_buf_cache[c] = zio_buf_cache[c]; zio_data_buf_cache[c] = zio_buf_cache[c];
continue; continue;
} }
(void) snprintf(name, sizeof (name), "zio_buf_%lu", (void) snprintf(name, sizeof (name), "zio_buf_%lu",
(ulong_t)size); (ulong_t)size);
zio_buf_cache[c] = kmem_cache_create(name, size, align, zio_buf_cache[c] = kmem_cache_create(name, size,
NULL, NULL, NULL, NULL, NULL, cflags); align, NULL, NULL, NULL, NULL, NULL, cflags);
(void) snprintf(name, sizeof (name), "zio_data_buf_%lu", (void) snprintf(name, sizeof (name), "zio_data_buf_%lu",
(ulong_t)size); (ulong_t)size);
zio_data_buf_cache[c] = kmem_cache_create(name, size, align, zio_data_buf_cache[c] = kmem_cache_create(name, size,
NULL, NULL, NULL, NULL, NULL, data_cflags); align, NULL, NULL, NULL, NULL, NULL, data_cflags);
}
} }
while (--c != 0) { while (--c != 0) {