diff --git a/cmd/zstream/zstream_redup.c b/cmd/zstream/zstream_redup.c
index dccd325d4c..51266b0b66 100644
--- a/cmd/zstream/zstream_redup.c
+++ b/cmd/zstream/zstream_redup.c
@@ -132,7 +132,7 @@ static void
 rdt_insert(redup_table_t *rdt,
     uint64_t guid, uint64_t object, uint64_t offset, uint64_t stream_offset)
 {
-	uint64_t ch = cityhash4(guid, object, offset, 0);
+	uint64_t ch = cityhash3(guid, object, offset);
 	uint64_t hashcode = BF64_GET(ch, 0, rdt->numhashbits);
 	redup_entry_t **rdepp;
 
@@ -152,7 +152,7 @@ rdt_lookup(redup_table_t *rdt,
     uint64_t guid, uint64_t object, uint64_t offset,
     uint64_t *stream_offsetp)
 {
-	uint64_t ch = cityhash4(guid, object, offset, 0);
+	uint64_t ch = cityhash3(guid, object, offset);
 	uint64_t hashcode = BF64_GET(ch, 0, rdt->numhashbits);
 
 	for (redup_entry_t *rde = rdt->redup_hash_array[hashcode];
diff --git a/include/cityhash.h b/include/cityhash.h
index 3b2d1e84b5..346fb673a0 100644
--- a/include/cityhash.h
+++ b/include/cityhash.h
@@ -32,6 +32,13 @@
 extern "C" {
 #endif
 
+/*
+ * Define 1/2/3-argument specialized versions of cityhash4, which can reduce
+ * instruction count (especially multiplication) on some 32-bit arches.
+ */
+_SYS_CITYHASH_H uint64_t cityhash1(uint64_t);
+_SYS_CITYHASH_H uint64_t cityhash2(uint64_t, uint64_t);
+_SYS_CITYHASH_H uint64_t cityhash3(uint64_t, uint64_t, uint64_t);
 _SYS_CITYHASH_H uint64_t cityhash4(uint64_t, uint64_t, uint64_t, uint64_t);
 
 #ifdef __cplusplus
diff --git a/lib/libzfs/libzfs.abi b/lib/libzfs/libzfs.abi
index 51b29643ee..fab97ada68 100644
--- a/lib/libzfs/libzfs.abi
+++ b/lib/libzfs/libzfs.abi
@@ -153,6 +153,9 @@
+
+
+
@@ -9179,6 +9182,21 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/module/os/linux/zfs/zvol_os.c b/module/os/linux/zfs/zvol_os.c
index d1e3061b50..b9885d4ddf 100644
--- a/module/os/linux/zfs/zvol_os.c
+++ b/module/os/linux/zfs/zvol_os.c
@@ -566,8 +566,8 @@ zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq,
 		    rq->q->queue_hw_ctx[rq->q->mq_map[rq->cpu]]->queue_num;
 #endif
 #endif
-	taskq_hash = cityhash4((uintptr_t)zv, offset >> ZVOL_TASKQ_OFFSET_SHIFT,
-	    blk_mq_hw_queue, 0);
+	taskq_hash = cityhash3((uintptr_t)zv, offset >> ZVOL_TASKQ_OFFSET_SHIFT,
+	    blk_mq_hw_queue);
 	tq_idx = taskq_hash % ztqs->tqs_cnt;
 
 	if (rw == WRITE) {
diff --git a/module/zcommon/cityhash.c b/module/zcommon/cityhash.c
index 413a96df2c..c758ec47d1 100644
--- a/module/zcommon/cityhash.c
+++ b/module/zcommon/cityhash.c
@@ -49,8 +49,8 @@ cityhash_helper(uint64_t u, uint64_t v, uint64_t mul)
 	return (b);
 }
 
-uint64_t
-cityhash4(uint64_t w1, uint64_t w2, uint64_t w3, uint64_t w4)
+static inline uint64_t
+cityhash_impl(uint64_t w1, uint64_t w2, uint64_t w3, uint64_t w4)
 {
 	uint64_t mul = HASH_K2 + 64;
 	uint64_t a = w1 * HASH_K1;
@@ -59,9 +59,38 @@ cityhash4(uint64_t w1, uint64_t w2, uint64_t w3, uint64_t w4)
 	uint64_t d = w3 * HASH_K2;
 	return (cityhash_helper(rotate(a + b, 43) + rotate(c, 30) + d,
 	    a + rotate(b + HASH_K2, 18) + c, mul));
+}
+
+/*
+ * Passing w as the 2nd argument could save one 64-bit multiplication.
+ */
+uint64_t
+cityhash1(uint64_t w)
+{
+	return (cityhash_impl(0, w, 0, 0));
+}
+
+uint64_t
+cityhash2(uint64_t w1, uint64_t w2)
+{
+	return (cityhash_impl(w1, w2, 0, 0));
+}
+
+uint64_t
+cityhash3(uint64_t w1, uint64_t w2, uint64_t w3)
+{
+	return (cityhash_impl(w1, w2, w3, 0));
+}
+
+uint64_t
+cityhash4(uint64_t w1, uint64_t w2, uint64_t w3, uint64_t w4)
+{
+	return (cityhash_impl(w1, w2, w3, w4));
 }
 
 #if defined(_KERNEL)
+EXPORT_SYMBOL(cityhash1);
+EXPORT_SYMBOL(cityhash2);
+EXPORT_SYMBOL(cityhash3);
 EXPORT_SYMBOL(cityhash4);
 #endif
diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c
index 8f4fefa4f4..dfe471b644 100644
--- a/module/zfs/dmu_objset.c
+++ b/module/zfs/dmu_objset.c
@@ -66,6 +66,7 @@
 #include "zfs_namecheck.h"
 #include
 #include
+#include <cityhash.h>
 
 /*
  * Needed to close a window in dnode_move() that allows the objset to be freed
@@ -390,27 +391,13 @@ dmu_objset_byteswap(void *buf, size_t size)
 }
 
 /*
- * The hash is a CRC-based hash of the objset_t pointer and the object number.
+ * Runs cityhash on the objset_t pointer and the object number.
  */
 static uint64_t
 dnode_hash(const objset_t *os, uint64_t obj)
 {
 	uintptr_t osv = (uintptr_t)os;
-	uint64_t crc = -1ULL;
-
-	ASSERT(zfs_crc64_table[128] == ZFS_CRC64_POLY);
-	/*
-	 * The lower 11 bits of the pointer don't have much entropy, because
-	 * the objset_t is more than 1KB long and so likely aligned to 2KB.
-	 */
-	crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (osv >> 11)) & 0xFF];
-	crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (obj >> 0)) & 0xFF];
-	crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (obj >> 8)) & 0xFF];
-	crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (obj >> 16)) & 0xFF];
-
-	crc ^= (osv>>14) ^ (obj>>24);
-
-	return (crc);
+	return (cityhash2((uint64_t)osv, obj));
 }
 
 static unsigned int
diff --git a/module/zfs/zio.c b/module/zfs/zio.c
index 53992931e0..68570efca9 100644
--- a/module/zfs/zio.c
+++ b/module/zfs/zio.c
@@ -4161,8 +4161,8 @@ zio_alloc_zil(spa_t *spa, objset_t *os, uint64_t txg, blkptr_t *new_bp,
 	 * some parallelism.
 	 */
 	int flags = METASLAB_ZIL;
-	int allocator = (uint_t)cityhash4(0, 0, 0,
-	    os->os_dsl_dataset->ds_object) % spa->spa_alloc_count;
+	int allocator = (uint_t)cityhash1(os->os_dsl_dataset->ds_object)
+	    % spa->spa_alloc_count;
 	error = metaslab_alloc(spa, spa_log_class(spa), size, new_bp, 1,
 	    txg, NULL, flags, &io_alloc_list, NULL, allocator);
 	*slog = (error == 0);
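
The wrappers above are plain zero-padded calls into cityhash_impl(), so their results relate to cityhash4() in a fixed way, with cityhash1() routing its single word through the second slot as noted in its comment. Call sites that also moved their argument into a different slot, such as zio_alloc_zil() going from cityhash4(0, 0, 0, obj) to cityhash1(obj), therefore compute a different hash value than before; that is harmless because every converted caller uses the result only for in-memory bucket, taskq, or allocator selection. The userland sketch below (illustration only, not part of the patch) checks the zero-padding contract directly; it assumes the in-tree <cityhash.h> is on the include path and the cityhash objects are linked in, and the input values are arbitrary.

/*
 * Sketch: the specialized wrappers should match cityhash4() with the unused
 * words zeroed; cityhash1(w) passes w as the second argument, mirroring the
 * wrapper definitions in module/zcommon/cityhash.c.
 */
#include <stdint.h>
#include <assert.h>
#include <cityhash.h>

int
main(void)
{
	uint64_t w1 = 0x0123456789abcdefULL;
	uint64_t w2 = 0xfedcba9876543210ULL;
	uint64_t w3 = 42;

	assert(cityhash1(w1) == cityhash4(0, w1, 0, 0));
	assert(cityhash2(w1, w2) == cityhash4(w1, w2, 0, 0));
	assert(cityhash3(w1, w2, w3) == cityhash4(w1, w2, w3, 0));

	return (0);
}

Keeping cityhash_impl() static inline is what lets the constant zero arguments fold away inside the wrappers, which is where the reduced instruction count (in particular the dropped 64-bit multiplications) on 32-bit arches comes from.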