diff --git a/cmd/zstream/zstream_redup.c b/cmd/zstream/zstream_redup.c
index dccd325d4c..51266b0b66 100644
--- a/cmd/zstream/zstream_redup.c
+++ b/cmd/zstream/zstream_redup.c
@@ -132,7 +132,7 @@ static void
rdt_insert(redup_table_t *rdt,
uint64_t guid, uint64_t object, uint64_t offset, uint64_t stream_offset)
{
- uint64_t ch = cityhash4(guid, object, offset, 0);
+ uint64_t ch = cityhash3(guid, object, offset);
uint64_t hashcode = BF64_GET(ch, 0, rdt->numhashbits);
redup_entry_t **rdepp;
@@ -152,7 +152,7 @@ rdt_lookup(redup_table_t *rdt,
uint64_t guid, uint64_t object, uint64_t offset,
uint64_t *stream_offsetp)
{
- uint64_t ch = cityhash4(guid, object, offset, 0);
+ uint64_t ch = cityhash3(guid, object, offset);
uint64_t hashcode = BF64_GET(ch, 0, rdt->numhashbits);
for (redup_entry_t *rde = rdt->redup_hash_array[hashcode];
diff --git a/include/cityhash.h b/include/cityhash.h
index 3b2d1e84b5..346fb673a0 100644
--- a/include/cityhash.h
+++ b/include/cityhash.h
@@ -32,6 +32,13 @@
extern "C" {
#endif
+/*
+ * Define 1/2/3-argument specialized versions of cityhash4, which can reduce
+ * instruction count (especially multiplication) on some 32-bit arches.
+ */
+_SYS_CITYHASH_H uint64_t cityhash1(uint64_t);
+_SYS_CITYHASH_H uint64_t cityhash2(uint64_t, uint64_t);
+_SYS_CITYHASH_H uint64_t cityhash3(uint64_t, uint64_t, uint64_t);
_SYS_CITYHASH_H uint64_t cityhash4(uint64_t, uint64_t, uint64_t, uint64_t);
#ifdef __cplusplus
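
The two- and three-argument wrappers are drop-in replacements for cityhash4
calls that padded with trailing zeros, while cityhash1 feeds its word in as
the second argument and therefore does not produce the same value as
cityhash4(w, 0, 0, 0). A minimal userspace sketch of those identities,
assuming the hash implementation can be linked into a test program (not part
of the patch):

#include <assert.h>
#include <stdint.h>
#include <cityhash.h>

int
main(void)
{
	uint64_t a = 0x123456789abcdef0ULL, b = 42, c = 7;

	/* The 2- and 3-argument forms match cityhash4 with trailing zeros. */
	assert(cityhash2(a, b) == cityhash4(a, b, 0, 0));
	assert(cityhash3(a, b, c) == cityhash4(a, b, c, 0));

	/*
	 * cityhash1 places its word second (the one input that is never
	 * multiplied), so it matches cityhash4(0, a, 0, 0) rather than
	 * cityhash4(a, 0, 0, 0).
	 */
	assert(cityhash1(a) == cityhash4(0, a, 0, 0));

	return (0);
}
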
diff --git a/lib/libzfs/libzfs.abi b/lib/libzfs/libzfs.abi
index 51b29643ee..fab97ada68 100644
--- a/lib/libzfs/libzfs.abi
+++ b/lib/libzfs/libzfs.abi
@@ -153,6 +153,9 @@
+  [elf-symbol entries for cityhash1, cityhash2, cityhash3]
@@ -9179,6 +9182,21 @@
+  [function-decl entries for cityhash1, cityhash2, cityhash3]
diff --git a/module/os/linux/zfs/zvol_os.c b/module/os/linux/zfs/zvol_os.c
index d1e3061b50..b9885d4ddf 100644
--- a/module/os/linux/zfs/zvol_os.c
+++ b/module/os/linux/zfs/zvol_os.c
@@ -566,8 +566,8 @@ zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq,
rq->q->queue_hw_ctx[rq->q->mq_map[rq->cpu]]->queue_num;
#endif
#endif
- taskq_hash = cityhash4((uintptr_t)zv, offset >> ZVOL_TASKQ_OFFSET_SHIFT,
- blk_mq_hw_queue, 0);
+ taskq_hash = cityhash3((uintptr_t)zv, offset >> ZVOL_TASKQ_OFFSET_SHIFT,
+ blk_mq_hw_queue);
tq_idx = taskq_hash % ztqs->tqs_cnt;
if (rw == WRITE) {
diff --git a/module/zcommon/cityhash.c b/module/zcommon/cityhash.c
index 413a96df2c..c758ec47d1 100644
--- a/module/zcommon/cityhash.c
+++ b/module/zcommon/cityhash.c
@@ -49,8 +49,8 @@ cityhash_helper(uint64_t u, uint64_t v, uint64_t mul)
return (b);
}
-uint64_t
-cityhash4(uint64_t w1, uint64_t w2, uint64_t w3, uint64_t w4)
+static inline uint64_t
+cityhash_impl(uint64_t w1, uint64_t w2, uint64_t w3, uint64_t w4)
{
uint64_t mul = HASH_K2 + 64;
uint64_t a = w1 * HASH_K1;
@@ -59,9 +59,38 @@ cityhash4(uint64_t w1, uint64_t w2, uint64_t w3, uint64_t w4)
uint64_t d = w3 * HASH_K2;
return (cityhash_helper(rotate(a + b, 43) + rotate(c, 30) + d,
a + rotate(b + HASH_K2, 18) + c, mul));
+}
+
+/*
+ * Passing w as the 2nd argument saves a 64-bit multiplication: w2 is the
+ * only input word that cityhash_impl() does not multiply by a constant.
+ */
+uint64_t
+cityhash1(uint64_t w)
+{
+ return (cityhash_impl(0, w, 0, 0));
+}
+
+uint64_t
+cityhash2(uint64_t w1, uint64_t w2)
+{
+ return (cityhash_impl(w1, w2, 0, 0));
+}
+
+uint64_t
+cityhash3(uint64_t w1, uint64_t w2, uint64_t w3)
+{
+ return (cityhash_impl(w1, w2, w3, 0));
+}
+
+uint64_t
+cityhash4(uint64_t w1, uint64_t w2, uint64_t w3, uint64_t w4)
+{
+ return (cityhash_impl(w1, w2, w3, w4));
}
#if defined(_KERNEL)
+EXPORT_SYMBOL(cityhash1);
+EXPORT_SYMBOL(cityhash2);
+EXPORT_SYMBOL(cityhash3);
EXPORT_SYMBOL(cityhash4);
#endif
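
The header comment's note about multiplication cost refers to how a 64x64-bit
multiply is lowered on a 32-bit target: without a native 64-bit multiplier it
expands to several 32-bit multiplies plus adds and shifts, roughly as sketched
below (illustrative only; the exact sequence is up to the compiler). Because
cityhash_impl() is static inline, each specialized wrapper sees its zero
arguments as compile-time constants and the corresponding w * HASH_K
expansions disappear entirely.

#include <stdint.h>

/*
 * Roughly what a 64x64 -> 64 multiply costs when the target only has
 * 32x32 -> 64 multiplies: three multiplications plus adds and shifts.
 * The a_hi * b_hi term is dropped because it only affects bits >= 64.
 */
static uint64_t
mul64_from_32(uint64_t a, uint64_t b)
{
	uint32_t a_lo = (uint32_t)a, a_hi = (uint32_t)(a >> 32);
	uint32_t b_lo = (uint32_t)b, b_hi = (uint32_t)(b >> 32);
	uint64_t lo = (uint64_t)a_lo * b_lo;
	uint64_t mid = (uint64_t)a_lo * b_hi + (uint64_t)a_hi * b_lo;

	return (lo + (mid << 32));
}
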
diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c
index 8f4fefa4f4..dfe471b644 100644
--- a/module/zfs/dmu_objset.c
+++ b/module/zfs/dmu_objset.c
@@ -66,6 +66,7 @@
#include "zfs_namecheck.h"
#include <sys/vdev_impl.h>
#include <sys/arc.h>
+#include <cityhash.h>
/*
* Needed to close a window in dnode_move() that allows the objset to be freed
@@ -390,27 +391,13 @@ dmu_objset_byteswap(void *buf, size_t size)
}
/*
- * The hash is a CRC-based hash of the objset_t pointer and the object number.
+ * Runs cityhash on the objset_t pointer and the object number.
*/
static uint64_t
dnode_hash(const objset_t *os, uint64_t obj)
{
uintptr_t osv = (uintptr_t)os;
- uint64_t crc = -1ULL;
-
- ASSERT(zfs_crc64_table[128] == ZFS_CRC64_POLY);
- /*
- * The lower 11 bits of the pointer don't have much entropy, because
- * the objset_t is more than 1KB long and so likely aligned to 2KB.
- */
- crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (osv >> 11)) & 0xFF];
- crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (obj >> 0)) & 0xFF];
- crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (obj >> 8)) & 0xFF];
- crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (obj >> 16)) & 0xFF];
-
- crc ^= (osv>>14) ^ (obj>>24);
-
- return (crc);
+ return (cityhash2((uint64_t)osv, obj));
}
static unsigned int
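
The replaced CRC mix had to shift the objset_t pointer by 11 bits by hand
because, as its comment noted, an allocation larger than 1KB is likely 2KB
aligned and so carries no entropy in its low bits. cityhash2() is designed to
mix all of its input bits into the result, so the raw pointer and object
number can be passed through unchanged. A hypothetical sketch of how such a
hash would be reduced to a bucket index (dnode_bucket and the power-of-two
table size are assumptions for illustration, not code from the patch):

/* Hypothetical helper, written as if it lived next to dnode_hash(). */
static uint64_t
dnode_bucket(const objset_t *os, uint64_t obj, uint64_t nbuckets)
{
	/* nbuckets assumed to be a power of two for this sketch. */
	return (dnode_hash(os, obj) & (nbuckets - 1));
}
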
diff --git a/module/zfs/zio.c b/module/zfs/zio.c
index 53992931e0..68570efca9 100644
--- a/module/zfs/zio.c
+++ b/module/zfs/zio.c
@@ -4161,8 +4161,8 @@ zio_alloc_zil(spa_t *spa, objset_t *os, uint64_t txg, blkptr_t *new_bp,
* some parallelism.
*/
int flags = METASLAB_ZIL;
- int allocator = (uint_t)cityhash4(0, 0, 0,
- os->os_dsl_dataset->ds_object) % spa->spa_alloc_count;
+ int allocator = (uint_t)cityhash1(os->os_dsl_dataset->ds_object)
+ % spa->spa_alloc_count;
error = metaslab_alloc(spa, spa_log_class(spa), size, new_bp, 1,
txg, NULL, flags, &io_alloc_list, NULL, allocator);
*slog = (error == 0);
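
The ZIL allocator only needs a stable, well-distributed value per dataset, so
replacing cityhash4(0, 0, 0, obj) with cityhash1(obj) changes the allocator
chosen for a given dataset but remains a valid choice: the hash is only used
to spread ZIL allocations across allocators. A small userspace sketch of the
resulting mapping, with an assumed allocator count of 4 standing in for
spa->spa_alloc_count (illustrative only, not part of the patch):

#include <stdio.h>
#include <stdint.h>
#include <cityhash.h>

int
main(void)
{
	uint64_t spa_alloc_count = 4;	/* assumed for illustration */

	/*
	 * Every ZIL block of a given dataset lands on the same allocator,
	 * while different datasets spread across all of them.
	 */
	for (uint64_t ds_object = 1; ds_object <= 8; ds_object++) {
		printf("ds %llu -> allocator %llu\n",
		    (unsigned long long)ds_object,
		    (unsigned long long)(cityhash1(ds_object) %
		    spa_alloc_count));
	}

	return (0);
}
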