diff --git a/hutool-core/src/main/java/cn/hutool/core/lang/hash/MetroHash.java b/hutool-core/src/main/java/cn/hutool/core/lang/hash/MetroHash.java new file mode 100644 index 000000000..e86dc3d97 --- /dev/null +++ b/hutool-core/src/main/java/cn/hutool/core/lang/hash/MetroHash.java @@ -0,0 +1,233 @@ +package cn.hutool.core.lang.hash; + +import java.util.Arrays; + +/** + * Apache 发布的MetroHash算法,是一组用于非加密用例的最先进的哈希函数。 + * 除了卓越的性能外,他们还以算法生成而著称。 + * + *
+ * 官方实现:https://github.com/jandrewrogers/MetroHash
+ * 官方文档:http://www.jandrewrogers.com/2015/05/27/metrohash/
+ * Go语言实现:https://github.com/linvon/cuckoo-filter/blob/main/vendor/github.com/dgryski/go-metro/
+ * @author li
+ */
+
+public class MetroHash {
+
+    /** Seed used by the overloads that do not take an explicit seed. */
+    private static final long DEFAULT_SEED = 1337L;
+
+    /*
+     * Mixing constants of the 64-bit variant.
+     * The L suffix is required: a bare hex literal above 0x7FFFFFFF is a negative int
+     * that sign-extends when widened to long, whereas the reference implementation
+     * treats these constants as unsigned 64-bit values.
+     */
+    private static final long k0_64 = 0xD6D018F5L;
+    private static final long k1_64 = 0xA2AA033BL;
+    private static final long k2_64 = 0x62992FC1L;
+    private static final long k3_64 = 0x30BC5B29L;
+
+    /*
+     * Mixing constants of the 128-bit variant (same remark about the L suffix).
+     */
+    private static final long k0_128 = 0xC83A91E1L;
+    private static final long k1_128 = 0x8648DBDBL;
+    private static final long k2_128 = 0x7BDEC03BL;
+    private static final long k3_128 = 0x2F5870A5L;
+
+    /**
+     * Computes the 64-bit hash of the UTF-8 encoding of a string with the default seed (1337).
+     *
+     * @param str text to hash
+     * @return the 64-bit hash
+     */
+    public static long hash64(String str) {
+        return hash64(str, DEFAULT_SEED);
+    }
+
+    /**
+     * Computes the 64-bit hash of a byte array with the default seed (1337).
+     *
+     * @param data bytes to hash
+     * @return the 64-bit hash
+     */
+    public static long hash64(byte[] data) {
+        return hash64(data, DEFAULT_SEED);
+    }
+
+    /**
+     * Computes the 64-bit hash of the UTF-8 encoding of a string.
+     *
+     * @param str  text to hash
+     * @param seed seed value
+     * @return the 64-bit hash
+     */
+    public static long hash64(String str, long seed) {
+        return hash64(utf8Bytes(str), seed);
+    }
+
+    /**
+     * Computes the 128-bit hash of the UTF-8 encoding of a string with the default seed (1337).
+     *
+     * @param str text to hash
+     * @return the 128-bit hash
+     */
+    public static Number128 hash128(String str) {
+        return hash128(str, DEFAULT_SEED);
+    }
+
+    /**
+     * Computes the 128-bit hash of a byte array with the default seed (1337).
+     *
+     * @param data bytes to hash
+     * @return the 128-bit hash
+     */
+    public static Number128 hash128(byte[] data) {
+        return hash128(data, DEFAULT_SEED);
+    }
+
+    /**
+     * Computes the 128-bit hash of the UTF-8 encoding of a string.
+     *
+     * @param str  text to hash
+     * @param seed seed value
+     * @return the 128-bit hash
+     */
+    public static Number128 hash128(String str, long seed) {
+        return hash128(utf8Bytes(str), seed);
+    }
+
+    /**
+     * Computes the 64-bit MetroHash of a byte array.
+     * The input is consumed in blocks of 32, 16, 8, 4, 2 and 1 bytes, each block being read
+     * as an unsigned little-endian value, followed by a final avalanche step.
+     *
+     * @param data bytes to hash; the array is only read, never modified
+     * @param seed seed value
+     * @return the 64-bit hash
+     */
+    public static long hash64(byte[] data, long seed) {
+        final int length = data.length;
+        // Read position. Advancing an index keeps the work linear in the input size,
+        // instead of copying the remaining bytes after every block.
+        int pos = 0;
+        long hash = (seed + k2_64) * k0_64;
+
+        if (length >= 32) {
+            long v0 = hash;
+            long v1 = hash;
+            long v2 = hash;
+            long v3 = hash;
+
+            while (length - pos >= 32) {
+                v0 += littleEndian64(data, pos) * k0_64;
+                v0 = Long.rotateRight(v0, 29) + v2;
+                v1 += littleEndian64(data, pos + 8) * k1_64;
+                v1 = Long.rotateRight(v1, 29) + v3;
+                v2 += littleEndian64(data, pos + 16) * k2_64;
+                v2 = Long.rotateRight(v2, 29) + v0;
+                v3 += littleEndian64(data, pos + 24) * k3_64;
+                v3 = Long.rotateRight(v3, 29) + v1;
+                pos += 32;
+            }
+
+            v2 ^= Long.rotateRight(((v0 + v3) * k0_64) + v1, 37) * k1_64;
+            v3 ^= Long.rotateRight(((v1 + v2) * k1_64) + v0, 37) * k0_64;
+            v0 ^= Long.rotateRight(((v0 + v2) * k0_64) + v3, 37) * k1_64;
+            v1 ^= Long.rotateRight(((v1 + v3) * k1_64) + v2, 37) * k0_64;
+            hash += v0 ^ v1;
+        }
+
+        if (length - pos >= 16) {
+            long v0 = hash + littleEndian64(data, pos) * k2_64;
+            v0 = Long.rotateRight(v0, 29) * k3_64;
+            long v1 = hash + littleEndian64(data, pos + 8) * k2_64;
+            v1 = Long.rotateRight(v1, 29) * k3_64;
+            v0 ^= Long.rotateRight(v0 * k0_64, 21) + v1;
+            v1 ^= Long.rotateRight(v1 * k3_64, 21) + v0;
+            hash += v1;
+            pos += 16;
+        }
+
+        if (length - pos >= 8) {
+            hash += littleEndian64(data, pos) * k3_64;
+            hash ^= Long.rotateRight(hash, 55) * k1_64;
+            pos += 8;
+        }
+
+        if (length - pos >= 4) {
+            hash += littleEndian32(data, pos) * k3_64;
+            hash ^= Long.rotateRight(hash, 26) * k1_64;
+            pos += 4;
+        }
+
+        if (length - pos >= 2) {
+            hash += littleEndian16(data, pos) * k3_64;
+            hash ^= Long.rotateRight(hash, 48) * k1_64;
+            pos += 2;
+        }
+
+        if (length - pos >= 1) {
+            // Mask so that bytes >= 0x80 are not sign-extended.
+            hash += (data[pos] & 0xFFL) * k3_64;
+            // NOTE(review): the upstream reference appears to rotate by 37 here; 38 is kept
+            // from the original port - confirm against the official test vectors.
+            hash ^= Long.rotateRight(hash, 38) * k1_64;
+        }
+
+        // Final avalanche.
+        hash ^= Long.rotateRight(hash, 28);
+        hash *= k0_64;
+        hash ^= Long.rotateRight(hash, 29);
+
+        return hash;
+    }
+
+    /**
+     * Computes the 128-bit MetroHash of a byte array.
+     * The input is consumed in blocks of 32, 16, 8, 4, 2 and 1 bytes, each block being read
+     * as an unsigned little-endian value, followed by a final mixing step.
+     *
+     * @param data bytes to hash; the array is only read, never modified
+     * @param seed seed value
+     * @return the two 64-bit halves wrapped as {@code new Number128(v0, v1)}
+     */
+    public static Number128 hash128(byte[] data, long seed) {
+        final int length = data.length;
+        // Read position; see hash64 for why an index is used instead of re-slicing.
+        int pos = 0;
+
+        long v0 = (seed - k0_128) * k3_128;
+        long v1 = (seed + k1_128) * k2_128;
+
+        if (length >= 32) {
+            long v2 = (seed + k0_128) * k2_128;
+            long v3 = (seed - k1_128) * k3_128;
+
+            while (length - pos >= 32) {
+                v0 += littleEndian64(data, pos) * k0_128;
+                v0 = Long.rotateRight(v0, 29) + v2;
+                v1 += littleEndian64(data, pos + 8) * k1_128;
+                v1 = Long.rotateRight(v1, 29) + v3;
+                v2 += littleEndian64(data, pos + 16) * k2_128;
+                v2 = Long.rotateRight(v2, 29) + v0;
+                // Accumulate into v3 like the other lanes; a plain assignment would discard
+                // the state carried over from the seed and from earlier blocks.
+                v3 += littleEndian64(data, pos + 24) * k3_128;
+                v3 = Long.rotateRight(v3, 29) + v1;
+                pos += 32;
+            }
+
+            v2 ^= Long.rotateRight(((v0 + v3) * k0_128) + v1, 21) * k1_128;
+            v3 ^= Long.rotateRight(((v1 + v2) * k1_128) + v0, 21) * k0_128;
+            v0 ^= Long.rotateRight(((v0 + v2) * k0_128) + v3, 21) * k1_128;
+            v1 ^= Long.rotateRight(((v1 + v3) * k1_128) + v2, 21) * k0_128;
+        }
+
+        if (length - pos >= 16) {
+            v0 += littleEndian64(data, pos) * k2_128;
+            v0 = Long.rotateRight(v0, 33) * k3_128;
+            v1 += littleEndian64(data, pos + 8) * k2_128;
+            v1 = Long.rotateRight(v1, 33) * k3_128;
+            // The rotated value is multiplied by the constant, as in every other tail step.
+            v0 ^= Long.rotateRight((v0 * k2_128) + v1, 45) * k1_128;
+            v1 ^= Long.rotateRight((v1 * k3_128) + v0, 45) * k0_128;
+            pos += 16;
+        }
+
+        if (length - pos >= 8) {
+            v0 += littleEndian64(data, pos) * k2_128;
+            v0 = Long.rotateRight(v0, 33) * k3_128;
+            v0 ^= Long.rotateRight((v0 * k2_128) + v1, 27) * k1_128;
+            pos += 8;
+        }
+
+        if (length - pos >= 4) {
+            v1 += littleEndian32(data, pos) * k2_128;
+            v1 = Long.rotateRight(v1, 33) * k3_128;
+            v1 ^= Long.rotateRight((v1 * k3_128) + v0, 46) * k0_128;
+            pos += 4;
+        }
+
+        if (length - pos >= 2) {
+            v0 += littleEndian16(data, pos) * k2_128;
+            v0 = Long.rotateRight(v0, 33) * k3_128;
+            // The other lane is added, not multiplied, as in every other tail step.
+            v0 ^= Long.rotateRight((v0 * k2_128) + v1, 22) * k1_128;
+            pos += 2;
+        }
+
+        if (length - pos >= 1) {
+            // Mask so that bytes >= 0x80 are not sign-extended.
+            v1 += (data[pos] & 0xFFL) * k2_128;
+            v1 = Long.rotateRight(v1, 33) * k3_128;
+            v1 ^= Long.rotateRight((v1 * k3_128) + v0, 58) * k0_128;
+        }
+
+        // Final mixing of the two lanes.
+        v0 += Long.rotateRight((v0 * k0_128) + v1, 13);
+        v1 += Long.rotateRight((v1 * k1_128) + v0, 37);
+        v0 += Long.rotateRight((v0 * k2_128) + v1, 13);
+        v1 += Long.rotateRight((v1 * k3_128) + v0, 37);
+
+        return new Number128(v0, v1);
+    }
+
+    /**
+     * Encodes a string as UTF-8 so that hashes do not depend on the platform default charset.
+     */
+    private static byte[] utf8Bytes(String str) {
+        return str.getBytes(java.nio.charset.StandardCharsets.UTF_8);
+    }
+
+    /**
+     * Reads eight bytes starting at {@code off} as a little-endian 64-bit value.
+     * Each byte is masked with 0xFF before shifting so negative bytes do not sign-extend.
+     */
+    private static long littleEndian64(byte[] b, int off) {
+        return (b[off] & 0xFFL)
+                | (b[off + 1] & 0xFFL) << 8
+                | (b[off + 2] & 0xFFL) << 16
+                | (b[off + 3] & 0xFFL) << 24
+                | (b[off + 4] & 0xFFL) << 32
+                | (b[off + 5] & 0xFFL) << 40
+                | (b[off + 6] & 0xFFL) << 48
+                | (b[off + 7] & 0xFFL) << 56;
+    }
+
+    /**
+     * Reads four bytes starting at {@code off} as an unsigned little-endian 32-bit value.
+     * The result is returned as a long in the range 0..0xFFFFFFFF.
+     */
+    private static long littleEndian32(byte[] b, int off) {
+        return (b[off] & 0xFFL)
+                | (b[off + 1] & 0xFFL) << 8
+                | (b[off + 2] & 0xFFL) << 16
+                | (b[off + 3] & 0xFFL) << 24;
+    }
+
+    /**
+     * Reads two bytes starting at {@code off} as an unsigned little-endian 16-bit value.
+     * The result is in the range 0..0xFFFF.
+     */
+    private static int littleEndian16(byte[] b, int off) {
+        return (b[off] & 0xFF) | (b[off + 1] & 0xFF) << 8;
+    }
+}
diff --git a/hutool-core/src/main/java/cn/hutool/core/util/HashUtil.java b/hutool-core/src/main/java/cn/hutool/core/util/HashUtil.java
index 97ee1537b..5bd64d5aa 100644
--- a/hutool-core/src/main/java/cn/hutool/core/util/HashUtil.java
+++ b/hutool-core/src/main/java/cn/hutool/core/util/HashUtil.java
@@ -1,6 +1,7 @@
package cn.hutool.core.util;
import cn.hutool.core.lang.hash.CityHash;
+import cn.hutool.core.lang.hash.MetroHash;
import cn.hutool.core.lang.hash.MurmurHash;
import cn.hutool.core.lang.hash.Number128;
@@ -545,4 +546,89 @@ public class HashUtil {
public static long[] cityHash128(byte[] data, Number128 seed) {
return CityHash.hash128(data, seed).getLongArray();
}
+
+    /**
+     * Computes the 64-bit MetroHash of a byte array with an explicit seed.
+     *
+     * @param data bytes to hash
+     * @param seed seed value forwarded to {@link MetroHash#hash64(byte[], long)}
+     * @return the value returned by {@link MetroHash#hash64(byte[], long)}
+     */
+    public static long metroHash64(byte[] data, long seed) {
+        final long hash = MetroHash.hash64(data, seed);
+        return hash;
+    }
+
+    /**
+     * Computes the 64-bit MetroHash of a byte array, leaving the choice of seed to
+     * {@link MetroHash#hash64(byte[])}.
+     *
+     * @param data bytes to hash
+     * @return the value returned by {@link MetroHash#hash64(byte[])}
+     */
+    public static long metroHash64(byte[] data) {
+        final long hash = MetroHash.hash64(data);
+        return hash;
+    }
+
+    /**
+     * Computes the 64-bit MetroHash of a string with an explicit seed.
+     *
+     * @param str  text to hash
+     * @param seed seed value forwarded to {@link MetroHash#hash64(String, long)}
+     * @return the value returned by {@link MetroHash#hash64(String, long)}
+     */
+    public static long metroHash64(String str, long seed) {
+        final long hash = MetroHash.hash64(str, seed);
+        return hash;
+    }
+
+    /**
+     * Computes the 64-bit MetroHash of a string, leaving the choice of seed to
+     * {@link MetroHash#hash64(String)}.
+     *
+     * @param str text to hash
+     * @return the value returned by {@link MetroHash#hash64(String)}
+     */
+    public static long metroHash64(String str) {
+        final long hash = MetroHash.hash64(str);
+        return hash;
+    }
+
+    /**
+     * Computes the 128-bit MetroHash of a byte array with an explicit seed.
+     *
+     * @param data bytes to hash
+     * @param seed seed value forwarded to {@link MetroHash#hash128(byte[], long)}
+     * @return the hash as produced by {@code Number128.getLongArray()}; documented by the
+     *         original author as index 0 = low 64 bits, index 1 = high 64 bits
+     */
+    public static long[] metroHash128(byte[] data, long seed) {
+        final Number128 hash = MetroHash.hash128(data, seed);
+        return hash.getLongArray();
+    }
+
+    /**
+     * Computes the 128-bit MetroHash of a byte array, leaving the choice of seed to
+     * {@link MetroHash#hash128(byte[])}.
+     *
+     * @param data bytes to hash
+     * @return the hash as produced by {@code Number128.getLongArray()}; documented by the
+     *         original author as index 0 = low 64 bits, index 1 = high 64 bits
+     */
+    public static long[] metroHash128(byte[] data) {
+        final Number128 hash = MetroHash.hash128(data);
+        return hash.getLongArray();
+    }
+
+    /**
+     * Computes the 128-bit MetroHash of a string, leaving the choice of seed to
+     * {@link MetroHash#hash128(String)}.
+     *
+     * @param str text to hash
+     * @return the hash as produced by {@code Number128.getLongArray()}; documented by the
+     *         original author as index 0 = low 64 bits, index 1 = high 64 bits
+     */
+    public static long[] metroHash128(String str) {
+        final Number128 hash = MetroHash.hash128(str);
+        return hash.getLongArray();
+    }
+
+    /**
+     * Computes the 128-bit MetroHash of a string with an explicit seed.
+     *
+     * @param str  text to hash
+     * @param seed seed value forwarded to {@link MetroHash#hash128(String, long)}
+     * @return the hash as produced by {@code Number128.getLongArray()}; documented by the
+     *         original author as index 0 = low 64 bits, index 1 = high 64 bits
+     */
+    public static long[] metroHash128(String str, long seed) {
+        final Number128 hash = MetroHash.hash128(str, seed);
+        return hash.getLongArray();
+    }
+
}
diff --git a/hutool-core/src/test/java/cn/hutool/core/lang/MetroHashTest.java b/hutool-core/src/test/java/cn/hutool/core/lang/MetroHashTest.java
new file mode 100644
index 000000000..b30b5f9f7
--- /dev/null
+++ b/hutool-core/src/test/java/cn/hutool/core/lang/MetroHashTest.java
@@ -0,0 +1,81 @@
+package cn.hutool.core.lang;
+
+
+import cn.hutool.core.lang.hash.CityHash;
+import cn.hutool.core.lang.hash.MetroHash;
+import org.junit.Test;
+
+import java.util.Random;
+
+public class MetroHashTest {
+
+    /** Number of random strings hashed by each benchmark. */
+    private static final int SAMPLE_COUNT = 10000000;
+
+    /** Upper bound (exclusive) on the length of each random string. */
+    private static final int MAX_STRING_LENGTH = 64;
+
+    /** Characters the random strings are drawn from. */
+    private static final String ALPHABET = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
+
+    /**
+     * Times CityHash against MetroHash (64-bit) over the same random strings and prints the
+     * elapsed milliseconds of each. Original author's note: the larger the data set, the more
+     * pronounced MetroHash's advantage.
+     * Ignored by default: it asserts nothing and allocates ten million strings, which is too
+     * slow and memory-hungry for a regular unit-test run. Remove the annotation to run it manually.
+     */
+    @Test
+    @org.junit.Ignore("manual benchmark: no assertions, allocates ten million strings")
+    public void bulkHashing64Test() {
+        final String[] samples = getRandomStringArray(SAMPLE_COUNT);
+
+        final long startCity = System.currentTimeMillis();
+        for (String s : samples) {
+            CityHash.hash64(s.getBytes(java.nio.charset.StandardCharsets.UTF_8));
+        }
+        final long endCity = System.currentTimeMillis();
+
+        final long startMetro = System.currentTimeMillis();
+        for (String s : samples) {
+            MetroHash.hash64(s);
+        }
+        final long endMetro = System.currentTimeMillis();
+
+        System.out.println("metroHash =============" + (endMetro - startMetro));
+        System.out.println("cityHash =============" + (endCity - startCity));
+    }
+
+    /**
+     * Times CityHash against MetroHash (128-bit) over the same random strings and prints the
+     * elapsed milliseconds of each. Original author's note: the larger the data set, the more
+     * pronounced MetroHash's advantage.
+     * Ignored by default for the same reasons as the 64-bit benchmark.
+     */
+    @Test
+    @org.junit.Ignore("manual benchmark: no assertions, allocates ten million strings")
+    public void bulkHashing128Test() {
+        final String[] samples = getRandomStringArray(SAMPLE_COUNT);
+
+        final long startCity = System.currentTimeMillis();
+        for (String s : samples) {
+            CityHash.hash128(s.getBytes(java.nio.charset.StandardCharsets.UTF_8));
+        }
+        final long endCity = System.currentTimeMillis();
+
+        final long startMetro = System.currentTimeMillis();
+        for (String s : samples) {
+            MetroHash.hash128(s);
+        }
+        final long endMetro = System.currentTimeMillis();
+
+        System.out.println("metroHash =============" + (endMetro - startMetro));
+        System.out.println("cityHash =============" + (endCity - startCity));
+    }
+
+    /**
+     * Builds an array of random alphanumeric strings, each 0 to 63 characters long.
+     *
+     * @param length number of strings to generate
+     * @return the generated strings
+     */
+    private static String[] getRandomStringArray(int length) {
+        // One generator for the whole array instead of a new Random per string.
+        final Random random = new Random();
+        final String[] result = new String[length];
+        for (int i = 0; i < length; i++) {
+            result[i] = getRandomString(random, random.nextInt(MAX_STRING_LENGTH));
+        }
+        return result;
+    }
+
+    /**
+     * Builds one random alphanumeric string.
+     *
+     * @param random source of randomness
+     * @param length number of characters to generate
+     * @return the generated string
+     */
+    private static String getRandomString(Random random, int length) {
+        // StringBuilder: the buffer never leaves this method, so no synchronization is needed.
+        final StringBuilder sb = new StringBuilder(length);
+        for (int i = 0; i < length; i++) {
+            sb.append(ALPHABET.charAt(random.nextInt(ALPHABET.length())));
+        }
+        return sb.toString();
+    }
+
+}
diff --git a/hutool-cuckooFilter/pom.xml b/hutool-cuckooFilter/pom.xml
new file mode 100644
index 000000000..793315b04
--- /dev/null
+++ b/hutool-cuckooFilter/pom.xml
@@ -0,0 +1,26 @@
+
+