diff --git a/hutool-core/src/main/java/cn/hutool/core/lang/hash/MetroHash.java b/hutool-core/src/main/java/cn/hutool/core/lang/hash/MetroHash.java
new file mode 100644
index 000000000..e86dc3d97
--- /dev/null
+++ b/hutool-core/src/main/java/cn/hutool/core/lang/hash/MetroHash.java
@@ -0,0 +1,312 @@
+package cn.hutool.core.lang.hash;
+
+import java.nio.charset.StandardCharsets;
+
+/**
+ * MetroHash: a family of fast hash functions for non-cryptographic use cases.
+ * Besides their speed, these functions are notable for having been generated algorithmically.
+ *
+ * <p>
+ * All arithmetic is modulo 2^64. Java has no unsigned 64-bit type, so input bytes are masked
+ * before widening and rotations are unsigned, mirroring the unsigned reference code.
+ *
+ * <p>
+ * Reference implementation: https://github.com/jandrewrogers/MetroHash<br>
+ * Reference documentation: http://www.jandrewrogers.com/2015/05/27/metrohash/<br>
+ * Go implementation: https://github.com/linvon/cuckoo-filter/blob/main/vendor/github.com/dgryski/go-metro/
+ *
+ * @author li
+ */
+public class MetroHash {
+
+	/**
+	 * Seed used by the overloads that do not take an explicit seed.
+	 */
+	private static final long DEFAULT_SEED = 1337;
+
+	/**
+	 * Multipliers of the 64-bit variant. The {@code L} suffix matters: an {@code int} literal
+	 * with the top bit set would be sign-extended when widened to {@code long}.
+	 */
+	private static final long K0_64 = 0xD6D018F5L;
+	private static final long K1_64 = 0xA2AA033BL;
+	private static final long K2_64 = 0x62992FC1L;
+	private static final long K3_64 = 0x30BC5B29L;
+
+	/**
+	 * Multipliers of the 128-bit variant.
+	 */
+	private static final long K0_128 = 0xC83A91E1L;
+	private static final long K1_128 = 0x8648DBDBL;
+	private static final long K2_128 = 0x7BDEC03BL;
+	private static final long K3_128 = 0x2F5870A5L;
+
+	/**
+	 * Computes the 64-bit hash of a string's UTF-8 bytes with the default seed.
+	 *
+	 * @param str the string to hash, must not be {@code null}
+	 * @return the 64-bit hash value
+	 */
+	public static long hash64(String str) {
+		return hash64(str, DEFAULT_SEED);
+	}
+
+	/**
+	 * Computes the 64-bit hash of a byte array with the default seed.
+	 *
+	 * @param data the bytes to hash, must not be {@code null}
+	 * @return the 64-bit hash value
+	 */
+	public static long hash64(byte[] data) {
+		return hash64(data, DEFAULT_SEED);
+	}
+
+	/**
+	 * Computes the 64-bit hash of a string's UTF-8 bytes.
+	 *
+	 * @param str  the string to hash, must not be {@code null}
+	 * @param seed the hash seed
+	 * @return the 64-bit hash value
+	 */
+	public static long hash64(String str, long seed) {
+		return hash64(str.getBytes(StandardCharsets.UTF_8), seed);
+	}
+
+	/**
+	 * Computes the 128-bit hash of a string's UTF-8 bytes with the default seed.
+	 *
+	 * @param str the string to hash, must not be {@code null}
+	 * @return the 128-bit hash value
+	 */
+	public static Number128 hash128(String str) {
+		return hash128(str, DEFAULT_SEED);
+	}
+
+	/**
+	 * Computes the 128-bit hash of a byte array with the default seed.
+	 *
+	 * @param data the bytes to hash, must not be {@code null}
+	 * @return the 128-bit hash value
+	 */
+	public static Number128 hash128(byte[] data) {
+		return hash128(data, DEFAULT_SEED);
+	}
+
+	/**
+	 * Computes the 128-bit hash of a string's UTF-8 bytes.
+	 *
+	 * @param str  the string to hash, must not be {@code null}
+	 * @param seed the hash seed
+	 * @return the 128-bit hash value
+	 */
+	public static Number128 hash128(String str, long seed) {
+		return hash128(str.getBytes(StandardCharsets.UTF_8), seed);
+	}
+
+	/**
+	 * Computes the 64-bit hash of a byte array. The array is read in place and never copied.
+	 *
+	 * @param data the bytes to hash, must not be {@code null}
+	 * @param seed the hash seed
+	 * @return the 64-bit hash value
+	 */
+	public static long hash64(byte[] data, long seed) {
+		final int length = data.length;
+		int pos = 0;
+		long hash = (seed + K2_64) * K0_64;
+
+		if (length >= 32) {
+			long v0 = hash;
+			long v1 = hash;
+			long v2 = hash;
+			long v3 = hash;
+
+			// consume the input in 32-byte stripes, one 8-byte word per lane
+			do {
+				v0 += readLong(data, pos) * K0_64;
+				v0 = Long.rotateRight(v0, 29) + v2;
+				v1 += readLong(data, pos + 8) * K1_64;
+				v1 = Long.rotateRight(v1, 29) + v3;
+				v2 += readLong(data, pos + 16) * K2_64;
+				v2 = Long.rotateRight(v2, 29) + v0;
+				v3 += readLong(data, pos + 24) * K3_64;
+				v3 = Long.rotateRight(v3, 29) + v1;
+				pos += 32;
+			} while (length - pos >= 32);
+
+			v2 ^= Long.rotateRight(((v0 + v3) * K0_64) + v1, 37) * K1_64;
+			v3 ^= Long.rotateRight(((v1 + v2) * K1_64) + v0, 37) * K0_64;
+			v0 ^= Long.rotateRight(((v0 + v2) * K0_64) + v3, 37) * K1_64;
+			v1 ^= Long.rotateRight(((v1 + v3) * K1_64) + v2, 37) * K0_64;
+			hash += v0 ^ v1;
+		}
+
+		if (length - pos >= 16) {
+			long v0 = hash + readLong(data, pos) * K2_64;
+			v0 = Long.rotateRight(v0, 29) * K3_64;
+			long v1 = hash + readLong(data, pos + 8) * K2_64;
+			v1 = Long.rotateRight(v1, 29) * K3_64;
+			v0 ^= Long.rotateRight(v0 * K0_64, 21) + v1;
+			v1 ^= Long.rotateRight(v1 * K3_64, 21) + v0;
+			hash += v1;
+			pos += 16;
+		}
+
+		if (length - pos >= 8) {
+			hash += readLong(data, pos) * K3_64;
+			hash ^= Long.rotateRight(hash, 55) * K1_64;
+			pos += 8;
+		}
+
+		if (length - pos >= 4) {
+			hash += readUnsignedInt(data, pos) * K3_64;
+			hash ^= Long.rotateRight(hash, 26) * K1_64;
+			pos += 4;
+		}
+
+		if (length - pos >= 2) {
+			hash += readUnsignedShort(data, pos) * K3_64;
+			hash ^= Long.rotateRight(hash, 48) * K1_64;
+			pos += 2;
+		}
+
+		if (length - pos >= 1) {
+			// the reference implementation rotates by 37 here
+			hash += (data[pos] & 0xFFL) * K3_64;
+			hash ^= Long.rotateRight(hash, 37) * K1_64;
+		}
+
+		// finalization
+		hash ^= Long.rotateRight(hash, 28);
+		hash *= K0_64;
+		hash ^= Long.rotateRight(hash, 29);
+
+		return hash;
+	}
+
+	/**
+	 * Computes the 128-bit hash of a byte array. The array is read in place and never copied.
+	 *
+	 * @param data the bytes to hash, must not be {@code null}
+	 * @param seed the hash seed
+	 * @return the 128-bit hash value
+	 */
+	public static Number128 hash128(byte[] data, long seed) {
+		final int length = data.length;
+		int pos = 0;
+
+		long v0 = (seed - K0_128) * K3_128;
+		long v1 = (seed + K1_128) * K2_128;
+
+		if (length >= 32) {
+			long v2 = (seed + K0_128) * K2_128;
+			long v3 = (seed - K1_128) * K3_128;
+
+			do {
+				v0 += readLong(data, pos) * K0_128;
+				v0 = Long.rotateRight(v0, 29) + v2;
+				v1 += readLong(data, pos + 8) * K1_128;
+				v1 = Long.rotateRight(v1, 29) + v3;
+				v2 += readLong(data, pos + 16) * K2_128;
+				v2 = Long.rotateRight(v2, 29) + v0;
+				// accumulate like the other lanes; plain assignment would discard earlier stripes
+				v3 += readLong(data, pos + 24) * K3_128;
+				v3 = Long.rotateRight(v3, 29) + v1;
+				pos += 32;
+			} while (length - pos >= 32);
+
+			v2 ^= Long.rotateRight(((v0 + v3) * K0_128) + v1, 21) * K1_128;
+			v3 ^= Long.rotateRight(((v1 + v2) * K1_128) + v0, 21) * K0_128;
+			v0 ^= Long.rotateRight(((v0 + v2) * K0_128) + v3, 21) * K1_128;
+			v1 ^= Long.rotateRight(((v1 + v3) * K1_128) + v2, 21) * K0_128;
+		}
+
+		if (length - pos >= 16) {
+			v0 += readLong(data, pos) * K2_128;
+			v0 = Long.rotateRight(v0, 33) * K3_128;
+			v1 += readLong(data, pos + 8) * K2_128;
+			v1 = Long.rotateRight(v1, 33) * K3_128;
+			v0 ^= Long.rotateRight((v0 * K2_128) + v1, 45) * K1_128;
+			v1 ^= Long.rotateRight((v1 * K3_128) + v0, 45) * K0_128;
+			pos += 16;
+		}
+
+		if (length - pos >= 8) {
+			v0 += readLong(data, pos) * K2_128;
+			v0 = Long.rotateRight(v0, 33) * K3_128;
+			v0 ^= Long.rotateRight((v0 * K2_128) + v1, 27) * K1_128;
+			pos += 8;
+		}
+
+		if (length - pos >= 4) {
+			v1 += readUnsignedInt(data, pos) * K2_128;
+			v1 = Long.rotateRight(v1, 33) * K3_128;
+			v1 ^= Long.rotateRight((v1 * K3_128) + v0, 46) * K0_128;
+			pos += 4;
+		}
+
+		if (length - pos >= 2) {
+			v0 += readUnsignedShort(data, pos) * K2_128;
+			v0 = Long.rotateRight(v0, 33) * K3_128;
+			v0 ^= Long.rotateRight((v0 * K2_128) + v1, 22) * K1_128;
+			pos += 2;
+		}
+
+		if (length - pos >= 1) {
+			v1 += (data[pos] & 0xFFL) * K2_128;
+			v1 = Long.rotateRight(v1, 33) * K3_128;
+			v1 ^= Long.rotateRight((v1 * K3_128) + v0, 58) * K0_128;
+		}
+
+		// finalization
+		v0 += Long.rotateRight((v0 * K0_128) + v1, 13);
+		v1 += Long.rotateRight((v1 * K1_128) + v0, 37);
+		v0 += Long.rotateRight((v0 * K2_128) + v1, 13);
+		v1 += Long.rotateRight((v1 * K3_128) + v0, 37);
+
+		return new Number128(v0, v1);
+	}
+
+	/**
+	 * Reads eight bytes as a little-endian 64-bit word.
+	 *
+	 * @param b   the source array
+	 * @param off index of the least significant byte
+	 * @return the word; each byte is masked so that values of 0x80 and above are not sign-extended
+	 */
+	private static long readLong(byte[] b, int off) {
+		return (b[off] & 0xFFL)
+			| (b[off + 1] & 0xFFL) << 8
+			| (b[off + 2] & 0xFFL) << 16
+			| (b[off + 3] & 0xFFL) << 24
+			| (b[off + 4] & 0xFFL) << 32
+			| (b[off + 5] & 0xFFL) << 40
+			| (b[off + 6] & 0xFFL) << 48
+			| (b[off + 7] & 0xFFL) << 56;
+	}
+
+	/**
+	 * Reads four bytes as an unsigned little-endian 32-bit value.
+	 *
+	 * @param b   the source array
+	 * @param off index of the least significant byte
+	 * @return the value in the range 0 to 2^32 - 1
+	 */
+	private static long readUnsignedInt(byte[] b, int off) {
+		return (b[off] & 0xFFL)
+			| (b[off + 1] & 0xFFL) << 8
+			| (b[off + 2] & 0xFFL) << 16
+			| (b[off + 3] & 0xFFL) << 24;
+	}
+
+	/**
+	 * Reads two bytes as an unsigned little-endian 16-bit value.
+	 *
+	 * @param b   the source array
+	 * @param off index of the least significant byte
+	 * @return the value in the range 0 to 65535
+	 */
+	private static long readUnsignedShort(byte[] b, int off) {
+		return (b[off] & 0xFFL) | (b[off + 1] & 0xFFL) << 8;
+	}
+}
diff --git a/hutool-core/src/main/java/cn/hutool/core/util/HashUtil.java b/hutool-core/src/main/java/cn/hutool/core/util/HashUtil.java
index 97ee1537b..5bd64d5aa 100644
--- a/hutool-core/src/main/java/cn/hutool/core/util/HashUtil.java
+++ b/hutool-core/src/main/java/cn/hutool/core/util/HashUtil.java
@@ -1,6 +1,7 @@
 package cn.hutool.core.util;
 
 import cn.hutool.core.lang.hash.CityHash;
+import cn.hutool.core.lang.hash.MetroHash;
 import cn.hutool.core.lang.hash.MurmurHash;
 import cn.hutool.core.lang.hash.Number128;
 
@@ -545,4 +546,89 @@ public class HashUtil {
 	public static long[] cityHash128(byte[] data, Number128 seed) {
 		return CityHash.hash128(data, seed).getLongArray();
 	}
+
+	/**
+	 * 64-bit MetroHash of a byte array.
+	 *
+	 * @param data the data to hash
+	 * @param seed the hash seed
+	 * @return the 64-bit hash value
+	 */
+	public static long metroHash64(byte[] data, long seed) {
+		return MetroHash.hash64(data, seed);
+	}
+
+	/**
+	 * 64-bit MetroHash of a byte array, using MetroHash's default seed.
+	 *
+	 * @param data the data to hash
+	 * @return the 64-bit hash value
+	 */
+	public static long metroHash64(byte[] data) {
+		return MetroHash.hash64(data);
+	}
+
+	/**
+	 * 64-bit MetroHash of a string.
+	 *
+	 * @param str  the string to hash
+	 * @param seed the hash seed
+	 * @return the 64-bit hash value
+	 */
+	public static long metroHash64(String str, long seed) {
+		return MetroHash.hash64(str, seed);
+	}
+
+	/**
+	 * 64-bit MetroHash of a string, using MetroHash's default seed.
+	 *
+	 * @param str the string to hash
+	 * @return the 64-bit hash value
+	 */
+	public static long metroHash64(String str) {
+		return MetroHash.hash64(str);
+	}
+
+	/**
+	 * 128-bit MetroHash of a byte array.
+	 *
+	 * @param data the data to hash
+	 * @param seed the hash seed
+	 * @return the hash value; long[0] is the low word, long[1] the high word
+	 */
+	public static long[] metroHash128(byte[] data, long seed) {
+		return MetroHash.hash128(data, seed).getLongArray();
+	}
+
+	/**
+	 * 128-bit MetroHash of a byte array, using MetroHash's default seed.
+	 *
+	 * @param data the data to hash
+	 * @return the hash value; long[0] is the low word, long[1] the high word
+	 */
+	public static long[] metroHash128(byte[] data) {
+		return MetroHash.hash128(data).getLongArray();
+	}
+
+	/**
+	 * 128-bit MetroHash of a string, using MetroHash's default seed.
+	 *
+	 * @param str the string to hash
+	 * @return the hash value; long[0] is the low word, long[1] the high word
+	 */
+	public static long[] metroHash128(String str) {
+		return MetroHash.hash128(str).getLongArray();
+	}
+
+	/**
+	 * 128-bit MetroHash of a string.
+	 *
+	 * @param str  the string to hash
+	 * @param seed the hash seed
+	 * @return the hash value; long[0] is the low word, long[1] the high word
+	 */
+	public static long[] metroHash128(String str, long seed) {
+		return MetroHash.hash128(str, seed).getLongArray();
+	}
+
 }
diff --git a/hutool-core/src/test/java/cn/hutool/core/lang/MetroHashTest.java b/hutool-core/src/test/java/cn/hutool/core/lang/MetroHashTest.java
new file mode 100644
index 000000000..b30b5f9f7
--- /dev/null
+++ b/hutool-core/src/test/java/cn/hutool/core/lang/MetroHashTest.java
@@ -0,0 +1,97 @@
+package cn.hutool.core.lang;
+
+import cn.hutool.core.lang.hash.CityHash;
+import cn.hutool.core.lang.hash.MetroHash;
+import org.junit.Assert;
+import org.junit.Ignore;
+import org.junit.Test;
+
+import java.util.Random;
+
+public class MetroHashTest {
+
+	/**
+	 * 63-byte key used by the published test vectors of the reference implementation.
+	 */
+	private static final String KEY_63 = "012345678901234567890123456789012345678901234567890123456789012";
+
+	@Test
+	public void hash64ReferenceVectorTest() {
+		Assert.assertEquals(0xAD4B7006AE3D756BL, MetroHash.hash64(KEY_63, 0));
+		Assert.assertEquals(0xDFB8B9F41C480D3BL, MetroHash.hash64(KEY_63, 1));
+	}
+
+	@Test
+	public void hash128ReferenceVectorTest() {
+		Assert.assertArrayEquals(
+			new long[]{0x9B9FEDA4BFE27CC7L, 0x97A27450ACB24805L},
+			MetroHash.hash128(KEY_63, 0).getLongArray());
+		Assert.assertArrayEquals(
+			new long[]{0x7F9D1938B8CDA345L, 0xEFEC147A868DD6BDL},
+			MetroHash.hash128(KEY_63, 1).getLongArray());
+	}
+
+	/**
+	 * Manual timing comparison with CityHash; ignored by default because it hashes ten million strings.
+	 */
+	@Test
+	@Ignore
+	public void bulkHashing64Test() {
+		String[] strArray = getRandomStringArray(10000000);
+		long startCity = System.currentTimeMillis();
+		for (String s : strArray) {
+			CityHash.hash64(s.getBytes());
+		}
+		long endCity = System.currentTimeMillis();
+
+		long startMetro = System.currentTimeMillis();
+		for (String s : strArray) {
+			MetroHash.hash64(s);
+		}
+		long endMetro = System.currentTimeMillis();
+
+		System.out.println("metroHash =============" + (endMetro - startMetro));
+		System.out.println("cityHash =============" + (endCity - startCity));
+	}
+
+	/**
+	 * Manual timing comparison with CityHash; ignored by default because it hashes ten million strings.
+	 */
+	@Test
+	@Ignore
+	public void bulkHashing128Test() {
+		String[] strArray = getRandomStringArray(10000000);
+		long startCity = System.currentTimeMillis();
+		for (String s : strArray) {
+			CityHash.hash128(s.getBytes());
+		}
+		long endCity = System.currentTimeMillis();
+
+		long startMetro = System.currentTimeMillis();
+		for (String s : strArray) {
+			MetroHash.hash128(s);
+		}
+		long endMetro = System.currentTimeMillis();
+
+		System.out.println("metroHash =============" + (endMetro - startMetro));
+		System.out.println("cityHash =============" + (endCity - startCity));
+	}
+
+	private static String[] getRandomStringArray(int length) {
+		String[] result = new String[length];
+		Random random = new Random();
+		for (int i = 0; i < length; i++) {
+			result[i] = getRandomString(random, random.nextInt(64));
+		}
+		return result;
+	}
+
+	private static String getRandomString(Random random, int length) {
+		String alphabet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
+		StringBuilder sb = new StringBuilder(length);
+		for (int i = 0; i < length; i++) {
+			sb.append(alphabet.charAt(random.nextInt(alphabet.length())));
+		}
+		return sb.toString();
+	}
+}
diff --git a/hutool-cuckooFilter/pom.xml b/hutool-cuckooFilter/pom.xml
new file mode 100644
index 000000000..793315b04
--- /dev/null
+++ b/hutool-cuckooFilter/pom.xml
@@ -0,0 +1,25 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+		 xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+		 xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+	<parent>
+		<artifactId>hutool-parent</artifactId>
+		<groupId>cn.hutool</groupId>
+		<version>5.7.19</version>
+	</parent>
+	<modelVersion>4.0.0</modelVersion>
+	<packaging>jar</packaging>
+
+	<artifactId>hutool-cuckooFilter</artifactId>
+	<name>${project.artifactId}</name>
+	<description>Hutool 布谷鸟过滤器</description>
+
+	<dependencies>
+		<dependency>
+			<groupId>cn.hutool</groupId>
+			<artifactId>hutool-core</artifactId>
+			<version>${project.parent.version}</version>
+		</dependency>
+	</dependencies>
+
+</project>
diff --git a/pom.xml b/pom.xml
index 673cdadb7..6d2609f28 100644
--- a/pom.xml
+++ b/pom.xml
@@ -35,7 +35,8 @@
 		<module>hutool-captcha</module>
 		<module>hutool-socket</module>
 		<module>hutool-jwt</module>
+		<module>hutool-cuckooFilter</module>
 	</modules>
 
 	<properties>
 		<project.build.sourceEncoding>utf-8</project.build.sourceEncoding>