mirror of
https://gitee.com/dromara/hutool.git
synced 2025-04-05 17:20:07 +08:00
增加MetroHash算法
This commit is contained in:
parent
b6c73dd010
commit
74c1aa12a3
@ -0,0 +1,233 @@
|
||||
package cn.hutool.core.lang.hash;
|
||||
|
||||
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* Apache 发布的MetroHash算法,是一组用于非加密用例的最先进的哈希函数。
|
||||
* 除了卓越的性能外,他们还以算法生成而著称。
|
||||
*
|
||||
* <p>
|
||||
* 官方实现:https://github.com/jandrewrogers/MetroHash
|
||||
* 官方文档:http://www.jandrewrogers.com/2015/05/27/metrohash/
|
||||
* Go语言实现:https://github.com/linvon/cuckoo-filter/blob/main/vendor/github.com/dgryski/go-metro/
|
||||
* @author li
|
||||
*/
|
||||
|
||||
public class MetroHash {
|
||||
|
||||
/**
|
||||
* hash64 种子加盐
|
||||
*/
|
||||
private final static long k0_64 = 0xD6D018F5;
|
||||
private final static long k1_64 = 0xA2AA033B;
|
||||
private final static long k2_64 = 0x62992FC1;
|
||||
private final static long k3_64 = 0x30BC5B29;
|
||||
|
||||
/**
|
||||
* hash128 种子加盐
|
||||
*/
|
||||
private final static long k0_128 = 0xC83A91E1;
|
||||
private final static long k1_128 = 0x8648DBDB;
|
||||
private final static long k2_128 = 0x7BDEC03B;
|
||||
private final static long k3_128 = 0x2F5870A5;
|
||||
|
||||
public static long hash64(String str) {
|
||||
return hash64(str, 1337);
|
||||
}
|
||||
|
||||
public static long hash64(byte[] data) {
|
||||
return hash64(data, 1337);
|
||||
}
|
||||
|
||||
public static long hash64(String str, long seed) {
|
||||
return hash64(str.getBytes(), seed);
|
||||
}
|
||||
|
||||
public static Number128 hash128(String str) {
|
||||
return hash128(str, 1337);
|
||||
}
|
||||
|
||||
public static Number128 hash128(byte[] data) {
|
||||
return hash128(data, 1337);
|
||||
}
|
||||
|
||||
public static Number128 hash128(String str, long seed) {
|
||||
return hash128(str.getBytes(), seed);
|
||||
}
|
||||
|
||||
public static long hash64(byte[] data, long seed) {
|
||||
byte[] buffer = data;
|
||||
long hash = (seed + k2_64) * k0_64;
|
||||
|
||||
long v0, v1, v2, v3;
|
||||
v0 = hash;
|
||||
v1 = hash;
|
||||
v2 = hash;
|
||||
v3 = hash;
|
||||
|
||||
if (buffer.length >= 32) {
|
||||
|
||||
while (buffer.length >= 32) {
|
||||
v0 += littleEndian64(Arrays.copyOfRange(buffer, 0, 8)) * k0_64;
|
||||
v0 = rotateLeft64(v0, -29) + v2;
|
||||
v1 += littleEndian64(Arrays.copyOfRange(buffer, 8, 16)) * k1_64;
|
||||
v1 = rotateLeft64(v1, -29) + v3;
|
||||
v2 += littleEndian64(Arrays.copyOfRange(buffer, 16, 24)) * k2_64;
|
||||
v2 = rotateLeft64(v2, -29) + v0;
|
||||
v3 += littleEndian64(Arrays.copyOfRange(buffer, 24, 32)) * k3_64;
|
||||
v3 = rotateLeft64(v3, -29) + v1;
|
||||
buffer = Arrays.copyOfRange(buffer, 32, buffer.length);
|
||||
}
|
||||
|
||||
v2 ^= rotateLeft64(((v0 + v3) * k0_64) + v1, -37) * k1_64;
|
||||
v3 ^= rotateLeft64(((v1 + v2) * k1_64) + v0, -37) * k0_64;
|
||||
v0 ^= rotateLeft64(((v0 + v2) * k0_64) + v3, -37) * k1_64;
|
||||
v1 ^= rotateLeft64(((v1 + v3) * k1_64) + v2, -37) * k0_64;
|
||||
hash += v0 ^ v1;
|
||||
}
|
||||
|
||||
if (buffer.length >= 16) {
|
||||
v0 = hash + littleEndian64(Arrays.copyOfRange(buffer, 0, 8)) * k2_64;
|
||||
v0 = rotateLeft64(v0, -29) * k3_64;
|
||||
v1 = hash + littleEndian64(Arrays.copyOfRange(buffer, 8, 16)) * k2_64;
|
||||
v1 = rotateLeft64(v1, -29) * k3_64;
|
||||
v0 ^= rotateLeft64(v0 * k0_64, -21) + v1;
|
||||
v1 ^= rotateLeft64(v1 * k3_64, -21) + v0;
|
||||
hash += v1;
|
||||
buffer = Arrays.copyOfRange(buffer, 16, buffer.length);
|
||||
}
|
||||
|
||||
if (buffer.length >= 8) {
|
||||
hash += littleEndian64(Arrays.copyOfRange(buffer, 0, 8)) * k3_64;
|
||||
buffer = Arrays.copyOfRange(buffer, 8, buffer.length);
|
||||
hash ^= rotateLeft64(hash, -55) * k1_64;
|
||||
}
|
||||
|
||||
if (buffer.length >= 4) {
|
||||
hash += (long) littleEndian32(Arrays.copyOfRange(buffer, 0, 4)) * k3_64;
|
||||
hash ^= rotateLeft64(hash, -26) * k1_64;
|
||||
buffer = Arrays.copyOfRange(buffer, 4, buffer.length);
|
||||
}
|
||||
|
||||
if (buffer.length >= 2) {
|
||||
hash += (long) littleEndian16(Arrays.copyOfRange(buffer, 0, 2)) * k3_64;
|
||||
buffer = Arrays.copyOfRange(buffer, 2, buffer.length);
|
||||
hash ^= rotateLeft64(hash, -48) * k1_64;
|
||||
}
|
||||
|
||||
if (buffer.length >= 1) {
|
||||
hash += (long) buffer[0] * k3_64;
|
||||
hash ^= rotateLeft64(hash, -38) * k1_64;
|
||||
}
|
||||
|
||||
hash ^= rotateLeft64(hash, -28);
|
||||
hash *= k0_64;
|
||||
hash ^= rotateLeft64(hash, -29);
|
||||
|
||||
return hash;
|
||||
}
|
||||
|
||||
|
||||
public static Number128 hash128(byte[] data, long seed) {
|
||||
byte[] buffer = data;
|
||||
|
||||
long v0, v1, v2, v3;
|
||||
|
||||
v0 = (seed - k0_128) * k3_128;
|
||||
v1 = (seed + k1_128) * k2_128;
|
||||
|
||||
if (buffer.length >= 32) {
|
||||
v2 = (seed + k0_128) * k2_128;
|
||||
v3 = (seed - k1_128) * k3_128;
|
||||
|
||||
while (buffer.length >= 32) {
|
||||
v0 += littleEndian64(buffer) * k0_128;
|
||||
buffer = Arrays.copyOfRange(buffer, 8, buffer.length);
|
||||
v0 = rotateRight(v0, 29) + v2;
|
||||
v1 += littleEndian64(buffer) * k1_128;
|
||||
buffer = Arrays.copyOfRange(buffer, 8, buffer.length);
|
||||
v1 = rotateRight(v1, 29) + v3;
|
||||
v2 += littleEndian64(buffer) * k2_128;
|
||||
buffer = Arrays.copyOfRange(buffer, 8, buffer.length);
|
||||
v2 = rotateRight(v2, 29) + v0;
|
||||
v3 = littleEndian64(buffer) * k3_128;
|
||||
buffer = Arrays.copyOfRange(buffer, 8, buffer.length);
|
||||
v3 = rotateRight(v3, 29) + v1;
|
||||
}
|
||||
|
||||
v2 ^= rotateRight(((v0 + v3) * k0_128) + v1, 21) * k1_128;
|
||||
v3 ^= rotateRight(((v1 + v2) * k1_128) + v0, 21) * k0_128;
|
||||
v0 ^= rotateRight(((v0 + v2) * k0_128) + v3, 21) * k1_128;
|
||||
v1 ^= rotateRight(((v1 + v3) * k1_128) + v2, 21) * k0_128;
|
||||
}
|
||||
|
||||
if (buffer.length >= 16) {
|
||||
v0 += littleEndian64(buffer) * k2_128;
|
||||
buffer = Arrays.copyOfRange(buffer, 8, buffer.length);
|
||||
v0 = rotateRight(v0, 33) * k3_128;
|
||||
v1 += littleEndian64(buffer) * k2_128;
|
||||
buffer = Arrays.copyOfRange(buffer, 8, buffer.length);
|
||||
v1 = rotateRight(v1, 33) * k3_128;
|
||||
v0 ^= rotateRight((v0 * k2_128) + v1, 45) + k1_128;
|
||||
v1 ^= rotateRight((v1 * k3_128) + v0, 45) + k0_128;
|
||||
}
|
||||
|
||||
if (buffer.length >= 8) {
|
||||
v0 += littleEndian64(buffer) * k2_128;
|
||||
buffer = Arrays.copyOfRange(buffer, 8, buffer.length);
|
||||
v0 = rotateRight(v0, 33) * k3_128;
|
||||
v0 ^= rotateRight((v0 * k2_128) + v1, 27) * k1_128;
|
||||
}
|
||||
|
||||
if (buffer.length >= 4) {
|
||||
v1 += (long) littleEndian32(buffer) * k2_128;
|
||||
buffer = Arrays.copyOfRange(buffer, 4, buffer.length);
|
||||
v1 = rotateRight(v1, 33) * k3_128;
|
||||
v1 ^= rotateRight((v1 * k3_128) + v0, 46) * k0_128;
|
||||
}
|
||||
|
||||
if (buffer.length >= 2) {
|
||||
v0 += (long) littleEndian16(buffer) * k2_128;
|
||||
buffer = Arrays.copyOfRange(buffer, 2, buffer.length);
|
||||
v0 = rotateRight(v0, 33) * k3_128;
|
||||
v0 ^= rotateRight((v0 * k2_128) * v1, 22) * k1_128;
|
||||
}
|
||||
|
||||
if (buffer.length >= 1) {
|
||||
v1 += (long) buffer[0] * k2_128;
|
||||
v1 = rotateRight(v1, 33) * k3_128;
|
||||
v1 ^= rotateRight((v1 * k3_128) + v0, 58) * k0_128;
|
||||
}
|
||||
|
||||
v0 += rotateRight((v0 * k0_128) + v1, 13);
|
||||
v1 += rotateRight((v1 * k1_128) + v0, 37);
|
||||
v0 += rotateRight((v0 * k2_128) + v1, 13);
|
||||
v1 += rotateRight((v1 * k3_128) + v0, 37);
|
||||
|
||||
return new Number128(v0, v1);
|
||||
}
|
||||
|
||||
|
||||
private static long littleEndian64(byte[] b) {
|
||||
return (long) b[0] | (long) (b[1]) << 8 | (long) b[2] << 16 | (long) b[3] << 24 |
|
||||
(long) b[4] << 32 | (long) b[5] << 40 | (long) b[6] << 48 | (long) b[7] << 56;
|
||||
}
|
||||
|
||||
private static int littleEndian32(byte[] b) {
|
||||
return (int) b[0] | (int) b[1] << 8 | (int) b[2] << 16 | (int) b[3] << 24;
|
||||
}
|
||||
|
||||
private static int littleEndian16(byte[] b) {
|
||||
return (short) b[0] | (short) b[1] << 8;
|
||||
}
|
||||
|
||||
private static long rotateLeft64(long x, int k) {
|
||||
int n = 64;
|
||||
int s = k & (n - 1);
|
||||
return x << s | x >> (n - s);
|
||||
}
|
||||
|
||||
private static long rotateRight(long val, int shift) {
|
||||
return (val >> shift) | (val << (64 - shift));
|
||||
}
|
||||
}
|
@ -1,6 +1,7 @@
|
||||
package cn.hutool.core.util;
|
||||
|
||||
import cn.hutool.core.lang.hash.CityHash;
|
||||
import cn.hutool.core.lang.hash.MetroHash;
|
||||
import cn.hutool.core.lang.hash.MurmurHash;
|
||||
import cn.hutool.core.lang.hash.Number128;
|
||||
|
||||
@ -545,4 +546,89 @@ public class HashUtil {
|
||||
public static long[] cityHash128(byte[] data, Number128 seed) {
|
||||
return CityHash.hash128(data, seed).getLongArray();
|
||||
}
|
||||
|
||||
/**
|
||||
* MetroHash 算法64-bit实现
|
||||
*
|
||||
* @param data 数据
|
||||
* @param seed 种子
|
||||
* @return
|
||||
*/
|
||||
public static long metroHash64(byte[] data, long seed) {
|
||||
return MetroHash.hash64(data, seed);
|
||||
}
|
||||
|
||||
/**
|
||||
* MetroHash 算法64-bit实现
|
||||
*
|
||||
* @param data 数据
|
||||
* @return
|
||||
*/
|
||||
public static long metroHash64(byte[] data) {
|
||||
return MetroHash.hash64(data);
|
||||
}
|
||||
|
||||
/**
|
||||
* MetroHash 算法64-bit实现
|
||||
*
|
||||
* @param str 数据
|
||||
* @param seed 种子
|
||||
* @return
|
||||
*/
|
||||
public static long metroHash64(String str,long seed) {
|
||||
return MetroHash.hash64(str,seed);
|
||||
}
|
||||
|
||||
/**
|
||||
* MetroHash 算法64-bit实现
|
||||
*
|
||||
* @param str 数据
|
||||
* @return
|
||||
*/
|
||||
public static long metroHash64(String str) {
|
||||
return MetroHash.hash64(str);
|
||||
}
|
||||
|
||||
/**
|
||||
* MetroHash 算法128-bit实现
|
||||
*
|
||||
* @param data 数据
|
||||
* @param seed 种子
|
||||
* @return hash值,long[0]:低位,long[1]:高位
|
||||
*/
|
||||
public static long[] metroHash128(byte[] data, long seed) {
|
||||
return MetroHash.hash128(data,seed).getLongArray();
|
||||
}
|
||||
|
||||
/**
|
||||
* MetroHash 算法128-bit实现
|
||||
*
|
||||
* @param data 数据
|
||||
* @return hash值,long[0]:低位,long[1]:高位
|
||||
*/
|
||||
public static long[] metroHash128(byte[] data) {
|
||||
return MetroHash.hash128(data).getLongArray();
|
||||
}
|
||||
|
||||
/**
|
||||
* MetroHash 算法128-bit实现
|
||||
*
|
||||
* @param str 数据
|
||||
* @return hash值,long[0]:低位,long[1]:高位
|
||||
*/
|
||||
public static long[] metroHash128(String str) {
|
||||
return MetroHash.hash128(str).getLongArray();
|
||||
}
|
||||
|
||||
/**
|
||||
* MetroHash 算法128-bit实现
|
||||
*
|
||||
* @param str 数据
|
||||
* @param seed 种子
|
||||
* @return hash值,long[0]:低位,long[1]:高位
|
||||
*/
|
||||
public static long[] metroHash128(String str, long seed) {
|
||||
return MetroHash.hash128(str,seed).getLongArray();
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -0,0 +1,81 @@
|
||||
package cn.hutool.core.lang;
|
||||
|
||||
|
||||
import cn.hutool.core.lang.hash.CityHash;
|
||||
import cn.hutool.core.lang.hash.MetroHash;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.Random;
|
||||
|
||||
public class MetroHashTest {
|
||||
|
||||
|
||||
/**
|
||||
* 数据量越大 MetroHash 优势越明显,
|
||||
*/
|
||||
@Test
|
||||
public void bulkHashing64Test() {
|
||||
String[] strArray = getRandomStringArray(10000000);
|
||||
long startCity = System.currentTimeMillis();
|
||||
for (String s : strArray) {
|
||||
CityHash.hash64(s.getBytes());
|
||||
}
|
||||
long endCity = System.currentTimeMillis();
|
||||
|
||||
long startMetro = System.currentTimeMillis();
|
||||
for (String s : strArray) {
|
||||
MetroHash.hash64(s);
|
||||
}
|
||||
long endMetro = System.currentTimeMillis();
|
||||
|
||||
System.out.println("metroHash =============" + (endMetro - startMetro));
|
||||
System.out.println("cityHash =============" + (endCity - startCity));
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* 数据量越大 MetroHash 优势越明显,
|
||||
*/
|
||||
@Test
|
||||
public void bulkHashing128Test() {
|
||||
String[] strArray = getRandomStringArray(10000000);
|
||||
long startCity = System.currentTimeMillis();
|
||||
for (String s : strArray) {
|
||||
CityHash.hash128(s.getBytes());
|
||||
}
|
||||
long endCity = System.currentTimeMillis();
|
||||
|
||||
long startMetro = System.currentTimeMillis();
|
||||
for (String s : strArray) {
|
||||
MetroHash.hash128(s);
|
||||
}
|
||||
long endMetro = System.currentTimeMillis();
|
||||
|
||||
System.out.println("metroHash =============" + (endMetro - startMetro));
|
||||
System.out.println("cityHash =============" + (endCity - startCity));
|
||||
}
|
||||
|
||||
|
||||
private static String[] getRandomStringArray(int length) {
|
||||
String[] result = new String[length];
|
||||
Random random = new Random();
|
||||
int index = 0;
|
||||
while (index < length) {
|
||||
result[index++] = getRandomString(random.nextInt(64));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
private static String getRandomString(int length) {
|
||||
String str = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
|
||||
Random random = new Random();
|
||||
StringBuffer sb = new StringBuffer();
|
||||
for (int i = 0; i < length; i++) {
|
||||
int number = random.nextInt(62);
|
||||
sb.append(str.charAt(number));
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
|
||||
}
|
26
hutool-cuckooFilter/pom.xml
Normal file
26
hutool-cuckooFilter/pom.xml
Normal file
@ -0,0 +1,26 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
		 xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
		 xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
	<modelVersion>4.0.0</modelVersion>

	<!-- Group id and version are taken from the parent POM. -->
	<parent>
		<groupId>cn.hutool</groupId>
		<artifactId>hutool-parent</artifactId>
		<version>5.7.19</version>
	</parent>

	<!-- Cuckoo filter module. -->
	<artifactId>hutool-cuckooFilter</artifactId>
	<packaging>jar</packaging>
	<name>${project.artifactId}</name>
	<description>Hutool 布谷鸟过滤器</description>

	<dependencies>
		<!-- hutool-core at the same version as the parent. -->
		<dependency>
			<groupId>cn.hutool</groupId>
			<artifactId>hutool-core</artifactId>
			<version>${project.parent.version}</version>
		</dependency>
	</dependencies>

</project>
|
Loading…
Reference in New Issue
Block a user