add MetroHash

This commit is contained in:
Looly 2022-02-11 17:35:05 +08:00
parent f239ba901f
commit 37f9220e50
14 changed files with 273 additions and 214 deletions

1
.gitignore vendored
View File

@ -31,4 +31,3 @@ build/
# system ignore
.DS_Store
Thumbs.db

View File

@ -18,6 +18,7 @@
* 【core 】 IntMap和LongMap使用位运算快速求解取余运算(pr#2123@Github)
* 【core 】 新增通用builder类GenericBuilder(pr#526@Gitee)
* 【core 】 新增copySafely方法与mkdirsSafely方法(pr#527@Gitee)
* 【core 】 新增MetroHash(pr#532@Gitee)
### 🐞Bug修复
* 【core 】 修复ChineseDate农历获取正月出现数组越界BUG(issue#2112@Github)

View File

@ -1,5 +1,7 @@
package cn.hutool.core.lang.hash;
import cn.hutool.core.util.ByteUtil;
import java.util.Arrays;
/**
@ -140,11 +142,11 @@ public class CityHash {
len = (len - 1) & ~63;
int pos = 0;
do {
x = rotate(x + y + v.getLowValue() + fetch64(data, pos + 8), 37) * k1;
y = rotate(y + v.getHighValue() + fetch64(data, pos + 48), 42) * k1;
x = rotate64(x + y + v.getLowValue() + fetch64(data, pos + 8), 37) * k1;
y = rotate64(y + v.getHighValue() + fetch64(data, pos + 48), 42) * k1;
x ^= w.getHighValue();
y += v.getLowValue() + fetch64(data, pos + 40);
z = rotate(z + w.getLowValue(), 33) * k1;
z = rotate64(z + w.getLowValue(), 33) * k1;
v = weakHashLen32WithSeeds(data, pos, v.getHighValue() * k1, x + w.getLowValue());
w = weakHashLen32WithSeeds(data, pos + 32, z + w.getHighValue(), y + fetch64(data, pos + 16));
// swap z,x value
@ -221,19 +223,19 @@ public class CityHash {
long x = seed.getLowValue();
long y = seed.getHighValue();
long z = len * k1;
v.setLowValue(rotate(y ^ k1, 49) * k1 + fetch64(byteArray, start));
v.setHighValue(rotate(v.getLowValue(), 42) * k1 + fetch64(byteArray, start + 8));
w.setLowValue(rotate(y + z, 35) * k1 + x);
w.setHighValue(rotate(x + fetch64(byteArray, start + 88), 53) * k1);
v.setLowValue(rotate64(y ^ k1, 49) * k1 + fetch64(byteArray, start));
v.setHighValue(rotate64(v.getLowValue(), 42) * k1 + fetch64(byteArray, start + 8));
w.setLowValue(rotate64(y + z, 35) * k1 + x);
w.setHighValue(rotate64(x + fetch64(byteArray, start + 88), 53) * k1);
// This is the same inner loop as CityHash64(), manually unrolled.
int pos = start;
do {
x = rotate(x + y + v.getLowValue() + fetch64(byteArray, pos + 8), 37) * k1;
y = rotate(y + v.getHighValue() + fetch64(byteArray, pos + 48), 42) * k1;
x = rotate64(x + y + v.getLowValue() + fetch64(byteArray, pos + 8), 37) * k1;
y = rotate64(y + v.getHighValue() + fetch64(byteArray, pos + 48), 42) * k1;
x ^= w.getHighValue();
y += v.getLowValue() + fetch64(byteArray, pos + 40);
z = rotate(z + w.getLowValue(), 33) * k1;
z = rotate64(z + w.getLowValue(), 33) * k1;
v = weakHashLen32WithSeeds(byteArray, pos, v.getHighValue() * k1, x + w.getLowValue());
w = weakHashLen32WithSeeds(byteArray, pos + 32, z + w.getHighValue(), y + fetch64(byteArray, pos + 16));
@ -241,11 +243,11 @@ public class CityHash {
x = z;
z = swapValue;
pos += 64;
x = rotate(x + y + v.getLowValue() + fetch64(byteArray, pos + 8), 37) * k1;
y = rotate(y + v.getHighValue() + fetch64(byteArray, pos + 48), 42) * k1;
x = rotate64(x + y + v.getLowValue() + fetch64(byteArray, pos + 8), 37) * k1;
y = rotate64(y + v.getHighValue() + fetch64(byteArray, pos + 48), 42) * k1;
x ^= w.getHighValue();
y += v.getLowValue() + fetch64(byteArray, pos + 40);
z = rotate(z + w.getLowValue(), 33) * k1;
z = rotate64(z + w.getLowValue(), 33) * k1;
v = weakHashLen32WithSeeds(byteArray, pos, v.getHighValue() * k1, x + w.getLowValue());
w = weakHashLen32WithSeeds(byteArray, pos + 32, z + w.getHighValue(), y + fetch64(byteArray, pos + 16));
swapValue = x;
@ -254,16 +256,16 @@ public class CityHash {
pos += 64;
len -= 128;
} while (len >= 128);
x += rotate(v.getLowValue() + z, 49) * k0;
y = y * k0 + rotate(w.getHighValue(), 37);
z = z * k0 + rotate(w.getLowValue(), 27);
x += rotate64(v.getLowValue() + z, 49) * k0;
y = y * k0 + rotate64(w.getHighValue(), 37);
z = z * k0 + rotate64(w.getLowValue(), 27);
w.setLowValue(w.getLowValue() * 9);
v.setLowValue(v.getLowValue() * k0);
// If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s.
for (int tail_done = 0; tail_done < len; ) {
tail_done += 32;
y = rotate(x + y, 42) * k0 + v.getHighValue();
y = rotate64(x + y, 42) * k0 + v.getHighValue();
w.setLowValue(w.getLowValue() + fetch64(byteArray, pos + len - tail_done + 16));
x = x * k0 + w.getLowValue();
z += w.getHighValue() + fetch64(byteArray, pos + len - tail_done);
@ -321,8 +323,8 @@ public class CityHash {
long mul = k2 + len * 2L;
long a = fetch64(byteArray, 0) + k2;
long b = fetch64(byteArray, len - 8);
long c = rotate(b, 37) * mul + a;
long d = (rotate(a, 25) + b) * mul;
long c = rotate64(b, 37) * mul + a;
long d = (rotate64(a, 25) + b) * mul;
return hashLen16(c, d, mul);
}
if (len >= 4) {
@ -349,8 +351,8 @@ public class CityHash {
long b = fetch64(byteArray, 8);
long c = fetch64(byteArray, len - 8) * mul;
long d = fetch64(byteArray, len - 16) * k2;
return hashLen16(rotate(a + b, 43) + rotate(c, 30) + d,
a + rotate(b + k2, 18) + c, mul);
return hashLen16(rotate64(a + b, 43) + rotate64(c, 30) + d,
a + rotate64(b + k2, 18) + c, mul);
}
private static long hashLen33to64(byte[] byteArray) {
@ -364,10 +366,10 @@ public class CityHash {
long f = fetch64(byteArray, 24) * 9;
long g = fetch64(byteArray, len - 8);
long h = fetch64(byteArray, len - 16) * mul;
long u = rotate(a + g, 43) + (rotate(b, 30) + c) * 9;
long u = rotate64(a + g, 43) + (rotate64(b, 30) + c) * 9;
long v = ((a + g) ^ d) + f + 1;
long w = Long.reverseBytes((u + v) * mul) + h;
long x = rotate(e + f, 42) + c;
long x = rotate64(e + f, 42) + c;
long y = (Long.reverseBytes((v + w) * mul) + g) * mul;
long z = e + f + c;
a = Long.reverseBytes((x + z) * mul + y) + b;
@ -375,37 +377,15 @@ public class CityHash {
return b + x;
}
private static long loadUnaligned64(final byte[] byteArray, final int start) {
long result = 0;
OrderIter orderIter = new OrderIter(8);
while (orderIter.hasNext()) {
int next = orderIter.next();
long value = (byteArray[next + start] & 0xffL) << (next * 8);
result |= value;
}
return result;
}
private static int loadUnaligned32(final byte[] byteArray, final int start) {
int result = 0;
OrderIter orderIter = new OrderIter(4);
while (orderIter.hasNext()) {
int next = orderIter.next();
int value = (byteArray[next + start] & 0xff) << (next * 8);
result |= value;
}
return result;
}
private static long fetch64(byte[] byteArray, final int start) {
return loadUnaligned64(byteArray, start);
private static long fetch64(byte[] byteArray, int start) {
return ByteUtil.bytesToLong(byteArray, start, ByteUtil.CPU_ENDIAN);
}
private static int fetch32(byte[] byteArray, final int start) {
return loadUnaligned32(byteArray, start);
return ByteUtil.bytesToInt(byteArray, start, ByteUtil.CPU_ENDIAN);
}
private static long rotate(long val, int shift) {
private static long rotate64(long val, int shift) {
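// Java masks a long shift distance to its low six bits, so "val << 64" would act as "val << 0";
// handling shift == 0 explicitly keeps that edge case obvious instead of relying on the masking.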
return shift == 0 ? val : ((val >>> shift) | (val << (64 - shift)));
}
@ -465,11 +445,11 @@ public class CityHash {
private static Number128 weakHashLen32WithSeeds(
long w, long x, long y, long z, long a, long b) {
a += w;
b = rotate(b + a + z, 21);
b = rotate64(b + a + z, 21);
long c = a;
a += x;
a += y;
b += rotate(a, 44);
b += rotate64(a, 44);
return new Number128(a + z, b + c);
}
@ -515,24 +495,5 @@ public class CityHash {
b = hashLen16(d, b);
return new Number128(a ^ b, hashLen16(b, a));
}
private static class OrderIter {
private static final boolean IS_LITTLE_ENDIAN = "little".equals(System.getProperty("sun.cpu.endian"));
private final int size;
private int index;
OrderIter(int size) {
this.size = size;
}
boolean hasNext() {
return index < size;
}
int next() {
return IS_LITTLE_ENDIAN ? index++ : (size - 1 - index++);
}
}
//------------------------------------------------------------------------------------------------------- Private method end
}

View File

@ -1,5 +1,8 @@
package cn.hutool.core.lang.hash;
import cn.hutool.core.util.ByteUtil;
import java.nio.ByteOrder;
import java.util.Arrays;
/**
@ -12,7 +15,6 @@ import java.util.Arrays;
* Go语言实现https://github.com/linvon/cuckoo-filter/blob/main/vendor/github.com/dgryski/go-metro/
* @author li
*/
public class MetroHash {
/**
@ -31,30 +33,14 @@ public class MetroHash {
private final static long k2_128 = 0x7BDEC03B;
private final static long k3_128 = 0x2F5870A5;
public static long hash64(String str) {
return hash64(str, 1337);
}
public static long hash64(byte[] data) {
return hash64(data, 1337);
}
public static long hash64(String str, long seed) {
return hash64(str.getBytes(), seed);
}
public static Number128 hash128(String str) {
return hash128(str, 1337);
}
public static Number128 hash128(byte[] data) {
return hash128(data, 1337);
}
public static Number128 hash128(String str, long seed) {
return hash128(str.getBytes(), seed);
}
public static long hash64(byte[] data, long seed) {
byte[] buffer = data;
long hash = (seed + k2_64) * k0_64;
@ -68,13 +54,13 @@ public class MetroHash {
if (buffer.length >= 32) {
// Consume the input in 32-byte stripes, mixing four consecutive little-endian
// 64-bit lanes taken at byte offsets 0, 8, 16 and 24 of the current stripe - the
// same ranges that the Arrays.copyOfRange(...) form of each read slices out.
while (buffer.length >= 32) {
	v0 += littleEndian64(Arrays.copyOfRange(buffer, 0, 8)) * k0_64;
	v0 += littleEndian64(buffer, 0) * k0_64;
	v0 = rotateLeft64(v0, -29) + v2;
	v1 += littleEndian64(Arrays.copyOfRange(buffer, 8, 16)) * k1_64;
	v1 += littleEndian64(buffer, 8) * k1_64;
	v1 = rotateLeft64(v1, -29) + v3;
	v2 += littleEndian64(Arrays.copyOfRange(buffer, 16, 24)) * k2_64;
	// Third lane is bytes 16..23; an offset of 24 would skip them entirely.
	v2 += littleEndian64(buffer, 16) * k2_64;
	v2 = rotateLeft64(v2, -29) + v0;
	v3 += littleEndian64(Arrays.copyOfRange(buffer, 24, 32)) * k3_64;
	// Fourth lane is bytes 24..31; an offset of 32 would read past the stripe
	// (and past the end of the array when fewer than 40 bytes remain).
	v3 += littleEndian64(buffer, 24) * k3_64;
	v3 = rotateLeft64(v3, -29) + v1;
	// Drop the stripe that has just been consumed.
	buffer = Arrays.copyOfRange(buffer, 32, buffer.length);
}
@ -87,9 +73,9 @@ public class MetroHash {
}
if (buffer.length >= 16) {
v0 = hash + littleEndian64(Arrays.copyOfRange(buffer, 0, 8)) * k2_64;
v0 = hash + littleEndian64(buffer, 0) * k2_64;
v0 = rotateLeft64(v0, -29) * k3_64;
v1 = hash + littleEndian64(Arrays.copyOfRange(buffer, 8, 16)) * k2_64;
v1 = hash + littleEndian64(buffer, 8) * k2_64;
v1 = rotateLeft64(v1, -29) * k3_64;
v0 ^= rotateLeft64(v0 * k0_64, -21) + v1;
v1 ^= rotateLeft64(v1 * k3_64, -21) + v0;
@ -98,7 +84,7 @@ public class MetroHash {
}
if (buffer.length >= 8) {
hash += littleEndian64(Arrays.copyOfRange(buffer, 0, 8)) * k3_64;
hash += littleEndian64(buffer, 0) * k3_64;
buffer = Arrays.copyOfRange(buffer, 8, buffer.length);
hash ^= rotateLeft64(hash, -55) * k1_64;
}
@ -127,7 +113,6 @@ public class MetroHash {
return hash;
}
public static Number128 hash128(byte[] data, long seed) {
byte[] buffer = data;
@ -141,16 +126,16 @@ public class MetroHash {
v3 = (seed - k1_128) * k3_128;
while (buffer.length >= 32) {
v0 += littleEndian64(buffer) * k0_128;
v0 += littleEndian64(buffer, 0) * k0_128;
buffer = Arrays.copyOfRange(buffer, 8, buffer.length);
v0 = rotateRight(v0, 29) + v2;
v1 += littleEndian64(buffer) * k1_128;
v1 += littleEndian64(buffer, 0) * k1_128;
buffer = Arrays.copyOfRange(buffer, 8, buffer.length);
v1 = rotateRight(v1, 29) + v3;
v2 += littleEndian64(buffer) * k2_128;
v2 += littleEndian64(buffer, 0) * k2_128;
buffer = Arrays.copyOfRange(buffer, 8, buffer.length);
v2 = rotateRight(v2, 29) + v0;
v3 = littleEndian64(buffer) * k3_128;
v3 = littleEndian64(buffer, 0) * k3_128;
buffer = Arrays.copyOfRange(buffer, 8, buffer.length);
v3 = rotateRight(v3, 29) + v1;
}
@ -162,10 +147,10 @@ public class MetroHash {
}
if (buffer.length >= 16) {
v0 += littleEndian64(buffer) * k2_128;
v0 += littleEndian64(buffer, 0) * k2_128;
buffer = Arrays.copyOfRange(buffer, 8, buffer.length);
v0 = rotateRight(v0, 33) * k3_128;
v1 += littleEndian64(buffer) * k2_128;
v1 += littleEndian64(buffer, 0) * k2_128;
buffer = Arrays.copyOfRange(buffer, 8, buffer.length);
v1 = rotateRight(v1, 33) * k3_128;
v0 ^= rotateRight((v0 * k2_128) + v1, 45) + k1_128;
@ -173,7 +158,7 @@ public class MetroHash {
}
if (buffer.length >= 8) {
v0 += littleEndian64(buffer) * k2_128;
v0 += littleEndian64(buffer, 0) * k2_128;
buffer = Arrays.copyOfRange(buffer, 8, buffer.length);
v0 = rotateRight(v0, 33) * k3_128;
v0 ^= rotateRight((v0 * k2_128) + v1, 27) * k1_128;
@ -208,9 +193,8 @@ public class MetroHash {
}
private static long littleEndian64(byte[] b) {
return (long) b[0] | (long) (b[1]) << 8 | (long) b[2] << 16 | (long) b[3] << 24 |
(long) b[4] << 32 | (long) b[5] << 40 | (long) b[6] << 48 | (long) b[7] << 56;
private static long littleEndian64(byte[] b, int start) {
return ByteUtil.bytesToLong(b, start, ByteOrder.LITTLE_ENDIAN);
}
private static int littleEndian32(byte[] b) {
@ -218,7 +202,7 @@ public class MetroHash {
}
private static int littleEndian16(byte[] b) {
return (short) b[0] | (short) b[1] << 8;
return ByteUtil.bytesToShort(b, ByteOrder.LITTLE_ENDIAN);
}
private static long rotateLeft64(long x, int k) {

View File

@ -1,9 +1,11 @@
package cn.hutool.core.lang.hash;
import cn.hutool.core.util.ByteUtil;
import cn.hutool.core.util.CharsetUtil;
import cn.hutool.core.util.StrUtil;
import java.io.Serializable;
import java.nio.ByteOrder;
import java.nio.charset.Charset;
/**
@ -41,6 +43,7 @@ public class MurmurHash implements Serializable{
private static final int DEFAULT_SEED = 0;
private static final Charset DEFAULT_CHARSET = CharsetUtil.CHARSET_UTF_8;
private static final ByteOrder DEFAULT_ORDER = ByteOrder.LITTLE_ENDIAN;
/**
* Murmur3 32-bit Hash值计算
@ -76,11 +79,8 @@ public class MurmurHash implements Serializable{
// body
for (int i = 0; i < nblocks; i++) {
int i_4 = i << 2;
int k = (data[i_4] & 0xff) //
| ((data[i_4 + 1] & 0xff) << 8) //
| ((data[i_4 + 2] & 0xff) << 16) //
| ((data[i_4 + 3] & 0xff) << 24);
int i4 = i << 2;
int k = ByteUtil.bytesToInt(data, i4, DEFAULT_ORDER);
// mix functions
k *= C1_32;
@ -157,14 +157,7 @@ public class MurmurHash implements Serializable{
// body
for (int i = 0; i < nblocks; i++) {
final int i8 = i << 3;
long k = ((long) data[i8] & 0xff) //
| (((long) data[i8 + 1] & 0xff) << 8) //
| (((long) data[i8 + 2] & 0xff) << 16) //
| (((long) data[i8 + 3] & 0xff) << 24) //
| (((long) data[i8 + 4] & 0xff) << 32)//
| (((long) data[i8 + 5] & 0xff) << 40) //
| (((long) data[i8 + 6] & 0xff) << 48) //
| (((long) data[i8 + 7] & 0xff) << 56);
long k = ByteUtil.bytesToLong(data, i8, DEFAULT_ORDER);
// mix functions
k *= C1;
@ -241,23 +234,8 @@ public class MurmurHash implements Serializable{
// body
for (int i = 0; i < nblocks; i++) {
final int i16 = i << 4;
long k1 = ((long) data[i16] & 0xff) //
| (((long) data[i16 + 1] & 0xff) << 8) //
| (((long) data[i16 + 2] & 0xff) << 16) //
| (((long) data[i16 + 3] & 0xff) << 24) //
| (((long) data[i16 + 4] & 0xff) << 32) //
| (((long) data[i16 + 5] & 0xff) << 40) //
| (((long) data[i16 + 6] & 0xff) << 48) //
| (((long) data[i16 + 7] & 0xff) << 56);
long k2 = ((long) data[i16 + 8] & 0xff) //
| (((long) data[i16 + 9] & 0xff) << 8) //
| (((long) data[i16 + 10] & 0xff) << 16) //
| (((long) data[i16 + 11] & 0xff) << 24) //
| (((long) data[i16 + 12] & 0xff) << 32) //
| (((long) data[i16 + 13] & 0xff) << 40) //
| (((long) data[i16 + 14] & 0xff) << 48) //
| (((long) data[i16 + 15] & 0xff) << 56);
long k1 = ByteUtil.bytesToLong(data, i16, DEFAULT_ORDER);
long k2 = ByteUtil.bytesToLong(data, i16 + 8, DEFAULT_ORDER);
// mix functions for k1
k1 *= C1;

View File

@ -6,7 +6,7 @@ package cn.hutool.core.lang.hash;
* @author hexiufeng
* @since 5.2.5
*/
public class Number128 extends Number{
public class Number128 extends Number {
private static final long serialVersionUID = 1L;
private long lowValue;
@ -23,22 +23,47 @@ public class Number128 extends Number{
this.highValue = highValue;
}
/**
* 获取低位值
*
* @return 低位值
*/
public long getLowValue() {
return lowValue;
}
public long getHighValue() {
return highValue;
}
/**
* 设置低位值
*
* @param lowValue 低位值
*/
public void setLowValue(long lowValue) {
this.lowValue = lowValue;
}
/**
* 获取高位值
*
* @return 高位值
*/
public long getHighValue() {
return highValue;
}
/**
* 设置高位值
*
* @param hiValue 高位值
*/
public void setHighValue(long hiValue) {
this.highValue = hiValue;
}
/**
* 获取高低位数组,long[0]:低位,long[1]:高位
*
* @return 高低位数组,long[0]:低位,long[1]:高位
*/
public long[] getLongArray() {
return new long[]{lowValue, highValue};
}

View File

@ -228,7 +228,6 @@ public class TreeBuilder<E> implements Builder<Tree<E>> {
}
final Map<E, Tree<E>> eTreeMap = MapUtil.sortByValue(this.idTreeMap, false);
List<Tree<E>> rootTreeList = CollUtil.newArrayList();
E parentId;
for (Tree<E> node : eTreeMap.values()) {
if (null == node) {
@ -237,7 +236,6 @@ public class TreeBuilder<E> implements Builder<Tree<E>> {
parentId = node.getParentId();
if (ObjectUtil.equals(this.root.getId(), parentId)) {
this.root.addChildren(node);
rootTreeList.add(node);
continue;
}

View File

@ -27,7 +27,11 @@ import java.util.concurrent.atomic.LongAdder;
*/
public class ByteUtil {
public static ByteOrder DEFAULT_ORDER = ByteOrder.LITTLE_ENDIAN;
public static final ByteOrder DEFAULT_ORDER = ByteOrder.LITTLE_ENDIAN;
/**
* CPU的字节序
*/
public static final ByteOrder CPU_ENDIAN = "little".equals(System.getProperty("sun.cpu.endian")) ? ByteOrder.LITTLE_ENDIAN : ByteOrder.BIG_ENDIAN;
/**
* int转byte
@ -130,16 +134,29 @@ public class ByteUtil {
* @return int值
*/
public static int bytesToInt(byte[] bytes, ByteOrder byteOrder) {
return bytesToInt(bytes, 0, byteOrder);
}
/**
* byte[]转int值<br>
* 自定义端序
*
* @param bytes byte数组
* @param start 计算数组开始位置
* @param byteOrder 端序
* @return int值
* @since 5.7.21
*/
public static int bytesToInt(byte[] bytes, int start, ByteOrder byteOrder) {
if (ByteOrder.LITTLE_ENDIAN == byteOrder) {
return bytes[0] & 0xFF | //
(bytes[1] & 0xFF) << 8 | //
(bytes[2] & 0xFF) << 16 | //
(bytes[3] & 0xFF) << 24; //
return bytes[start] & 0xFF | //
(bytes[1 + start] & 0xFF) << 8 | //
(bytes[2 + start] & 0xFF) << 16 | //
(bytes[3 + start] & 0xFF) << 24; //
} else {
return bytes[3] & 0xFF | //
(bytes[2] & 0xFF) << 8 | //
(bytes[1] & 0xFF) << 16 | //
(bytes[0] & 0xFF) << 24; //
return bytes[3 + start] & 0xFF | //
(bytes[2 + start] & 0xFF) << 8 | //
(bytes[1 + start] & 0xFF) << 16 | //
(bytes[start] & 0xFF) << 24; //
}
}
@ -243,16 +260,31 @@ public class ByteUtil {
* @return long值
*/
public static long bytesToLong(byte[] bytes, ByteOrder byteOrder) {
return bytesToLong(bytes, 0, byteOrder);
}
/**
* byte数组转long<br>
* 自定义端序<br>
* from: https://stackoverflow.com/questions/4485128/how-do-i-convert-long-to-byte-and-back-in-java
*
* @param bytes byte数组
* @param start 计算数组开始位置
* @param byteOrder 端序
* @return long值
* @since 5.7.21
*/
public static long bytesToLong(byte[] bytes, int start, ByteOrder byteOrder) {
long values = 0;
if (ByteOrder.LITTLE_ENDIAN == byteOrder) {
for (int i = (Long.BYTES - 1); i >= 0; i--) {
values <<= Byte.SIZE;
values |= (bytes[i] & 0xff);
values |= (bytes[i + start] & 0xff);
}
} else {
for (int i = 0; i < Long.BYTES; i++) {
values <<= Byte.SIZE;
values |= (bytes[i] & 0xff);
values |= (bytes[i + start] & 0xff);
}
}

View File

@ -387,7 +387,7 @@ public class HashUtil {
if (ucChar <= 'Z' && ucChar >= 'A') {
ucChar = (char) (ucChar + 32);
}
hash += (3 * i * ucChar * ucChar + 5 * i * ucChar + 7 * i + 11 * ucChar) % 16777216;
hash += (3L * i * ucChar * ucChar + 5L * i * ucChar + 7L * i + 11 * ucChar) % 16777216;
}
} else {
for (i = 1; i <= 96; i++) {
@ -395,7 +395,7 @@ public class HashUtil {
if (ucChar <= 'Z' && ucChar >= 'A') {
ucChar = (char) (ucChar + 32);
}
hash += (3 * i * ucChar * ucChar + 5 * i * ucChar + 7 * i + 11 * ucChar) % 16777216;
hash += (3L * i * ucChar * ucChar + 5L * i * ucChar + 7L * i + 11 * ucChar) % 16777216;
}
}
if (hash < 0) {
@ -552,7 +552,7 @@ public class HashUtil {
*
* @param data 数据
* @param seed 种子
* @return
* @return hash值
*/
public static long metroHash64(byte[] data, long seed) {
return MetroHash.hash64(data, seed);
@ -562,33 +562,12 @@ public class HashUtil {
* MetroHash 算法64-bit实现
*
* @param data 数据
* @return
* @return hash值
*/
public static long metroHash64(byte[] data) {
return MetroHash.hash64(data);
}
/**
* MetroHash 算法64-bit实现
*
* @param str 数据
* @param seed 种子
* @return
*/
public static long metroHash64(String str,long seed) {
return MetroHash.hash64(str,seed);
}
/**
* MetroHash 算法64-bit实现
*
* @param str 数据
* @return
*/
public static long metroHash64(String str) {
return MetroHash.hash64(str);
}
/**
* MetroHash 算法128-bit实现
*
@ -609,26 +588,4 @@ public class HashUtil {
public static long[] metroHash128(byte[] data) {
return MetroHash.hash128(data).getLongArray();
}
/**
* MetroHash 算法128-bit实现
*
* @param str 数据
* @return hash值long[0]低位long[1]高位
*/
public static long[] metroHash128(String str) {
return MetroHash.hash128(str).getLongArray();
}
/**
* MetroHash 算法128-bit实现
*
* @param str 数据
* @param seed 种子
* @return hash值long[0]低位long[1]高位
*/
public static long[] metroHash128(String str, long seed) {
return MetroHash.hash128(str,seed).getLongArray();
}
}

View File

@ -0,0 +1,36 @@
package cn.hutool.core.lang.hash;
import cn.hutool.core.util.StrUtil;
import org.junit.Assert;
import org.junit.Test;
/**
 * Pins the values returned by {@link CityHash#hash32(byte[])} and
 * {@link CityHash#hash64(byte[])} for a handful of UTF-8 encoded sample strings.
 */
public class CityHashTest {

	/** Longest sample text, shared by the 32-bit and the 64-bit test. */
	private static final String LONG_TEXT = "我们将通过生成一个大的文件的方式来检验各种方法的执行效率因为这种方式在结束的时候需要执行文件";

	@Test
	public void hash32Test() {
		Assert.assertEquals(1290029860, hash32Of(""));
		Assert.assertEquals(1374181357, hash32Of("你好"));
		Assert.assertEquals(1475516842, hash32Of("见到你很高兴"));
		Assert.assertEquals(0x51020cae, hash32Of(LONG_TEXT));
	}

	@Test
	public void hash64Test() {
		Assert.assertEquals(-4296898700418225525L, hash64Of(""));
		Assert.assertEquals(-4294276205456761303L, hash64Of("你好"));
		Assert.assertEquals(272351505337503793L, hash64Of("见到你很高兴"));
		Assert.assertEquals(-8234735310919228703L, hash64Of(LONG_TEXT));
	}

	/** Encodes {@code text} as UTF-8 and returns its 32-bit CityHash. */
	private static int hash32Of(String text) {
		return CityHash.hash32(StrUtil.utf8Bytes(text));
	}

	/** Encodes {@code text} as UTF-8 and returns its 64-bit CityHash. */
	private static long hash64Of(String text) {
		return CityHash.hash64(StrUtil.utf8Bytes(text));
	}
}

View File

@ -1,19 +1,45 @@
package cn.hutool.core.lang;
package cn.hutool.core.lang.hash;
import cn.hutool.core.lang.hash.CityHash;
import cn.hutool.core.lang.hash.MetroHash;
import cn.hutool.core.util.CharsetUtil;
import cn.hutool.core.util.HexUtil;
import cn.hutool.core.util.StrUtil;
import org.junit.Assert;
import org.junit.Ignore;
import org.junit.Test;
import java.util.Random;
/**
* https://gitee.com/dromara/hutool/pulls/532
*/
public class MetroHashTest {
@Test
public void testEmpty() {
	// 64-bit MetroHash of zero-length input with seed 0, compared in hex form.
	final byte[] noBytes = StrUtil.utf8Bytes("");
	final long hash = MetroHash.hash64(noBytes, 0);
	Assert.assertEquals("31290877cceaea29", HexUtil.toHex(hash));
}
@Test
public void metroHash64Test() {
	// Pins the 64-bit hash (single-argument overload) of a short UTF-8 sample.
	final byte[] input = "我是一段测试123".getBytes(CharsetUtil.CHARSET_UTF_8);
	Assert.assertEquals(62920234463891865L, MetroHash.hash64(input));
}
@Test
public void metroHash128Test() {
	// Pins both halves of the 128-bit hash of a short UTF-8 sample:
	// index 0 of the returned array is the low value, index 1 the high value.
	final byte[] input = "我是一段测试123".getBytes(CharsetUtil.CHARSET_UTF_8);
	final long[] parts = MetroHash.hash128(input).getLongArray();
	Assert.assertEquals(4629350038757384271L, parts[0]);
	Assert.assertEquals(-1607305036506152112L, parts[1]);
}
/**
* 数据量越大 MetroHash 优势越明显
*/
@Test
@Ignore
public void bulkHashing64Test() {
String[] strArray = getRandomStringArray(10000000);
long startCity = System.currentTimeMillis();
@ -24,7 +50,7 @@ public class MetroHashTest {
long startMetro = System.currentTimeMillis();
for (String s : strArray) {
MetroHash.hash64(s);
MetroHash.hash64(StrUtil.utf8Bytes(s));
}
long endMetro = System.currentTimeMillis();
@ -37,6 +63,7 @@ public class MetroHashTest {
* 数据量越大 MetroHash 优势越明显
*/
@Test
@Ignore
public void bulkHashing128Test() {
String[] strArray = getRandomStringArray(10000000);
long startCity = System.currentTimeMillis();
@ -47,7 +74,7 @@ public class MetroHashTest {
long startMetro = System.currentTimeMillis();
for (String s : strArray) {
MetroHash.hash128(s);
MetroHash.hash128(StrUtil.utf8Bytes(s));
}
long endMetro = System.currentTimeMillis();

View File

@ -0,0 +1,36 @@
package cn.hutool.core.lang.hash;
import cn.hutool.core.util.StrUtil;
import org.junit.Assert;
import org.junit.Test;
/**
 * Pins the values returned by {@link MurmurHash#hash32(byte[])} and
 * {@link MurmurHash#hash64(byte[])} for a handful of UTF-8 encoded sample strings.
 */
public class MurMurHashTest {

	/** Sample inputs, hashed in this order by both tests. */
	private static final String[] SAMPLES = {
			"",
			"你好",
			"见到你很高兴",
			"我们将通过生成一个大的文件的方式来检验各种方法的执行效率因为这种方式在结束的时候需要执行文件"
	};

	@Test
	public void hash32Test() {
		// Expected 32-bit hashes, index-aligned with SAMPLES.
		final int[] expected = {222142701, 1188098267, -1898490321, -1713131054};
		for (int i = 0; i < SAMPLES.length; i++) {
			Assert.assertEquals(expected[i], MurmurHash.hash32(StrUtil.utf8Bytes(SAMPLES[i])));
		}
	}

	@Test
	public void hash64Test() {
		// Expected 64-bit hashes, index-aligned with SAMPLES.
		final long[] expected = {
				-1349759534971957051L,
				-7563732748897304996L,
				-766658210119995316L,
				-7469283059271653317L
		};
		for (int i = 0; i < SAMPLES.length; i++) {
			Assert.assertEquals(expected[i], MurmurHash.hash64(StrUtil.utf8Bytes(SAMPLES[i])));
		}
	}
}

View File

@ -10,9 +10,16 @@ public class ByteUtilTest {
@Test
public void intAndBytesLittleEndianTest() {
// 测试 int 转小端序 byte 数组
int int1 = RandomUtil.randomInt();
int int1 = RandomUtil.randomInt((Integer.MAX_VALUE));
ByteBuffer buffer = ByteBuffer.allocate(Integer.BYTES);
buffer.order(ByteOrder.LITTLE_ENDIAN);
buffer.putInt(int1);
byte[] bytesIntFromBuffer = buffer.array();
byte[] bytesInt = ByteUtil.intToBytes(int1, ByteOrder.LITTLE_ENDIAN);
Assert.assertArrayEquals(bytesIntFromBuffer, bytesInt);
int int2 = ByteUtil.bytesToInt(bytesInt, ByteOrder.LITTLE_ENDIAN);
Assert.assertEquals(int1, int2);
@ -28,8 +35,14 @@ public class ByteUtilTest {
@Test
public void intAndBytesBigEndianTest() {
// 测试 int 转大端序 byte 数组
int int2 = RandomUtil.randomInt();
int int2 = RandomUtil.randomInt(Integer.MAX_VALUE);
ByteBuffer buffer = ByteBuffer.allocate(Integer.BYTES);
buffer.putInt(int2);
byte[] bytesIntFromBuffer = buffer.array();
byte[] bytesInt = ByteUtil.intToBytes(int2, ByteOrder.BIG_ENDIAN);
Assert.assertArrayEquals(bytesIntFromBuffer, bytesInt);
// 测试大端序 byte 数组转 int
int int3 = ByteUtil.bytesToInt(bytesInt, ByteOrder.BIG_ENDIAN);
@ -39,9 +52,16 @@ public class ByteUtilTest {
@Test
public void longAndBytesLittleEndianTest() {
// 测试 long byte 数组
long long1 = 2223;
long long1 = RandomUtil.randomLong(Long.MAX_VALUE);
ByteBuffer buffer = ByteBuffer.allocate(Long.BYTES);
buffer.order(ByteOrder.LITTLE_ENDIAN);
buffer.putLong(long1);
byte[] bytesLongFromBuffer = buffer.array();
byte[] bytesLong = ByteUtil.longToBytes(long1, ByteOrder.LITTLE_ENDIAN);
Assert.assertArrayEquals(bytesLongFromBuffer, bytesLong);
long long2 = ByteUtil.bytesToLong(bytesLong, ByteOrder.LITTLE_ENDIAN);
Assert.assertEquals(long1, long2);
@ -57,11 +77,16 @@ public class ByteUtilTest {
@Test
public void longAndBytesBigEndianTest() {
// 测试大端序 long byte 数组
long long1 = 2223;
long long1 = RandomUtil.randomLong(Long.MAX_VALUE);
ByteBuffer buffer = ByteBuffer.allocate(Long.BYTES);
buffer.putLong(long1);
byte[] bytesLongFromBuffer = buffer.array();
byte[] bytesLong = ByteUtil.longToBytes(long1, ByteOrder.BIG_ENDIAN);
long long2 = ByteUtil.bytesToLong(bytesLong, ByteOrder.BIG_ENDIAN);
Assert.assertArrayEquals(bytesLongFromBuffer, bytesLong);
long long2 = ByteUtil.bytesToLong(bytesLong, ByteOrder.BIG_ENDIAN);
Assert.assertEquals(long1, long2);
}

View File

@ -35,7 +35,7 @@
<module>hutool-captcha</module>
<module>hutool-socket</module>
<module>hutool-jwt</module>
</modules>
</modules>
<properties>
<project.build.sourceEncoding>utf-8</project.build.sourceEncoding>
@ -157,7 +157,7 @@
<format>html</format>
<format>xml</format>
</formats>
<check />
<check/>
</configuration>
</plugin>
<!-- Gpg Signature -->