diff --git a/.gitignore b/.gitignore index 55497abff..2a93184c8 100644 --- a/.gitignore +++ b/.gitignore @@ -31,4 +31,3 @@ build/ # system ignore .DS_Store Thumbs.db - diff --git a/CHANGELOG.md b/CHANGELOG.md index 4c6de0471..4fc5d61ac 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ * 【core 】 IntMap和LongMap使用位运算快速求解取余运算(pr#2123@Github) * 【core 】 新增通用builder类:GenericBuilder(pr#526@Gitee) * 【core 】 新增copySafely方法与mkdirsSafely方法(pr#527@Gitee) +* 【core 】 新增MetroHash(pr#532@Gitee) ### 🐞Bug修复 * 【core 】 修复ChineseDate农历获取正月出现数组越界BUG(issue#2112@Github) diff --git a/hutool-core/src/main/java/cn/hutool/core/lang/hash/CityHash.java b/hutool-core/src/main/java/cn/hutool/core/lang/hash/CityHash.java index aad2da056..b5558ea74 100644 --- a/hutool-core/src/main/java/cn/hutool/core/lang/hash/CityHash.java +++ b/hutool-core/src/main/java/cn/hutool/core/lang/hash/CityHash.java @@ -1,5 +1,7 @@ package cn.hutool.core.lang.hash; +import cn.hutool.core.util.ByteUtil; + import java.util.Arrays; /** @@ -140,11 +142,11 @@ public class CityHash { len = (len - 1) & ~63; int pos = 0; do { - x = rotate(x + y + v.getLowValue() + fetch64(data, pos + 8), 37) * k1; - y = rotate(y + v.getHighValue() + fetch64(data, pos + 48), 42) * k1; + x = rotate64(x + y + v.getLowValue() + fetch64(data, pos + 8), 37) * k1; + y = rotate64(y + v.getHighValue() + fetch64(data, pos + 48), 42) * k1; x ^= w.getHighValue(); y += v.getLowValue() + fetch64(data, pos + 40); - z = rotate(z + w.getLowValue(), 33) * k1; + z = rotate64(z + w.getLowValue(), 33) * k1; v = weakHashLen32WithSeeds(data, pos, v.getHighValue() * k1, x + w.getLowValue()); w = weakHashLen32WithSeeds(data, pos + 32, z + w.getHighValue(), y + fetch64(data, pos + 16)); // swap z,x value @@ -221,19 +223,19 @@ public class CityHash { long x = seed.getLowValue(); long y = seed.getHighValue(); long z = len * k1; - v.setLowValue(rotate(y ^ k1, 49) * k1 + fetch64(byteArray, start)); - v.setHighValue(rotate(v.getLowValue(), 42) * k1 + 
fetch64(byteArray, start + 8)); - w.setLowValue(rotate(y + z, 35) * k1 + x); - w.setHighValue(rotate(x + fetch64(byteArray, start + 88), 53) * k1); + v.setLowValue(rotate64(y ^ k1, 49) * k1 + fetch64(byteArray, start)); + v.setHighValue(rotate64(v.getLowValue(), 42) * k1 + fetch64(byteArray, start + 8)); + w.setLowValue(rotate64(y + z, 35) * k1 + x); + w.setHighValue(rotate64(x + fetch64(byteArray, start + 88), 53) * k1); // This is the same inner loop as CityHash64(), manually unrolled. int pos = start; do { - x = rotate(x + y + v.getLowValue() + fetch64(byteArray, pos + 8), 37) * k1; - y = rotate(y + v.getHighValue() + fetch64(byteArray, pos + 48), 42) * k1; + x = rotate64(x + y + v.getLowValue() + fetch64(byteArray, pos + 8), 37) * k1; + y = rotate64(y + v.getHighValue() + fetch64(byteArray, pos + 48), 42) * k1; x ^= w.getHighValue(); y += v.getLowValue() + fetch64(byteArray, pos + 40); - z = rotate(z + w.getLowValue(), 33) * k1; + z = rotate64(z + w.getLowValue(), 33) * k1; v = weakHashLen32WithSeeds(byteArray, pos, v.getHighValue() * k1, x + w.getLowValue()); w = weakHashLen32WithSeeds(byteArray, pos + 32, z + w.getHighValue(), y + fetch64(byteArray, pos + 16)); @@ -241,11 +243,11 @@ public class CityHash { x = z; z = swapValue; pos += 64; - x = rotate(x + y + v.getLowValue() + fetch64(byteArray, pos + 8), 37) * k1; - y = rotate(y + v.getHighValue() + fetch64(byteArray, pos + 48), 42) * k1; + x = rotate64(x + y + v.getLowValue() + fetch64(byteArray, pos + 8), 37) * k1; + y = rotate64(y + v.getHighValue() + fetch64(byteArray, pos + 48), 42) * k1; x ^= w.getHighValue(); y += v.getLowValue() + fetch64(byteArray, pos + 40); - z = rotate(z + w.getLowValue(), 33) * k1; + z = rotate64(z + w.getLowValue(), 33) * k1; v = weakHashLen32WithSeeds(byteArray, pos, v.getHighValue() * k1, x + w.getLowValue()); w = weakHashLen32WithSeeds(byteArray, pos + 32, z + w.getHighValue(), y + fetch64(byteArray, pos + 16)); swapValue = x; @@ -254,16 +256,16 @@ public class CityHash { 
pos += 64; len -= 128; } while (len >= 128); - x += rotate(v.getLowValue() + z, 49) * k0; - y = y * k0 + rotate(w.getHighValue(), 37); - z = z * k0 + rotate(w.getLowValue(), 27); + x += rotate64(v.getLowValue() + z, 49) * k0; + y = y * k0 + rotate64(w.getHighValue(), 37); + z = z * k0 + rotate64(w.getLowValue(), 27); w.setLowValue(w.getLowValue() * 9); v.setLowValue(v.getLowValue() * k0); // If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s. for (int tail_done = 0; tail_done < len; ) { tail_done += 32; - y = rotate(x + y, 42) * k0 + v.getHighValue(); + y = rotate64(x + y, 42) * k0 + v.getHighValue(); w.setLowValue(w.getLowValue() + fetch64(byteArray, pos + len - tail_done + 16)); x = x * k0 + w.getLowValue(); z += w.getHighValue() + fetch64(byteArray, pos + len - tail_done); @@ -321,8 +323,8 @@ public class CityHash { long mul = k2 + len * 2L; long a = fetch64(byteArray, 0) + k2; long b = fetch64(byteArray, len - 8); - long c = rotate(b, 37) * mul + a; - long d = (rotate(a, 25) + b) * mul; + long c = rotate64(b, 37) * mul + a; + long d = (rotate64(a, 25) + b) * mul; return hashLen16(c, d, mul); } if (len >= 4) { @@ -349,8 +351,8 @@ public class CityHash { long b = fetch64(byteArray, 8); long c = fetch64(byteArray, len - 8) * mul; long d = fetch64(byteArray, len - 16) * k2; - return hashLen16(rotate(a + b, 43) + rotate(c, 30) + d, - a + rotate(b + k2, 18) + c, mul); + return hashLen16(rotate64(a + b, 43) + rotate64(c, 30) + d, + a + rotate64(b + k2, 18) + c, mul); } private static long hashLen33to64(byte[] byteArray) { @@ -364,10 +366,10 @@ public class CityHash { long f = fetch64(byteArray, 24) * 9; long g = fetch64(byteArray, len - 8); long h = fetch64(byteArray, len - 16) * mul; - long u = rotate(a + g, 43) + (rotate(b, 30) + c) * 9; + long u = rotate64(a + g, 43) + (rotate64(b, 30) + c) * 9; long v = ((a + g) ^ d) + f + 1; long w = Long.reverseBytes((u + v) * mul) + h; - long x = rotate(e + f, 42) + c; + long x = rotate64(e + f, 42) + c; 
long y = (Long.reverseBytes((v + w) * mul) + g) * mul; long z = e + f + c; a = Long.reverseBytes((x + z) * mul + y) + b; @@ -375,37 +377,15 @@ public class CityHash { return b + x; } - private static long loadUnaligned64(final byte[] byteArray, final int start) { - long result = 0; - OrderIter orderIter = new OrderIter(8); - while (orderIter.hasNext()) { - int next = orderIter.next(); - long value = (byteArray[next + start] & 0xffL) << (next * 8); - result |= value; - } - return result; - } - - private static int loadUnaligned32(final byte[] byteArray, final int start) { - int result = 0; - OrderIter orderIter = new OrderIter(4); - while (orderIter.hasNext()) { - int next = orderIter.next(); - int value = (byteArray[next + start] & 0xff) << (next * 8); - result |= value; - } - return result; - } - - private static long fetch64(byte[] byteArray, final int start) { - return loadUnaligned64(byteArray, start); + private static long fetch64(byte[] byteArray, int start) { + return ByteUtil.bytesToLong(byteArray, start, ByteUtil.CPU_ENDIAN); } private static int fetch32(byte[] byteArray, final int start) { - return loadUnaligned32(byteArray, start); + return ByteUtil.bytesToInt(byteArray, start, ByteUtil.CPU_ENDIAN); } - private static long rotate(long val, int shift) { + private static long rotate64(long val, int shift) { // Avoid shifting by 64: doing so yields an undefined result. return shift == 0 ? 
val : ((val >>> shift) | (val << (64 - shift))); } @@ -465,11 +445,11 @@ public class CityHash { private static Number128 weakHashLen32WithSeeds( long w, long x, long y, long z, long a, long b) { a += w; - b = rotate(b + a + z, 21); + b = rotate64(b + a + z, 21); long c = a; a += x; a += y; - b += rotate(a, 44); + b += rotate64(a, 44); return new Number128(a + z, b + c); } @@ -515,24 +495,5 @@ public class CityHash { b = hashLen16(d, b); return new Number128(a ^ b, hashLen16(b, a)); } - - private static class OrderIter { - private static final boolean IS_LITTLE_ENDIAN = "little".equals(System.getProperty("sun.cpu.endian")); - - private final int size; - private int index; - - OrderIter(int size) { - this.size = size; - } - - boolean hasNext() { - return index < size; - } - - int next() { - return IS_LITTLE_ENDIAN ? index++ : (size - 1 - index++); - } - } //------------------------------------------------------------------------------------------------------- Private method end } diff --git a/hutool-core/src/main/java/cn/hutool/core/lang/hash/MetroHash.java b/hutool-core/src/main/java/cn/hutool/core/lang/hash/MetroHash.java index e86dc3d97..bbf40c882 100644 --- a/hutool-core/src/main/java/cn/hutool/core/lang/hash/MetroHash.java +++ b/hutool-core/src/main/java/cn/hutool/core/lang/hash/MetroHash.java @@ -1,5 +1,8 @@ package cn.hutool.core.lang.hash; +import cn.hutool.core.util.ByteUtil; + +import java.nio.ByteOrder; import java.util.Arrays; /** @@ -12,7 +15,6 @@ import java.util.Arrays; * Go语言实现:https://github.com/linvon/cuckoo-filter/blob/main/vendor/github.com/dgryski/go-metro/ * @author li */ - public class MetroHash { /** @@ -31,30 +33,14 @@ public class MetroHash { private final static long k2_128 = 0x7BDEC03B; private final static long k3_128 = 0x2F5870A5; - public static long hash64(String str) { - return hash64(str, 1337); - } - public static long hash64(byte[] data) { return hash64(data, 1337); } - public static long hash64(String str, long seed) { - return 
hash64(str.getBytes(), seed); - } - - public static Number128 hash128(String str) { - return hash128(str, 1337); - } - public static Number128 hash128(byte[] data) { return hash128(data, 1337); } - public static Number128 hash128(String str, long seed) { - return hash128(str.getBytes(), seed); - } - public static long hash64(byte[] data, long seed) { byte[] buffer = data; long hash = (seed + k2_64) * k0_64; @@ -68,13 +54,13 @@ public class MetroHash { if (buffer.length >= 32) { while (buffer.length >= 32) { - v0 += littleEndian64(Arrays.copyOfRange(buffer, 0, 8)) * k0_64; + v0 += littleEndian64(buffer, 0) * k0_64; v0 = rotateLeft64(v0, -29) + v2; - v1 += littleEndian64(Arrays.copyOfRange(buffer, 8, 16)) * k1_64; + v1 += littleEndian64(buffer, 8) * k1_64; v1 = rotateLeft64(v1, -29) + v3; - v2 += littleEndian64(Arrays.copyOfRange(buffer, 16, 24)) * k2_64; + v2 += littleEndian64(buffer, 16) * k2_64; v2 = rotateLeft64(v2, -29) + v0; - v3 += littleEndian64(Arrays.copyOfRange(buffer, 24, 32)) * k3_64; + v3 += littleEndian64(buffer, 24) * k3_64; v3 = rotateLeft64(v3, -29) + v1; buffer = Arrays.copyOfRange(buffer, 32, buffer.length); } @@ -87,9 +73,9 @@ public class MetroHash { } if (buffer.length >= 16) { - v0 = hash + littleEndian64(Arrays.copyOfRange(buffer, 0, 8)) * k2_64; + v0 = hash + littleEndian64(buffer, 0) * k2_64; v0 = rotateLeft64(v0, -29) * k3_64; - v1 = hash + littleEndian64(Arrays.copyOfRange(buffer, 8, 16)) * k2_64; + v1 = hash + littleEndian64(buffer, 8) * k2_64; v1 = rotateLeft64(v1, -29) * k3_64; v0 ^= rotateLeft64(v0 * k0_64, -21) + v1; v1 ^= rotateLeft64(v1 * k3_64, -21) + v0; @@ -98,7 +84,7 @@ public class MetroHash { } if (buffer.length >= 8) { - hash += littleEndian64(Arrays.copyOfRange(buffer, 0, 8)) * k3_64; + hash += littleEndian64(buffer, 0) * k3_64; buffer = Arrays.copyOfRange(buffer, 8, buffer.length); hash ^= rotateLeft64(hash, -55) * k1_64; } @@ -127,7 +113,6 @@ public class MetroHash { return hash; } - public static Number128 hash128(byte[] 
data, long seed) { byte[] buffer = data; @@ -141,16 +126,16 @@ public class MetroHash { v3 = (seed - k1_128) * k3_128; while (buffer.length >= 32) { - v0 += littleEndian64(buffer) * k0_128; + v0 += littleEndian64(buffer, 0) * k0_128; buffer = Arrays.copyOfRange(buffer, 8, buffer.length); v0 = rotateRight(v0, 29) + v2; - v1 += littleEndian64(buffer) * k1_128; + v1 += littleEndian64(buffer, 0) * k1_128; buffer = Arrays.copyOfRange(buffer, 8, buffer.length); v1 = rotateRight(v1, 29) + v3; - v2 += littleEndian64(buffer) * k2_128; + v2 += littleEndian64(buffer, 0) * k2_128; buffer = Arrays.copyOfRange(buffer, 8, buffer.length); v2 = rotateRight(v2, 29) + v0; - v3 = littleEndian64(buffer) * k3_128; + v3 += littleEndian64(buffer, 0) * k3_128; buffer = Arrays.copyOfRange(buffer, 8, buffer.length); v3 = rotateRight(v3, 29) + v1; } @@ -162,10 +147,10 @@ public class MetroHash { } if (buffer.length >= 16) { - v0 += littleEndian64(buffer) * k2_128; + v0 += littleEndian64(buffer, 0) * k2_128; buffer = Arrays.copyOfRange(buffer, 8, buffer.length); v0 = rotateRight(v0, 33) * k3_128; - v1 += littleEndian64(buffer) * k2_128; + v1 += littleEndian64(buffer, 0) * k2_128; buffer = Arrays.copyOfRange(buffer, 8, buffer.length); v1 = rotateRight(v1, 33) * k3_128; v0 ^= rotateRight((v0 * k2_128) + v1, 45) + k1_128; @@ -173,7 +158,7 @@ public class MetroHash { } if (buffer.length >= 8) { - v0 += littleEndian64(buffer) * k2_128; + v0 += littleEndian64(buffer, 0) * k2_128; buffer = Arrays.copyOfRange(buffer, 8, buffer.length); v0 = rotateRight(v0, 33) * k3_128; v0 ^= rotateRight((v0 * k2_128) + v1, 27) * k1_128; @@ -208,9 +193,8 @@ public class MetroHash { } - private static long littleEndian64(byte[] b) { - return (long) b[0] | (long) (b[1]) << 8 | (long) b[2] << 16 | (long) b[3] << 24 | - (long) b[4] << 32 | (long) b[5] << 40 | (long) b[6] << 48 | (long) b[7] << 56; + private static long littleEndian64(byte[] b, int start) { + return ByteUtil.bytesToLong(b, start, ByteOrder.LITTLE_ENDIAN); } 
private static int littleEndian32(byte[] b) { @@ -218,7 +202,7 @@ public class MetroHash { } private static int littleEndian16(byte[] b) { - return (short) b[0] | (short) b[1] << 8; + return ByteUtil.bytesToShort(b, ByteOrder.LITTLE_ENDIAN); } private static long rotateLeft64(long x, int k) { diff --git a/hutool-core/src/main/java/cn/hutool/core/lang/hash/MurmurHash.java b/hutool-core/src/main/java/cn/hutool/core/lang/hash/MurmurHash.java index b3dd26961..94826602b 100644 --- a/hutool-core/src/main/java/cn/hutool/core/lang/hash/MurmurHash.java +++ b/hutool-core/src/main/java/cn/hutool/core/lang/hash/MurmurHash.java @@ -1,9 +1,11 @@ package cn.hutool.core.lang.hash; +import cn.hutool.core.util.ByteUtil; import cn.hutool.core.util.CharsetUtil; import cn.hutool.core.util.StrUtil; import java.io.Serializable; +import java.nio.ByteOrder; import java.nio.charset.Charset; /** @@ -41,6 +43,7 @@ public class MurmurHash implements Serializable{ private static final int DEFAULT_SEED = 0; private static final Charset DEFAULT_CHARSET = CharsetUtil.CHARSET_UTF_8; + private static final ByteOrder DEFAULT_ORDER = ByteOrder.LITTLE_ENDIAN; /** * Murmur3 32-bit Hash值计算 @@ -76,11 +79,8 @@ public class MurmurHash implements Serializable{ // body for (int i = 0; i < nblocks; i++) { - int i_4 = i << 2; - int k = (data[i_4] & 0xff) // - | ((data[i_4 + 1] & 0xff) << 8) // - | ((data[i_4 + 2] & 0xff) << 16) // - | ((data[i_4 + 3] & 0xff) << 24); + int i4 = i << 2; + int k = ByteUtil.bytesToInt(data, i4, DEFAULT_ORDER); // mix functions k *= C1_32; @@ -157,14 +157,7 @@ public class MurmurHash implements Serializable{ // body for (int i = 0; i < nblocks; i++) { final int i8 = i << 3; - long k = ((long) data[i8] & 0xff) // - | (((long) data[i8 + 1] & 0xff) << 8) // - | (((long) data[i8 + 2] & 0xff) << 16) // - | (((long) data[i8 + 3] & 0xff) << 24) // - | (((long) data[i8 + 4] & 0xff) << 32)// - | (((long) data[i8 + 5] & 0xff) << 40) // - | (((long) data[i8 + 6] & 0xff) << 48) // - | (((long) 
data[i8 + 7] & 0xff) << 56); + long k = ByteUtil.bytesToLong(data, i8, DEFAULT_ORDER); // mix functions k *= C1; @@ -241,23 +234,8 @@ public class MurmurHash implements Serializable{ // body for (int i = 0; i < nblocks; i++) { final int i16 = i << 4; - long k1 = ((long) data[i16] & 0xff) // - | (((long) data[i16 + 1] & 0xff) << 8) // - | (((long) data[i16 + 2] & 0xff) << 16) // - | (((long) data[i16 + 3] & 0xff) << 24) // - | (((long) data[i16 + 4] & 0xff) << 32) // - | (((long) data[i16 + 5] & 0xff) << 40) // - | (((long) data[i16 + 6] & 0xff) << 48) // - | (((long) data[i16 + 7] & 0xff) << 56); - - long k2 = ((long) data[i16 + 8] & 0xff) // - | (((long) data[i16 + 9] & 0xff) << 8) // - | (((long) data[i16 + 10] & 0xff) << 16) // - | (((long) data[i16 + 11] & 0xff) << 24) // - | (((long) data[i16 + 12] & 0xff) << 32) // - | (((long) data[i16 + 13] & 0xff) << 40) // - | (((long) data[i16 + 14] & 0xff) << 48) // - | (((long) data[i16 + 15] & 0xff) << 56); + long k1 = ByteUtil.bytesToLong(data, i16, DEFAULT_ORDER); + long k2 = ByteUtil.bytesToLong(data, i16 + 8, DEFAULT_ORDER); // mix functions for k1 k1 *= C1; diff --git a/hutool-core/src/main/java/cn/hutool/core/lang/hash/Number128.java b/hutool-core/src/main/java/cn/hutool/core/lang/hash/Number128.java index 4f6301095..12e0a8c35 100644 --- a/hutool-core/src/main/java/cn/hutool/core/lang/hash/Number128.java +++ b/hutool-core/src/main/java/cn/hutool/core/lang/hash/Number128.java @@ -6,7 +6,7 @@ package cn.hutool.core.lang.hash; * @author hexiufeng * @since 5.2.5 */ -public class Number128 extends Number{ +public class Number128 extends Number { private static final long serialVersionUID = 1L; private long lowValue; @@ -23,22 +23,47 @@ public class Number128 extends Number{ this.highValue = highValue; } + /** + * 获取低位值 + * + * @return 低位值 + */ public long getLowValue() { return lowValue; } - public long getHighValue() { - return highValue; - } - + /** + * 设置低位值 + * + * @param lowValue 低位值 + */ public void 
setLowValue(long lowValue) { this.lowValue = lowValue; } + /** + * 获取高位值 + * + * @return 高位值 + */ + public long getHighValue() { + return highValue; + } + + /** + * 设置高位值 + * + * @param hiValue 高位值 + */ public void setHighValue(long hiValue) { this.highValue = hiValue; } + /** + * 获取高低位数组,long[0]:低位,long[1]:高位 + * + * @return 高低位数组,long[0]:低位,long[1]:高位 + */ public long[] getLongArray() { return new long[]{lowValue, highValue}; } diff --git a/hutool-core/src/main/java/cn/hutool/core/lang/tree/TreeBuilder.java b/hutool-core/src/main/java/cn/hutool/core/lang/tree/TreeBuilder.java index 37af3f94a..ebc1e0c13 100644 --- a/hutool-core/src/main/java/cn/hutool/core/lang/tree/TreeBuilder.java +++ b/hutool-core/src/main/java/cn/hutool/core/lang/tree/TreeBuilder.java @@ -228,7 +228,6 @@ public class TreeBuilder implements Builder> { } final Map> eTreeMap = MapUtil.sortByValue(this.idTreeMap, false); - List> rootTreeList = CollUtil.newArrayList(); E parentId; for (Tree node : eTreeMap.values()) { if (null == node) { @@ -237,7 +236,6 @@ public class TreeBuilder implements Builder> { parentId = node.getParentId(); if (ObjectUtil.equals(this.root.getId(), parentId)) { this.root.addChildren(node); - rootTreeList.add(node); continue; } diff --git a/hutool-core/src/main/java/cn/hutool/core/util/ByteUtil.java b/hutool-core/src/main/java/cn/hutool/core/util/ByteUtil.java index f20d26914..46f988c65 100644 --- a/hutool-core/src/main/java/cn/hutool/core/util/ByteUtil.java +++ b/hutool-core/src/main/java/cn/hutool/core/util/ByteUtil.java @@ -27,7 +27,11 @@ import java.util.concurrent.atomic.LongAdder; */ public class ByteUtil { - public static ByteOrder DEFAULT_ORDER = ByteOrder.LITTLE_ENDIAN; + public static final ByteOrder DEFAULT_ORDER = ByteOrder.LITTLE_ENDIAN; + /** + * CPU的字节序 + */ + public static final ByteOrder CPU_ENDIAN = "little".equals(System.getProperty("sun.cpu.endian")) ? 
ByteOrder.LITTLE_ENDIAN : ByteOrder.BIG_ENDIAN; /** * int转byte @@ -130,16 +134,29 @@ public class ByteUtil { * @return int值 */ public static int bytesToInt(byte[] bytes, ByteOrder byteOrder) { + return bytesToInt(bytes, 0, byteOrder); + } + + /** + * byte[]转int值
+ * 自定义端序 + * + * @param bytes byte数组 + * @param byteOrder 端序 + * @return int值 + * @since 5.7.21 + */ + public static int bytesToInt(byte[] bytes, int start, ByteOrder byteOrder) { if (ByteOrder.LITTLE_ENDIAN == byteOrder) { - return bytes[0] & 0xFF | // - (bytes[1] & 0xFF) << 8 | // - (bytes[2] & 0xFF) << 16 | // - (bytes[3] & 0xFF) << 24; // + return bytes[start] & 0xFF | // + (bytes[1 + start] & 0xFF) << 8 | // + (bytes[2 + start] & 0xFF) << 16 | // + (bytes[3 + start] & 0xFF) << 24; // } else { - return bytes[3] & 0xFF | // - (bytes[2] & 0xFF) << 8 | // - (bytes[1] & 0xFF) << 16 | // - (bytes[0] & 0xFF) << 24; // + return bytes[3 + start] & 0xFF | // + (bytes[2 + start] & 0xFF) << 8 | // + (bytes[1 + start] & 0xFF) << 16 | // + (bytes[start] & 0xFF) << 24; // } } @@ -243,16 +260,31 @@ public class ByteUtil { * @return long值 */ public static long bytesToLong(byte[] bytes, ByteOrder byteOrder) { + return bytesToLong(bytes, 0, byteOrder); + } + + /** + * byte数组转long
+ * 自定义端序
+ * from: https://stackoverflow.com/questions/4485128/how-do-i-convert-long-to-byte-and-back-in-java + * + * @param bytes byte数组 + * @param start 计算数组开始位置 + * @param byteOrder 端序 + * @return long值 + * @since 5.7.21 + */ + public static long bytesToLong(byte[] bytes, int start, ByteOrder byteOrder) { long values = 0; if (ByteOrder.LITTLE_ENDIAN == byteOrder) { for (int i = (Long.BYTES - 1); i >= 0; i--) { values <<= Byte.SIZE; - values |= (bytes[i] & 0xff); + values |= (bytes[i + start] & 0xff); } } else { for (int i = 0; i < Long.BYTES; i++) { values <<= Byte.SIZE; - values |= (bytes[i] & 0xff); + values |= (bytes[i + start] & 0xff); } } diff --git a/hutool-core/src/main/java/cn/hutool/core/util/HashUtil.java b/hutool-core/src/main/java/cn/hutool/core/util/HashUtil.java index 5bd64d5aa..bafe37995 100644 --- a/hutool-core/src/main/java/cn/hutool/core/util/HashUtil.java +++ b/hutool-core/src/main/java/cn/hutool/core/util/HashUtil.java @@ -387,7 +387,7 @@ public class HashUtil { if (ucChar <= 'Z' && ucChar >= 'A') { ucChar = (char) (ucChar + 32); } - hash += (3 * i * ucChar * ucChar + 5 * i * ucChar + 7 * i + 11 * ucChar) % 16777216; + hash += (3L * i * ucChar * ucChar + 5L * i * ucChar + 7L * i + 11 * ucChar) % 16777216; } } else { for (i = 1; i <= 96; i++) { @@ -395,7 +395,7 @@ public class HashUtil { if (ucChar <= 'Z' && ucChar >= 'A') { ucChar = (char) (ucChar + 32); } - hash += (3 * i * ucChar * ucChar + 5 * i * ucChar + 7 * i + 11 * ucChar) % 16777216; + hash += (3L * i * ucChar * ucChar + 5L * i * ucChar + 7L * i + 11 * ucChar) % 16777216; } } if (hash < 0) { @@ -552,7 +552,7 @@ public class HashUtil { * * @param data 数据 * @param seed 种子 - * @return + * @return hash值 */ public static long metroHash64(byte[] data, long seed) { return MetroHash.hash64(data, seed); @@ -562,33 +562,12 @@ public class HashUtil { * MetroHash 算法64-bit实现 * * @param data 数据 - * @return + * @return hash值 */ public static long metroHash64(byte[] data) { return MetroHash.hash64(data); } - 
/** - * MetroHash 算法64-bit实现 - * - * @param str 数据 - * @param seed 种子 - * @return - */ - public static long metroHash64(String str,long seed) { - return MetroHash.hash64(str,seed); - } - - /** - * MetroHash 算法64-bit实现 - * - * @param str 数据 - * @return - */ - public static long metroHash64(String str) { - return MetroHash.hash64(str); - } - /** * MetroHash 算法128-bit实现 * @@ -609,26 +588,4 @@ public class HashUtil { public static long[] metroHash128(byte[] data) { return MetroHash.hash128(data).getLongArray(); } - - /** - * MetroHash 算法128-bit实现 - * - * @param str 数据 - * @return hash值,long[0]:低位,long[1]:高位 - */ - public static long[] metroHash128(String str) { - return MetroHash.hash128(str).getLongArray(); - } - - /** - * MetroHash 算法128-bit实现 - * - * @param str 数据 - * @param seed 种子 - * @return hash值,long[0]:低位,long[1]:高位 - */ - public static long[] metroHash128(String str, long seed) { - return MetroHash.hash128(str,seed).getLongArray(); - } - } diff --git a/hutool-core/src/test/java/cn/hutool/core/lang/hash/CityHashTest.java b/hutool-core/src/test/java/cn/hutool/core/lang/hash/CityHashTest.java new file mode 100755 index 000000000..d253ffa2e --- /dev/null +++ b/hutool-core/src/test/java/cn/hutool/core/lang/hash/CityHashTest.java @@ -0,0 +1,36 @@ +package cn.hutool.core.lang.hash; + +import cn.hutool.core.util.StrUtil; +import org.junit.Assert; +import org.junit.Test; + +public class CityHashTest { + + @Test + public void hash32Test() { + int hv = CityHash.hash32(StrUtil.utf8Bytes("你")); + Assert.assertEquals(1290029860, hv); + + hv = CityHash.hash32(StrUtil.utf8Bytes("你好")); + Assert.assertEquals(1374181357, hv); + + hv = CityHash.hash32(StrUtil.utf8Bytes("见到你很高兴")); + Assert.assertEquals(1475516842, hv); + hv = CityHash.hash32(StrUtil.utf8Bytes("我们将通过生成一个大的文件的方式来检验各种方法的执行效率因为这种方式在结束的时候需要执行文件")); + Assert.assertEquals(0x51020cae, hv); + } + + @Test + public void hash64Test() { + long hv = CityHash.hash64(StrUtil.utf8Bytes("你")); + 
Assert.assertEquals(-4296898700418225525L, hv); + + hv = CityHash.hash64(StrUtil.utf8Bytes("你好")); + Assert.assertEquals(-4294276205456761303L, hv); + + hv = CityHash.hash64(StrUtil.utf8Bytes("见到你很高兴")); + Assert.assertEquals(272351505337503793L, hv); + hv = CityHash.hash64(StrUtil.utf8Bytes("我们将通过生成一个大的文件的方式来检验各种方法的执行效率因为这种方式在结束的时候需要执行文件")); + Assert.assertEquals(-8234735310919228703L, hv); + } +} diff --git a/hutool-core/src/test/java/cn/hutool/core/lang/MetroHashTest.java b/hutool-core/src/test/java/cn/hutool/core/lang/hash/MetroHashTest.java similarity index 65% rename from hutool-core/src/test/java/cn/hutool/core/lang/MetroHashTest.java rename to hutool-core/src/test/java/cn/hutool/core/lang/hash/MetroHashTest.java index b30b5f9f7..553ebee33 100644 --- a/hutool-core/src/test/java/cn/hutool/core/lang/MetroHashTest.java +++ b/hutool-core/src/test/java/cn/hutool/core/lang/hash/MetroHashTest.java @@ -1,19 +1,45 @@ -package cn.hutool.core.lang; +package cn.hutool.core.lang.hash; -import cn.hutool.core.lang.hash.CityHash; -import cn.hutool.core.lang.hash.MetroHash; +import cn.hutool.core.util.CharsetUtil; +import cn.hutool.core.util.HexUtil; +import cn.hutool.core.util.StrUtil; +import org.junit.Assert; +import org.junit.Ignore; import org.junit.Test; import java.util.Random; +/** + * https://gitee.com/dromara/hutool/pulls/532 + */ public class MetroHashTest { + @Test + public void testEmpty() { + Assert.assertEquals("31290877cceaea29", HexUtil.toHex(MetroHash.hash64(StrUtil.utf8Bytes(""), 0))); + } + + @Test + public void metroHash64Test() { + byte[] str = "我是一段测试123".getBytes(CharsetUtil.CHARSET_UTF_8); + final long hash64 = MetroHash.hash64(str); + Assert.assertEquals(62920234463891865L, hash64); + } + + @Test + public void metroHash128Test() { + byte[] str = "我是一段测试123".getBytes(CharsetUtil.CHARSET_UTF_8); + final long[] hash128 = MetroHash.hash128(str).getLongArray(); + Assert.assertEquals(4629350038757384271L, hash128[0]); + 
Assert.assertEquals(-1607305036506152112L, hash128[1]); + } /** * 数据量越大 MetroHash 优势越明显, */ @Test + @Ignore public void bulkHashing64Test() { String[] strArray = getRandomStringArray(10000000); long startCity = System.currentTimeMillis(); @@ -24,7 +50,7 @@ public class MetroHashTest { long startMetro = System.currentTimeMillis(); for (String s : strArray) { - MetroHash.hash64(s); + MetroHash.hash64(StrUtil.utf8Bytes(s)); } long endMetro = System.currentTimeMillis(); @@ -37,6 +63,7 @@ public class MetroHashTest { * 数据量越大 MetroHash 优势越明显, */ @Test + @Ignore public void bulkHashing128Test() { String[] strArray = getRandomStringArray(10000000); long startCity = System.currentTimeMillis(); @@ -47,7 +74,7 @@ public class MetroHashTest { long startMetro = System.currentTimeMillis(); for (String s : strArray) { - MetroHash.hash128(s); + MetroHash.hash128(StrUtil.utf8Bytes(s)); } long endMetro = System.currentTimeMillis(); diff --git a/hutool-core/src/test/java/cn/hutool/core/lang/hash/MurMurHashTest.java b/hutool-core/src/test/java/cn/hutool/core/lang/hash/MurMurHashTest.java new file mode 100755 index 000000000..fcf946b5e --- /dev/null +++ b/hutool-core/src/test/java/cn/hutool/core/lang/hash/MurMurHashTest.java @@ -0,0 +1,36 @@ +package cn.hutool.core.lang.hash; + +import cn.hutool.core.util.StrUtil; +import org.junit.Assert; +import org.junit.Test; + +public class MurMurHashTest { + + @Test + public void hash32Test() { + int hv = MurmurHash.hash32(StrUtil.utf8Bytes("你")); + Assert.assertEquals(222142701, hv); + + hv = MurmurHash.hash32(StrUtil.utf8Bytes("你好")); + Assert.assertEquals(1188098267, hv); + + hv = MurmurHash.hash32(StrUtil.utf8Bytes("见到你很高兴")); + Assert.assertEquals(-1898490321, hv); + hv = MurmurHash.hash32(StrUtil.utf8Bytes("我们将通过生成一个大的文件的方式来检验各种方法的执行效率因为这种方式在结束的时候需要执行文件")); + Assert.assertEquals(-1713131054, hv); + } + + @Test + public void hash64Test() { + long hv = MurmurHash.hash64(StrUtil.utf8Bytes("你")); + Assert.assertEquals(-1349759534971957051L, 
hv); + + hv = MurmurHash.hash64(StrUtil.utf8Bytes("你好")); + Assert.assertEquals(-7563732748897304996L, hv); + + hv = MurmurHash.hash64(StrUtil.utf8Bytes("见到你很高兴")); + Assert.assertEquals(-766658210119995316L, hv); + hv = MurmurHash.hash64(StrUtil.utf8Bytes("我们将通过生成一个大的文件的方式来检验各种方法的执行效率因为这种方式在结束的时候需要执行文件")); + Assert.assertEquals(-7469283059271653317L, hv); + } +} diff --git a/hutool-core/src/test/java/cn/hutool/core/util/ByteUtilTest.java b/hutool-core/src/test/java/cn/hutool/core/util/ByteUtilTest.java index b68b95ce9..ba7121f53 100644 --- a/hutool-core/src/test/java/cn/hutool/core/util/ByteUtilTest.java +++ b/hutool-core/src/test/java/cn/hutool/core/util/ByteUtilTest.java @@ -10,9 +10,16 @@ public class ByteUtilTest { @Test public void intAndBytesLittleEndianTest() { // 测试 int 转小端序 byte 数组 - int int1 = RandomUtil.randomInt(); + int int1 = RandomUtil.randomInt((Integer.MAX_VALUE)); + + ByteBuffer buffer = ByteBuffer.allocate(Integer.BYTES); + buffer.order(ByteOrder.LITTLE_ENDIAN); + buffer.putInt(int1); + byte[] bytesIntFromBuffer = buffer.array(); byte[] bytesInt = ByteUtil.intToBytes(int1, ByteOrder.LITTLE_ENDIAN); + Assert.assertArrayEquals(bytesIntFromBuffer, bytesInt); + int int2 = ByteUtil.bytesToInt(bytesInt, ByteOrder.LITTLE_ENDIAN); Assert.assertEquals(int1, int2); @@ -28,8 +35,14 @@ public class ByteUtilTest { @Test public void intAndBytesBigEndianTest() { // 测试 int 转大端序 byte 数组 - int int2 = RandomUtil.randomInt(); + int int2 = RandomUtil.randomInt(Integer.MAX_VALUE); + + ByteBuffer buffer = ByteBuffer.allocate(Integer.BYTES); + buffer.putInt(int2); + byte[] bytesIntFromBuffer = buffer.array(); + byte[] bytesInt = ByteUtil.intToBytes(int2, ByteOrder.BIG_ENDIAN); + Assert.assertArrayEquals(bytesIntFromBuffer, bytesInt); // 测试大端序 byte 数组转 int int int3 = ByteUtil.bytesToInt(bytesInt, ByteOrder.BIG_ENDIAN); @@ -39,9 +52,16 @@ public class ByteUtilTest { @Test public void longAndBytesLittleEndianTest() { // 测试 long 转 byte 数组 - long long1 = 2223; + long long1 
= RandomUtil.randomLong(Long.MAX_VALUE); + + ByteBuffer buffer = ByteBuffer.allocate(Long.BYTES); + buffer.order(ByteOrder.LITTLE_ENDIAN); + buffer.putLong(long1); + byte[] bytesLongFromBuffer = buffer.array(); byte[] bytesLong = ByteUtil.longToBytes(long1, ByteOrder.LITTLE_ENDIAN); + Assert.assertArrayEquals(bytesLongFromBuffer, bytesLong); + long long2 = ByteUtil.bytesToLong(bytesLong, ByteOrder.LITTLE_ENDIAN); Assert.assertEquals(long1, long2); @@ -57,11 +77,16 @@ public class ByteUtilTest { @Test public void longAndBytesBigEndianTest() { // 测试大端序 long 转 byte 数组 - long long1 = 2223; + long long1 = RandomUtil.randomLong(Long.MAX_VALUE); + + ByteBuffer buffer = ByteBuffer.allocate(Long.BYTES); + buffer.putLong(long1); + byte[] bytesLongFromBuffer = buffer.array(); byte[] bytesLong = ByteUtil.longToBytes(long1, ByteOrder.BIG_ENDIAN); - long long2 = ByteUtil.bytesToLong(bytesLong, ByteOrder.BIG_ENDIAN); + Assert.assertArrayEquals(bytesLongFromBuffer, bytesLong); + long long2 = ByteUtil.bytesToLong(bytesLong, ByteOrder.BIG_ENDIAN); Assert.assertEquals(long1, long2); } diff --git a/pom.xml b/pom.xml index 1a09fde76..e12460dc7 100644 --- a/pom.xml +++ b/pom.xml @@ -35,7 +35,7 @@ hutool-captcha hutool-socket hutool-jwt - + utf-8 @@ -157,7 +157,7 @@ html xml - +