remove invalid

This commit is contained in:
Looly 2022-04-30 12:37:10 +08:00
parent 8034289b6f
commit a9c4592663
24 changed files with 135 additions and 702 deletions

View File

@ -1,35 +0,0 @@
package cn.hutool.core.map.bitMap;
/**
* Bit-map contract: maps an int or long value onto a position in an array so that
* the presence of that value can be tested.
*
* @author looly
*
*/
public interface BitMap{
/** Word width of 32 bits. */
int MACHINE32 = 32;
/** Word width of 64 bits. */
int MACHINE64 = 64;
/**
* Adds a value.
*
* @param i value to add
*/
void add(long i);
/**
* Checks whether a value is contained.
*
* @param i value to look up
* @return whether the value is contained
*/
boolean contains(long i);
/**
* Removes a value.
*
* @param i value to remove
*/
void remove(long i);
}

View File

@ -1,53 +0,0 @@
package cn.hutool.core.map.bitMap;
import java.io.Serializable;
/**
 * {@link BitMap} for filters, backed by an {@code int[]} that stores
 * {@link BitMap#MACHINE32} flags per array element.
 * <p>
 * Per the original author, this class performs better on 32-bit machines and is the
 * variant recommended in general.
 *
 * @author loolly
 */
public class IntMap implements BitMap, Serializable {
	private static final long serialVersionUID = 1L;

	private final int[] ints;

	/**
	 * Creates a map with the default backing array of 93750000 ints.
	 */
	public IntMap() {
		this(93750000);
	}

	/**
	 * Creates a map with a backing array of the given length.
	 *
	 * @param size capacity, i.e. the number of ints in the backing array
	 */
	public IntMap(int size) {
		this.ints = new int[size];
	}

	@Override
	public void add(long i) {
		ints[wordIndex(i)] |= bitMask(i);
	}

	@Override
	public boolean contains(long i) {
		return (ints[wordIndex(i)] & bitMask(i)) != 0;
	}

	@Override
	public void remove(long i) {
		ints[wordIndex(i)] &= ~bitMask(i);
	}

	/**
	 * Index of the array element that holds the flag for the given value.
	 */
	private static int wordIndex(long value) {
		return (int) (value / BitMap.MACHINE32);
	}

	/**
	 * Single-bit mask selecting the flag for the given value inside its array element.
	 */
	private static int bitMask(long value) {
		final int offset = (int) (value & (BitMap.MACHINE32 - 1));
		return 1 << offset;
	}
}

View File

@ -1,53 +0,0 @@
package cn.hutool.core.map.bitMap;
import java.io.Serializable;
/**
 * {@link BitMap} for filters, backed by a {@code long[]} that stores
 * {@link BitMap#MACHINE64} flags per array element.
 * <p>
 * Per the original author, this class performs better on 64-bit machines and is not
 * recommended for ordinary machines.
 *
 * @author loolly
 */
public class LongMap implements BitMap, Serializable {
	private static final long serialVersionUID = 1L;

	private final long[] longs;

	/**
	 * Creates a map with the default backing array of 93750000 longs.
	 */
	public LongMap() {
		this(93750000);
	}

	/**
	 * Creates a map with a backing array of the given length.
	 *
	 * @param size capacity, i.e. the number of longs in the backing array
	 */
	public LongMap(int size) {
		this.longs = new long[size];
	}

	@Override
	public void add(long i) {
		longs[wordIndex(i)] |= bitMask(i);
	}

	@Override
	public boolean contains(long i) {
		return (longs[wordIndex(i)] & bitMask(i)) != 0L;
	}

	@Override
	public void remove(long i) {
		longs[wordIndex(i)] &= ~bitMask(i);
	}

	/**
	 * Index of the array element that holds the flag for the given value.
	 */
	private static int wordIndex(long value) {
		return (int) (value / BitMap.MACHINE64);
	}

	/**
	 * Single-bit mask selecting the flag for the given value inside its array element.
	 */
	private static long bitMask(long value) {
		final int offset = (int) (value & (BitMap.MACHINE64 - 1));
		return 1L << offset;
	}
}

View File

@ -1,7 +0,0 @@
/**
* BitMap implementations.
*
* @author looly
*
*/
package cn.hutool.core.map.bitMap;

View File

@ -0,0 +1,50 @@
package cn.hutool.core.text.bloom;
import java.util.BitSet;
/**
 * Abstract Bloom filter that records one bit per string in a {@link BitSet}.
 * <p>
 * Subclasses supply the hash through {@link #hash(String)}; the absolute value of that
 * hash is used as the bit index.
 *
 * @author looly
 */
public abstract class AbstractFilter implements BloomFilter {
	private static final long serialVersionUID = 1L;

	private final BitSet bitSet;
	protected int size;

	/**
	 * Creates the filter.
	 *
	 * @param size capacity; used as the initial size of the underlying {@link BitSet}
	 */
	public AbstractFilter(int size) {
		this.size = size;
		this.bitSet = new BitSet(size);
	}

	@Override
	public boolean contains(String str) {
		return bitSet.get(bitIndex(str));
	}

	@Override
	public boolean add(String str) {
		final int index = bitIndex(str);
		if (bitSet.get(index)) {
			return false;
		}
		bitSet.set(index);
		return true;
	}

	/**
	 * Custom hash method.
	 *
	 * @param str string to hash
	 * @return hash code
	 */
	public abstract int hash(String str);

	/**
	 * Converts the hash of the given string into a non-negative bit index.
	 * <p>
	 * {@code Math.abs(Integer.MIN_VALUE)} is still negative, which would make the
	 * {@link BitSet} throw {@link IndexOutOfBoundsException}; that single value is mapped
	 * to index 0 instead. Every other hash keeps the index it had before.
	 *
	 * @param str string to hash
	 * @return non-negative bit index
	 */
	private int bitIndex(String str) {
		final int hash = hash(str);
		return hash == Integer.MIN_VALUE ? 0 : Math.abs(hash);
	}
}

View File

@ -1,82 +0,0 @@
package cn.hutool.core.text.bloom;
import cn.hutool.core.text.bloom.filter.DefaultFilter;
import cn.hutool.core.text.bloom.filter.ELFFilter;
import cn.hutool.core.text.bloom.filter.JSFilter;
import cn.hutool.core.text.bloom.filter.PJWFilter;
import cn.hutool.core.text.bloom.filter.SDBMFilter;
import cn.hutool.core.util.NumberUtil;
/**
 * BloomFilter implementation<br>
 * 1. Build the hash algorithms<br>
 * 2. Map each hash onto a bit position of an array<br>
 * 3. Verify<br>
 * This implementation allows the hash algorithms to be specified.
 *
 * @author Ansj
 */
public class BitMapBloomFilter implements BloomFilter {
	private static final long serialVersionUID = 1L;

	private BloomFilter[] filters;

	/**
	 * Creates the filter with the five default filters (Default, ELF, JS, PJW, SDBM).
	 * <p>
	 * Each filter is given a size of {@code (m / 5) * 1024 * 1024 * 8}.
	 *
	 * @param m M value, determines the size of the BitMap
	 */
	public BitMapBloomFilter(int m) {
		long mNum = NumberUtil.div(String.valueOf(m), String.valueOf(5)).longValue();
		long size = mNum * 1024 * 1024 * 8;

		filters = new BloomFilter[]{
				new DefaultFilter(size),
				new ELFFilter(size),
				new JSFilter(size),
				new PJWFilter(size),
				new SDBMFilter(size)
		};
	}

	/**
	 * Creates a BloomFilter from the given filters.
	 * <p>
	 * The five default filters are no longer built here: they were replaced immediately
	 * by {@code filters}, so allocating them only cost memory.
	 *
	 * @param m       M value; unused by this constructor, kept for signature compatibility
	 * @param filters Bloom filters to combine
	 */
	public BitMapBloomFilter(int m, BloomFilter... filters) {
		this.filters = filters;
	}

	/**
	 * Adds the string to every underlying filter.
	 *
	 * @param str string to add
	 * @return {@code true} if at least one underlying filter reported the string as newly added
	 */
	@Override
	public boolean add(String str) {
		boolean flag = false;
		for (BloomFilter filter : filters) {
			// non-short-circuit on purpose: every filter must record the string
			flag |= filter.add(str);
		}
		return flag;
	}

	/**
	 * Tests whether the string may be contained; false positives are possible.
	 *
	 * @param str string to test
	 * @return {@code true} only if every underlying filter reports the string as contained
	 */
	@Override
	public boolean contains(String str) {
		for (BloomFilter filter : filters) {
			if (!filter.contains(str)) {
				return false;
			}
		}
		return true;
	}
}

View File

@ -1,149 +0,0 @@
package cn.hutool.core.text.bloom;
import cn.hutool.core.io.FileUtil;
import cn.hutool.core.io.IoUtil;
import cn.hutool.core.lang.hash.HashUtil;
import java.io.BufferedReader;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.BitSet;
/**
 * BloomFilter implementation 2, storing its bits in a {@link BitSet}.<br>
 * The hash algorithms are applied in a fixed order, so only their number has to be given.
 *
 * @author loolly
 */
public class BitSetBloomFilter implements BloomFilter {
	private static final long serialVersionUID = 1L;

	private final BitSet bitSet;
	private final int bitSetSize;
	private final int addedElements;
	private final int hashFunctionNumber;

	/**
	 * Creates a Bloom filter with a capacity of {@code c * k} bits.
	 *
	 * @param c maximum number of records reserved up front, usually twice the expected number of records
	 * @param n number of records the filter is expected to hold; only used by
	 *          {@link #getFalsePositiveProbability()}
	 * @param k number of hash functions, i.e. the number of bits used per record;
	 *          {@link #hash(String, int)} defines algorithms for indices 0 to 7 only
	 */
	public BitSetBloomFilter(int c, int n, int k) {
		this.hashFunctionNumber = k;
		// plain int product; the former Math.ceil around it was a no-op
		this.bitSetSize = c * k;
		this.addedElements = n;
		this.bitSet = new BitSet(this.bitSetSize);
	}

	/**
	 * Initialises the filter from a file, adding every line as one entry.
	 *
	 * @param path    file path
	 * @param charset charset of the file
	 * @throws IOException on read failure
	 * @since 5.8.0
	 */
	public void init(String path, Charset charset) throws IOException {
		BufferedReader reader = FileUtil.getReader(path, charset);
		try {
			String line;
			while ((line = reader.readLine()) != null) {
				this.add(line);
			}
		} finally {
			IoUtil.close(reader);
		}
	}

	/**
	 * Adds the string to the filter.
	 * <p>
	 * The hashes are computed once; the result is the same as checking
	 * {@link #contains(String)} first and then setting every bit.
	 *
	 * @param str string to add
	 * @return {@code false} if all of the string's bits were already set, {@code true} otherwise
	 */
	@Override
	public boolean add(String str) {
		boolean changed = false;
		for (int value : createHashes(str, hashFunctionNumber)) {
			final int position = Math.abs(value % bitSetSize);
			if (!bitSet.get(position)) {
				bitSet.set(position);
				changed = true;
			}
		}
		return changed;
	}

	/**
	 * Tests whether the given string is contained.
	 *
	 * @param str string to test
	 * @return whether the string is contained; false positives are possible
	 */
	@Override
	public boolean contains(String str) {
		for (int value : createHashes(str, hashFunctionNumber)) {
			if (!bitSet.get(Math.abs(value % bitSetSize))) {
				return false;
			}
		}
		return true;
	}

	/**
	 * @return the false-positive rate of this filter, computed from the expected number
	 * of records passed to the constructor
	 */
	public double getFalsePositiveProbability() {
		// (1 - e^(-k * n / m)) ^ k
		return Math.pow((1 - Math.exp(-hashFunctionNumber * (double) addedElements / bitSetSize)), hashFunctionNumber);
	}

	/**
	 * Hashes the string with the first {@code hashNumber} algorithms.
	 *
	 * @param str        string to be added to the filter
	 * @param hashNumber number of hash algorithms to apply
	 * @return array with the result of each hash
	 */
	public static int[] createHashes(String str, int hashNumber) {
		int[] result = new int[hashNumber];
		for (int i = 0; i < hashNumber; i++) {
			result[i] = hash(str, i);
		}
		return result;
	}

	/**
	 * Computes a hash value with the algorithm selected by {@code k}.
	 *
	 * @param str string to hash
	 * @param k   index of the hash algorithm (0 to 7)
	 * @return hash value; 0 for any index outside 0 to 7
	 */
	public static int hash(String str, int k) {
		switch (k) {
			case 0:
				return HashUtil.rsHash(str);
			case 1:
				return HashUtil.jsHash(str);
			case 2:
				return HashUtil.elfHash(str);
			case 3:
				return HashUtil.bkdrHash(str);
			case 4:
				return HashUtil.apHash(str);
			case 5:
				return HashUtil.djbHash(str);
			case 6:
				return HashUtil.sdbmHash(str);
			case 7:
				return HashUtil.pjwHash(str);
			default:
				return 0;
		}
	}
}

View File

@ -1,32 +0,0 @@
package cn.hutool.core.text.bloom;
/**
* Bloom filter utilities.
*
* @author looly
* @since 4.1.5
*/
public class BloomFilterUtil {
/**
* Creates a Bloom filter backed by a BitSet.
*
* @param c maximum number of records reserved up front, usually twice the expected number of records
* @param n number of records the filter is expected to hold
* @param k number of hash functions, i.e. the number of bits used per record
* @return BitSetBloomFilter
*/
public static BitSetBloomFilter createBitSet(int c, int n, int k) {
return new BitSetBloomFilter(c, n, k);
}
/**
* Creates a Bloom filter backed by a BitMap.
*
* @param m size of the BitMap
* @return BitMapBloomFilter
*/
public static BitMapBloomFilter createBitMap(int m) {
return new BitMapBloomFilter(m);
}
}

View File

@ -0,0 +1,55 @@
package cn.hutool.core.text.bloom;
/**
 * Combined BloomFilter implementation<br>
 * 1. Build the hash algorithms<br>
 * 2. Map each hash onto a bit position of an array<br>
 * 3. Verify<br>
 * This implementation allows the hash algorithms to be specified.
 *
 * @author Ansj
 */
public class CombinedBloomFilter implements BloomFilter {
	private static final long serialVersionUID = 1L;

	private final BloomFilter[] filters;

	/**
	 * Creates a BloomFilter from the given filters.
	 *
	 * @param filters Bloom filters to combine
	 */
	public CombinedBloomFilter(BloomFilter... filters) {
		this.filters = filters;
	}

	/**
	 * Adds the string to every underlying filter.
	 *
	 * @param str string to add
	 * @return {@code true} if at least one underlying filter returned {@code true}
	 */
	@Override
	public boolean add(String str) {
		boolean anyAdded = false;
		for (int idx = 0; idx < filters.length; idx++) {
			// every filter is invoked, regardless of earlier results
			if (filters[idx].add(str)) {
				anyAdded = true;
			}
		}
		return anyAdded;
	}

	/**
	 * Tests whether the string may be contained; false positives are possible.
	 *
	 * @param str string to test
	 * @return {@code true} only if every underlying filter reports the string as contained
	 */
	@Override
	public boolean contains(String str) {
		for (final BloomFilter delegate : filters) {
			if (!delegate.contains(str)) {
				return false;
			}
		}
		return true;
	}
}

View File

@ -0,0 +1,29 @@
package cn.hutool.core.text.bloom;
import java.util.function.Function;
/**
* {@link BloomFilter} whose hash is supplied as a function.
*
* @author looly
* @since 5.8.0
*/
public class FuncFilter extends AbstractFilter {
private static final long serialVersionUID = 1L;
// Caller-supplied hash function; hash(String) narrows its result with intValue().
private final Function<String, Number> hashFunc;
/**
* Creates a filter that hashes with the given function.
*
* @param size maximum value; passed to the superclass constructor
* @param hashFunc hash function
*/
public FuncFilter(int size, Function<String, Number> hashFunc) {
super(size);
this.hashFunc = hashFunc;
}
/**
* Applies the hash function and takes the remainder of its {@code int} value divided by {@code size}.
* The remainder keeps the sign of the hash, so it can be negative.
*
* @param str string to hash
* @return remainder of the hash divided by {@code size}
*/
@Override
public int hash(String str) {
return hashFunc.apply(str).intValue() % size;
}
}

View File

@ -1,85 +0,0 @@
package cn.hutool.core.text.bloom.filter;
import cn.hutool.core.map.bitMap.BitMap;
import cn.hutool.core.map.bitMap.IntMap;
import cn.hutool.core.map.bitMap.LongMap;
import cn.hutool.core.text.bloom.BloomFilter;
/**
 * Abstract Bloom filter that records one bit per string in a {@link BitMap}.
 * <p>
 * Subclasses supply the hash through {@link #hash(String)}; the absolute value of that
 * hash is used as the bit position.
 *
 * @author loolly
 */
public abstract class AbstractFilter implements BloomFilter {
	private static final long serialVersionUID = 1L;

	protected static int DEFAULT_MACHINE_NUM = BitMap.MACHINE32;

	private BitMap bm = null;

	protected long size;

	/**
	 * Creates the filter.
	 *
	 * @param maxValue   maximum value
	 * @param machineNum machine word width, {@link BitMap#MACHINE32} or {@link BitMap#MACHINE64}
	 */
	public AbstractFilter(long maxValue, int machineNum) {
		// NOTE(review): init is public and overridable, so a subclass override runs before
		// the subclass constructor body; left as is to keep the API unchanged.
		init(maxValue, machineNum);
	}

	/**
	 * Creates the filter with the default word width ({@link #DEFAULT_MACHINE_NUM}).
	 *
	 * @param maxValue maximum value
	 */
	public AbstractFilter(long maxValue) {
		this(maxValue, DEFAULT_MACHINE_NUM);
	}

	/**
	 * Initialises the filter, replacing any previously allocated bit map.
	 * <p>
	 * The backing array length is rounded up, so a {@code maxValue} that is not a multiple
	 * of {@code machineNum} still gets a word for its last bits. With the former truncating
	 * division, such positions fell outside the array.
	 *
	 * @param maxValue   maximum value
	 * @param machineNum machine word width, {@link BitMap#MACHINE32} or {@link BitMap#MACHINE64}
	 * @throws RuntimeException if {@code machineNum} is neither 32 nor 64
	 */
	public void init(long maxValue, int machineNum) {
		this.size = maxValue;
		switch (machineNum) {
			case BitMap.MACHINE32:
				bm = new IntMap(wordCount(maxValue, machineNum));
				break;
			case BitMap.MACHINE64:
				bm = new LongMap(wordCount(maxValue, machineNum));
				break;
			default:
				throw new RuntimeException("Error Machine number!");
		}
	}

	@Override
	public boolean contains(String str) {
		return bm.contains(position(str));
	}

	@Override
	public boolean add(String str) {
		final long hash = position(str);
		if (bm.contains(hash)) {
			return false;
		}

		bm.add(hash);
		return true;
	}

	/**
	 * Custom hash method.
	 *
	 * @param str string to hash
	 * @return hash code
	 */
	public abstract long hash(String str);

	/**
	 * Number of words needed to hold {@code bits} bits, rounded up.
	 * Non-positive {@code bits} give the same result as plain division.
	 */
	private static int wordCount(long bits, int wordBits) {
		final long words = bits / wordBits + (bits % wordBits > 0 ? 1 : 0);
		return (int) words;
	}

	/**
	 * Converts the hash of the given string into a non-negative bit position.
	 * {@code Math.abs(Long.MIN_VALUE)} is still negative, so that value is mapped to 0.
	 */
	private long position(String str) {
		final long hash = hash(str);
		return hash == Long.MIN_VALUE ? 0L : Math.abs(hash);
	}
}

View File

@ -1,20 +0,0 @@
package cn.hutool.core.text.bloom.filter;
import cn.hutool.core.lang.hash.HashUtil;
/**
* Default Bloom filter, using Java's built-in hash algorithm ({@code HashUtil.javaDefaultHash}).
*
* @author loolly
*/
public class DefaultFilter extends FuncFilter {
private static final long serialVersionUID = 1L;
/**
* Creates the filter with the inherited {@code DEFAULT_MACHINE_NUM} word width.
*
* @param maxValue maximum value, forwarded to the superclass
*/
public DefaultFilter(long maxValue) {
this(maxValue, DEFAULT_MACHINE_NUM);
}
/**
* Creates the filter with an explicit word width.
*
* @param maxValue maximum value, forwarded to the superclass
* @param machineNumber machine word width, forwarded to the superclass
*/
public DefaultFilter(long maxValue, int machineNumber) {
super(maxValue, machineNumber, HashUtil::javaDefaultHash);
}
}

View File

@ -1,15 +0,0 @@
package cn.hutool.core.text.bloom.filter;
import cn.hutool.core.lang.hash.HashUtil;
/**
* Bloom filter that hashes with {@code HashUtil.elfHash}.
*/
public class ELFFilter extends FuncFilter {
private static final long serialVersionUID = 1L;
/**
* Creates the filter with the inherited {@code DEFAULT_MACHINE_NUM} word width.
*
* @param maxValue maximum value, forwarded to the superclass
*/
public ELFFilter(long maxValue) {
this(maxValue, DEFAULT_MACHINE_NUM);
}
/**
* Creates the filter with an explicit word width.
*
* @param maxValue maximum value, forwarded to the superclass
* @param machineNumber machine word width, forwarded to the superclass
*/
public ELFFilter(long maxValue, int machineNumber) {
super(maxValue, machineNumber, HashUtil::elfHash);
}
}

View File

@ -1,15 +0,0 @@
package cn.hutool.core.text.bloom.filter;
import cn.hutool.core.lang.hash.HashUtil;
/**
* Bloom filter that hashes with {@code HashUtil.fnvHash}.
*/
public class FNVFilter extends FuncFilter {
private static final long serialVersionUID = 1L;
/**
* Creates the filter with the inherited {@code DEFAULT_MACHINE_NUM} word width.
*
* @param maxValue maximum value, forwarded to the superclass
*/
public FNVFilter(long maxValue) {
this(maxValue, DEFAULT_MACHINE_NUM);
}
/**
* Creates the filter with an explicit word width.
*
* @param maxValue maximum value, forwarded to the superclass
* @param machineNum machine word width, forwarded to the superclass
*/
public FNVFilter(long maxValue, int machineNum) {
super(maxValue, machineNum, HashUtil::fnvHash);
}
}

View File

@ -1,42 +0,0 @@
package cn.hutool.core.text.bloom.filter;
import cn.hutool.core.text.bloom.BloomFilter;
import java.util.function.Function;
/**
* {@link BloomFilter} whose hash is supplied as a function.
*
* @author looly
* @since 5.8.0
*/
public class FuncFilter extends AbstractFilter {
private static final long serialVersionUID = 1L;
// Caller-supplied hash function; hash(String) widens its result with longValue().
private final Function<String, Number> hashFunc;
/**
* Creates a filter with the inherited {@code DEFAULT_MACHINE_NUM} word width.
*
* @param maxValue maximum value
* @param hashFunc hash function
*/
public FuncFilter(long maxValue, Function<String, Number> hashFunc) {
this(maxValue, DEFAULT_MACHINE_NUM, hashFunc);
}
/**
* Creates a filter with an explicit word width.
*
* @param maxValue maximum value
* @param machineNum machine word width
* @param hashFunc hash function
*/
public FuncFilter(long maxValue, int machineNum, Function<String, Number> hashFunc) {
super(maxValue, machineNum);
this.hashFunc = hashFunc;
}
/**
* Applies the hash function and takes the remainder of its {@code long} value divided by {@code size}.
* The remainder keeps the sign of the hash, so it can be negative.
*
* @param str string to hash
* @return remainder of the hash divided by {@code size}
*/
@Override
public long hash(String str) {
return hashFunc.apply(str).longValue() % size;
}
}

View File

@ -1,16 +0,0 @@
package cn.hutool.core.text.bloom.filter;
import cn.hutool.core.lang.hash.HashUtil;
/**
* Bloom filter that hashes with {@code HashUtil.hfHash}.
*/
public class HfFilter extends FuncFilter {
private static final long serialVersionUID = 1L;
/**
* Creates the filter with the inherited {@code DEFAULT_MACHINE_NUM} word width.
*
* @param maxValue maximum value, forwarded to the superclass
*/
public HfFilter(long maxValue) {
this(maxValue, DEFAULT_MACHINE_NUM);
}
/**
* Creates the filter with an explicit word width.
*
* @param maxValue maximum value, forwarded to the superclass
* @param machineNum machine word width, forwarded to the superclass
*/
public HfFilter(long maxValue, int machineNum) {
super(maxValue, machineNum, HashUtil::hfHash);
}
}

View File

@ -1,15 +0,0 @@
package cn.hutool.core.text.bloom.filter;
import cn.hutool.core.lang.hash.HashUtil;
/**
* Bloom filter that hashes with {@code HashUtil.hfIpHash}.
*/
public class HfIpFilter extends FuncFilter {
private static final long serialVersionUID = 1L;
/**
* Creates the filter with the inherited {@code DEFAULT_MACHINE_NUM} word width.
*
* @param maxValue maximum value, forwarded to the superclass
*/
public HfIpFilter(long maxValue) {
this(maxValue, DEFAULT_MACHINE_NUM);
}
/**
* Creates the filter with an explicit word width.
*
* @param maxValue maximum value, forwarded to the superclass
* @param machineNum machine word width, forwarded to the superclass
*/
public HfIpFilter(long maxValue, int machineNum) {
super(maxValue, machineNum, HashUtil::hfIpHash);
}
}

View File

@ -1,15 +0,0 @@
package cn.hutool.core.text.bloom.filter;
import cn.hutool.core.lang.hash.HashUtil;
/**
* Bloom filter that hashes with {@code HashUtil.jsHash}.
*/
public class JSFilter extends FuncFilter {
private static final long serialVersionUID = 1L;
/**
* Creates the filter with the inherited {@code DEFAULT_MACHINE_NUM} word width.
*
* @param maxValue maximum value, forwarded to the superclass
*/
public JSFilter(long maxValue) {
this(maxValue, DEFAULT_MACHINE_NUM);
}
/**
* Creates the filter with an explicit word width.
*
* @param maxValue maximum value, forwarded to the superclass
* @param machineNum machine word width, forwarded to the superclass
*/
public JSFilter(long maxValue, int machineNum) {
super(maxValue, machineNum, HashUtil::jsHash);
}
}

View File

@ -1,15 +0,0 @@
package cn.hutool.core.text.bloom.filter;
import cn.hutool.core.lang.hash.HashUtil;
/**
* Bloom filter that hashes with {@code HashUtil.pjwHash}.
*/
public class PJWFilter extends FuncFilter {
private static final long serialVersionUID = 1L;
/**
* Creates the filter with the inherited {@code DEFAULT_MACHINE_NUM} word width.
*
* @param maxValue maximum value, forwarded to the superclass
*/
public PJWFilter(long maxValue) {
this(maxValue, DEFAULT_MACHINE_NUM);
}
/**
* Creates the filter with an explicit word width.
*
* @param maxValue maximum value, forwarded to the superclass
* @param machineNum machine word width, forwarded to the superclass
*/
public PJWFilter(long maxValue, int machineNum) {
super(maxValue, machineNum, HashUtil::pjwHash);
}
}

View File

@ -1,15 +0,0 @@
package cn.hutool.core.text.bloom.filter;
import cn.hutool.core.lang.hash.HashUtil;
/**
* Bloom filter that hashes with {@code HashUtil.rsHash}.
*/
public class RSFilter extends FuncFilter {
private static final long serialVersionUID = 1L;
/**
* Creates the filter with the inherited {@code DEFAULT_MACHINE_NUM} word width.
*
* @param maxValue maximum value, forwarded to the superclass
*/
public RSFilter(long maxValue) {
this(maxValue, DEFAULT_MACHINE_NUM);
}
/**
* Creates the filter with an explicit word width.
*
* @param maxValue maximum value, forwarded to the superclass
* @param machineNum machine word width, forwarded to the superclass
*/
public RSFilter(long maxValue, int machineNum) {
super(maxValue, machineNum, HashUtil::rsHash);
}
}

View File

@ -1,15 +0,0 @@
package cn.hutool.core.text.bloom.filter;
import cn.hutool.core.lang.hash.HashUtil;
/**
* Bloom filter that hashes with {@code HashUtil.sdbmHash}.
*/
public class SDBMFilter extends FuncFilter {
private static final long serialVersionUID = 1L;
/**
* Creates the filter with the inherited {@code DEFAULT_MACHINE_NUM} word width.
*
* @param maxValue maximum value, forwarded to the superclass
*/
public SDBMFilter(long maxValue) {
this(maxValue, DEFAULT_MACHINE_NUM);
}
/**
* Creates the filter with an explicit word width.
*
* @param maxValue maximum value, forwarded to the superclass
* @param machineNum machine word width, forwarded to the superclass
*/
public SDBMFilter(long maxValue, int machineNum) {
super(maxValue, machineNum, HashUtil::sdbmHash);
}
}

View File

@ -1,15 +0,0 @@
package cn.hutool.core.text.bloom.filter;
import cn.hutool.core.lang.hash.HashUtil;
/**
* Bloom filter that hashes with {@code HashUtil.tianlHash}.
*/
public class TianlFilter extends FuncFilter {
private static final long serialVersionUID = 1L;
/**
* Creates the filter with the inherited {@code DEFAULT_MACHINE_NUM} word width.
*
* @param maxValue maximum value, forwarded to the superclass
*/
public TianlFilter(long maxValue) {
this(maxValue, DEFAULT_MACHINE_NUM);
}
/**
* Creates the filter with an explicit word width.
*
* @param maxValue maximum value, forwarded to the superclass
* @param machineNum machine word width, forwarded to the superclass
*/
public TianlFilter(long maxValue, int machineNum) {
super(maxValue, machineNum, HashUtil::tianlHash);
}
}

View File

@ -1,7 +0,0 @@
/**
* Filter implementations for the various hash algorithms.
*
* @author looly
*
*/
package cn.hutool.core.text.bloom.filter;

View File

@ -10,7 +10,7 @@ public class BitMapBloomFilterTest {
@Test
public void filterTest() {
BitMapBloomFilter filter = new BitMapBloomFilter(10);
CombinedBloomFilter filter = new CombinedBloomFilter(10);
filter.add("123");
filter.add("abc");
filter.add("ddd");