fix textFinder

This commit is contained in:
Looly 2021-11-16 00:09:40 +08:00
parent 15753a5a31
commit 305fa44b55
12 changed files with 155 additions and 84 deletions

View File

@ -13,6 +13,7 @@
* 【core 】 Tree增加filter、filterNew、cloneTree、hasChild方法（issue#I4HFC6@Gitee）
* 【poi 】 增加ColumnSheetReader及ExcelReader.readColumn，支持读取某一列
* 【core 】 IdCardUtil.isValidCard不再自动trim（issue#I4I04O@Gitee）
* 【core 】 IdCardUtil.isValidCard不再自动trim（issue#I4I04O@Gitee）
### 🐞Bug修复
* 【core 】 修复FileResource构造fileName参数无效问题（issue#1942@Github）

View File

@ -7,6 +7,8 @@ import cn.hutool.core.lang.Assert;
import cn.hutool.core.lang.Filter;
import cn.hutool.core.lang.Matcher;
import cn.hutool.core.lang.func.Func1;
import cn.hutool.core.text.finder.Finder;
import cn.hutool.core.text.finder.StrFinder;
import cn.hutool.core.util.ArrayUtil;
import cn.hutool.core.util.CharUtil;
import cn.hutool.core.util.CharsetUtil;
@ -35,7 +37,7 @@ import java.util.function.Predicate;
*/
public class CharSequenceUtil {
public static final int INDEX_NOT_FOUND = -1;
public static final int INDEX_NOT_FOUND = Finder.INDEX_NOT_FOUND;
/**
* 字符串常量{@code "null"} <br>
@ -1101,9 +1103,13 @@ public class CharSequenceUtil {
if (start < 0 || start > len) {
start = 0;
}
if (end > len || end < 0) {
if (end > len) {
end = len;
}
if (end < 0) {
end += len;
}
for (int i = start; i < end; i++) {
if (str.charAt(i) == searchChar) {
return i;
@ -1168,40 +1174,22 @@ public class CharSequenceUtil {
/**
* 指定范围内查找字符串
*
* @param str 字符串
* @param searchStr 需要查找位置的字符串
* @param fromIndex 起始位置
* @param text 字符串空则返回-1
* @param searchStr 需要查找位置的字符串空则返回-1
* @param from 起始位置包含
* @param ignoreCase 是否忽略大小写
* @return 位置
* @since 3.2.1
*/
public static int indexOf(final CharSequence str, CharSequence searchStr, int fromIndex, boolean ignoreCase) {
if (str == null || searchStr == null) {
return INDEX_NOT_FOUND;
}
if (fromIndex < 0) {
fromIndex = 0;
}
final int endLimit = str.length() - searchStr.length() + 1;
if (fromIndex > endLimit) {
return INDEX_NOT_FOUND;
}
if (searchStr.length() == 0) {
return fromIndex;
}
if (false == ignoreCase) {
// 不忽略大小写调用JDK方法
return str.toString().indexOf(searchStr.toString(), fromIndex);
}
for (int i = fromIndex; i < endLimit; i++) {
if (isSubEquals(str, i, searchStr, 0, searchStr.length(), true)) {
return i;
public static int indexOf(CharSequence text, CharSequence searchStr, int from, boolean ignoreCase) {
if (isEmpty(text) || isEmpty(searchStr)) {
if (StrUtil.equals(text, searchStr)) {
return 0;
} else {
return INDEX_NOT_FOUND;
}
}
return INDEX_NOT_FOUND;
return new StrFinder(searchStr, ignoreCase).setText(text).start(from);
}
/**
@ -1212,7 +1200,7 @@ public class CharSequenceUtil {
* @return 位置
* @since 3.2.1
*/
public static int lastIndexOfIgnoreCase(final CharSequence str, final CharSequence searchStr) {
public static int lastIndexOfIgnoreCase(CharSequence str, CharSequence searchStr) {
return lastIndexOfIgnoreCase(str, searchStr, str.length());
}
@ -1226,7 +1214,7 @@ public class CharSequenceUtil {
* @return 位置
* @since 3.2.1
*/
public static int lastIndexOfIgnoreCase(final CharSequence str, final CharSequence searchStr, int fromIndex) {
public static int lastIndexOfIgnoreCase(CharSequence str, CharSequence searchStr, int fromIndex) {
return lastIndexOf(str, searchStr, fromIndex, true);
}
@ -1234,37 +1222,22 @@ public class CharSequenceUtil {
* 指定范围内查找字符串<br>
* fromIndex 为搜索起始位置从后往前计数
*
* @param str 字符串
* @param text 字符串
* @param searchStr 需要查找位置的字符串
* @param fromIndex 起始位置从后往前计数
* @param from 起始位置从后往前计数
* @param ignoreCase 是否忽略大小写
* @return 位置
* @since 3.2.1
*/
public static int lastIndexOf(final CharSequence str, final CharSequence searchStr, int fromIndex, boolean ignoreCase) {
if (str == null || searchStr == null) {
return INDEX_NOT_FOUND;
}
if (fromIndex < 0) {
fromIndex = 0;
}
fromIndex = Math.min(fromIndex, str.length());
if (searchStr.length() == 0) {
return fromIndex;
}
if (false == ignoreCase) {
// 不忽略大小写调用JDK方法
return str.toString().lastIndexOf(searchStr.toString(), fromIndex);
}
for (int i = fromIndex; i >= 0; i--) {
if (isSubEquals(str, i, searchStr, 0, searchStr.length(), true)) {
return i;
public static int lastIndexOf(CharSequence text, CharSequence searchStr, int from, boolean ignoreCase) {
if (isEmpty(text) || isEmpty(searchStr)) {
if (StrUtil.equals(text, searchStr)) {
return 0;
} else {
return INDEX_NOT_FOUND;
}
}
return INDEX_NOT_FOUND;
return new StrFinder(searchStr, ignoreCase, true).setText(text).start(from);
}
/**

View File

@ -169,10 +169,9 @@ public class StrSplitter {
* @param ignoreEmpty 是否忽略空串
* @param ignoreCase 是否忽略大小写
* @return 切分后的集合
* @since 3.2.1
*/
public static List<String> split(CharSequence text, char separator, int limit, boolean isTrim, boolean ignoreEmpty, boolean ignoreCase) {
return split(text, separator, limit, ignoreEmpty, trimFunc(isTrim));
return split(text, separator, limit, ignoreEmpty, ignoreCase, trimFunc(isTrim));
}
/**

View File

@ -4,7 +4,8 @@ import cn.hutool.core.lang.Assert;
import cn.hutool.core.util.NumberUtil;
/**
* 字符查找器
* 字符查找器<br>
* 查找指定字符在字符串中的位置信息
*
* @author looly
* @since 5.7.14
@ -38,8 +39,8 @@ public class CharFinder extends TextFinder {
@Override
public int start(int from) {
Assert.notNull(this.text, "Text to find must be not null!");
final int length = text.length();
for (int i = from; i < length; i++) {
final int limit = getValidEndIndex(false);
for (int i = from; i < limit; i++) {
if (NumberUtil.equals(c, text.charAt(i), caseInsensitive)) {
return i;
}

View File

@ -4,7 +4,8 @@ import cn.hutool.core.lang.Assert;
import cn.hutool.core.lang.Matcher;
/**
* 字符匹配查找器
* 字符匹配查找器<br>
* 查找满足指定{@link Matcher} 匹配的字符所在位置此类长用于查找某一类字符如数字等
*
* @since 5.7.14
* @author looly
@ -25,8 +26,8 @@ public class CharMatcherFinder extends TextFinder {
@Override
public int start(int from) {
Assert.notNull(this.text, "Text to find must be not null!");
final int length = text.length();
for (int i = from; i < length; i++) {
final int limit = getValidEndIndex(false);
for (int i = from; i < limit; i++) {
if(matcher.match(text.charAt(i))){
return i;
}

View File

@ -8,10 +8,12 @@ package cn.hutool.core.text.finder;
*/
public interface Finder {
int INDEX_NOT_FOUND = -1;
/**
* 返回开始位置即起始字符位置包含未找到返回-1
*
* @param from 查找的开始位置包含
* @param from 查找的开始位置包含
* @return 起始字符位置未找到返回-1
*/
int start(int from);

View File

@ -3,7 +3,8 @@ package cn.hutool.core.text.finder;
import cn.hutool.core.lang.Assert;
/**
* 固定长度查找器
* 固定长度查找器<br>
* 给定一个长度查找的位置为from + length一般用于分段截取
*
* @since 5.7.14
* @author looly
@ -25,7 +26,8 @@ public class LengthFinder extends TextFinder {
public int start(int from) {
Assert.notNull(this.text, "Text to find must be not null!");
final int result = from + length;
if(result < text.length()){
final int limit = getValidEndIndex(false);
if(result < limit){
return result;
}
return -1;

View File

@ -4,7 +4,8 @@ import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* 正则查找器
* 正则查找器<br>
* 通过传入正则表达式查找指定字符串中匹配正则的开始和结束位置
*
* @author looly
* @since 5.7.14
@ -43,14 +44,24 @@ public class PatternFinder extends TextFinder {
@Override
public int start(int from) {
if (matcher.find(from)) {
return matcher.start();
// 只有匹配到的字符串结尾在limit范围内才算找到
if(matcher.end() <= getValidEndIndex(false)){
return matcher.start();
}
}
return -1;
return INDEX_NOT_FOUND;
}
/**
 * Returns the end offset (exclusive) of the matcher's current match, provided it lies
 * within the effective search limit.
 * <p>
 * The {@code start} argument is not consulted; the value is taken from the state left
 * in {@code matcher} by the preceding successful {@code find}.
 *
 * @param start start position of the match (unused here)
 * @return end offset of the current match, or {@code INDEX_NOT_FOUND} if it exceeds the limit
 * @throws IllegalStateException propagated from {@link java.util.regex.Matcher#end()} when
 *                               there is no current match
 */
@Override
public int end(int start) {
	final int end = matcher.end();
	// Effective upper bound: the whole text when no end index is configured (negative),
	// otherwise the configured end index capped at the text length.
	final int limit;
	if (endIndex < 0) {
		limit = text.length();
	} else {
		limit = Math.min(endIndex, text.length());
	}
	// A match that ends exactly at the limit is still inside the range. start(int) accepts
	// such matches with "<=", so the same comparison must be used here; with "<" a match
	// at the very end of the text would be found by start(int) but rejected by end(int).
	return end <= limit ? end : INDEX_NOT_FOUND;
}
@Override

View File

@ -1,10 +1,10 @@
package cn.hutool.core.text.finder;
import cn.hutool.core.lang.Assert;
import cn.hutool.core.util.StrUtil;
import cn.hutool.core.text.CharSequenceUtil;
/**
* 字符查找器
* 字符查找器
*
* @author looly
* @since 5.7.14
@ -12,25 +12,59 @@ import cn.hutool.core.util.StrUtil;
public class StrFinder extends TextFinder {
private static final long serialVersionUID = 1L;
private final CharSequence str;
private final CharSequence strToFind;
private final boolean caseInsensitive;
private final boolean negative;
/**
* 构造
*
* @param str 被查找的字符串
* @param strToFind 被查找的字符串
* @param caseInsensitive 是否忽略大小写
*/
public StrFinder(CharSequence str, boolean caseInsensitive) {
Assert.notEmpty(str);
this.str = str;
public StrFinder(CharSequence strToFind, boolean caseInsensitive) {
this(strToFind, caseInsensitive, false);
}
/**
 * Creates a finder for the given search string, optionally searching from back to front.
 *
 * @param strToFind       the string to search for; must not be empty
 * @param caseInsensitive whether case is ignored when comparing
 * @param negative        whether to search backwards (from the end towards the start)
 */
public StrFinder(CharSequence strToFind, boolean caseInsensitive, boolean negative) {
	// Reject an empty search string before any state is stored.
	Assert.notEmpty(strToFind);
	this.negative = negative;
	this.caseInsensitive = caseInsensitive;
	this.strToFind = strToFind;
}
@Override
public int start(int from) {
Assert.notNull(this.text, "Text to find must be not null!");
return StrUtil.indexOf(text, str, from, caseInsensitive);
final int subLen = strToFind.length();
if (from < 0) {
from = 0;
}
int endLimit = getValidEndIndex(negative);
if(negative){
for (int i = from; i > endLimit; i--) {
if (CharSequenceUtil.isSubEquals(text, i, strToFind, 0, subLen, caseInsensitive)) {
return i;
}
}
} else {
endLimit = endLimit - subLen + 1;
for (int i = from; i < endLimit; i++) {
if (CharSequenceUtil.isSubEquals(text, i, strToFind, 0, subLen, caseInsensitive)) {
return i;
}
}
}
return INDEX_NOT_FOUND;
}
@Override
@ -38,6 +72,6 @@ public class StrFinder extends TextFinder {
if (start < 0) {
return -1;
}
return start + str.length();
return start + strToFind.length();
}
}

View File

@ -1,5 +1,7 @@
package cn.hutool.core.text.finder;
import cn.hutool.core.lang.Assert;
import java.io.Serializable;
/**
@ -12,6 +14,7 @@ public abstract class TextFinder implements Finder, Serializable {
private static final long serialVersionUID = 1L;
protected CharSequence text;
protected int endIndex = -1;
/**
* 设置被查找的文本
@ -20,7 +23,37 @@ public abstract class TextFinder implements Finder, Serializable {
* @return this
*/
public TextFinder setText(CharSequence text) {
this.text = text;
this.text = Assert.notNull(text, "Text must be not null!");
return this;
}
/**
 * Sets the position at which searching stops (exclusive).
 * <p>
 * A negative value means no explicit limit is configured: as resolved by
 * {@link #getValidEndIndex(boolean)}, a forward search then stops at
 * {@code text.length()} and a backward search stops at {@code -1}.
 *
 * @param endIndex end position, exclusive
 * @return this finder, for call chaining
 */
public TextFinder setEndIndex(int endIndex) {
	this.endIndex = endIndex;
	return this;
}
/**
 * Resolves the effective end position used by the search loops.
 * <p>
 * When {@link #endIndex} is non-negative, it is capped at {@code text.length()}.
 * When it is negative (no limit configured), the result is {@code -1} for a backward
 * search and {@code text.length()} for a forward search.
 *
 * @param negative whether the search runs from back to front
 * @return the effective end position
 */
protected int getValidEndIndex(boolean negative) {
	// An explicitly configured end index wins, but never exceeds the text length.
	if (endIndex >= 0) {
		return Math.min(endIndex, text.length());
	}
	// No limit configured: backward searches run down to (but not including) -1,
	// forward searches run up to the end of the text.
	return negative ? -1 : text.length();
}
}

View File

@ -135,4 +135,18 @@ public class SplitIterTest {
final List<String> strings = splitIter.toList(false);
Assert.assertEquals(1, strings.size());
}
// Splitting with an empty separator string must raise an error
@Test(expected = IllegalArgumentException.class)
public void splitByEmptyTest(){
	final String text = "aa,bb,cc";
	// The StrFinder constructor asserts a non-empty search string, so the expected
	// exception should already be raised while building the finder.
	final StrFinder emptyFinder = new StrFinder("", false);
	final SplitIter splitIter = new SplitIter(text, emptyFinder, 3, false);
	final List<String> strings = splitIter.toList(false);
	Assert.assertEquals(1, strings.size());
}
}

View File

@ -177,7 +177,7 @@ public class StrUtilTest {
Assert.assertEquals(5, StrUtil.indexOfIgnoreCase("aabaabaa", "B", 3));
Assert.assertEquals(-1, StrUtil.indexOfIgnoreCase("aabaabaa", "B", 9));
Assert.assertEquals(2, StrUtil.indexOfIgnoreCase("aabaabaa", "B", -1));
Assert.assertEquals(2, StrUtil.indexOfIgnoreCase("aabaabaa", "", 2));
Assert.assertEquals(-1, StrUtil.indexOfIgnoreCase("aabaabaa", "", 2));
Assert.assertEquals(-1, StrUtil.indexOfIgnoreCase("abc", "", 9));
}
@ -199,8 +199,8 @@ public class StrUtilTest {
Assert.assertEquals(2, StrUtil.lastIndexOfIgnoreCase("aabaabaa", "B", 3));
Assert.assertEquals(5, StrUtil.lastIndexOfIgnoreCase("aabaabaa", "B", 9));
Assert.assertEquals(-1, StrUtil.lastIndexOfIgnoreCase("aabaabaa", "B", -1));
Assert.assertEquals(2, StrUtil.lastIndexOfIgnoreCase("aabaabaa", "", 2));
Assert.assertEquals(3, StrUtil.lastIndexOfIgnoreCase("abc", "", 9));
Assert.assertEquals(-1, StrUtil.lastIndexOfIgnoreCase("aabaabaa", "", 2));
Assert.assertEquals(-1, StrUtil.lastIndexOfIgnoreCase("abc", "", 9));
Assert.assertEquals(0, StrUtil.lastIndexOfIgnoreCase("AAAcsd", "aaa"));
}