Merge pull request #3176 from feg545/v5-dev

增加按字节长度截断字符串方法
This commit is contained in:
Golden Looly 2023-06-30 10:29:21 +08:00 committed by GitHub
commit a51d0e6bd7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 122 additions and 9 deletions

View File

@ -1,17 +1,21 @@
package cn.hutool.core.util;
import java.io.StringReader;
import java.io.StringWriter;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.StandardCharsets;
import java.util.Map;
import cn.hutool.core.text.CharSequenceUtil;
import cn.hutool.core.text.StrBuilder;
import cn.hutool.core.text.StrFormatter;
import cn.hutool.core.text.StrPool;
import cn.hutool.core.text.TextSimilarity;
import java.io.StringReader;
import java.io.StringWriter;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.util.Map;
/**
* 字符串工具类
*
@ -265,8 +269,8 @@ public class StrUtil extends CharSequenceUtil implements StrPool {
*
* @param obj 对象
* @return 字符串
* @since 4.1.3
* @see String#valueOf(Object)
* @since 4.1.3
*/
public static String toString(Object obj) {
return String.valueOf(obj);
@ -468,4 +472,73 @@ public class StrUtil extends CharSequenceUtil implements StrPool {
public static String format(CharSequence template, Map<?, ?> map, boolean ignoreNull) {
	// Pure delegation: StrFormatter performs the actual template substitution.
	final String formatted = StrFormatter.format(template, map, ignoreNull);
	return formatted;
}
/**
 * Truncates a string so that its UTF-8 encoding does not exceed {@code maxBytes} bytes.
 * When the string has to be shortened, an ellipsis ({@code ...}) is appended and the
 * ellipsis itself counts toward the byte limit.<br>
 * Intended for values stored in database {@code varchar} columns encoded as UTF-8.
 *
 * @param str      the Java string to truncate
 * @param maxBytes the maximum length in bytes
 * @return the truncated string
 */
public static String truncateUtf8(String str, int maxBytes) {
	// A single character occupies at most 4 bytes in UTF-8, hence the quick-check factor 4.
	return truncateByByteLength(str, StandardCharsets.UTF_8, maxBytes, 4, true);
}
/**
 * Truncates a string so that its GB18030 encoding does not exceed {@code maxBytes} bytes.
 * When the string has to be shortened, an ellipsis ({@code ...}) is appended and the
 * ellipsis itself counts toward the byte limit.<br>
 * Intended for values stored in database {@code varchar} columns encoded as GB2312, GBK
 * or GB18030.
 *
 * @param str      the Java string to truncate; {@code null} is returned unchanged
 * @param maxBytes the maximum length in bytes
 * @return the truncated string, or {@code str} itself when it already fits
 */
public static String truncateGb18030(String str, int maxBytes) {
	final Charset charset = Charset.forName("GB18030");
	// GB18030 is a 1/2/4-byte encoding: characters outside the GBK repertoire need 4 bytes.
	// The quick-check factor must therefore be 4; with 2 a string made of such characters
	// could pass the "length * factor <= maxBytes" shortcut while being too long in bytes.
	return truncateByByteLength(str, charset, maxBytes, 4, true);
}
/**
 * Truncates a string so that, once encoded with the given charset, it occupies at most
 * {@code maxBytes} bytes. A character that would be cut in half at the limit is dropped
 * entirely.
 *
 * <p>If {@code appendDots} is {@code true} and the string has to be shortened, an ellipsis
 * ({@code ...}) is appended and its encoded length is counted toward {@code maxBytes}.
 * When {@code maxBytes} is too small to hold the ellipsis, the string is truncated without
 * it so that the byte limit is still honoured. A non-positive byte budget yields an empty
 * string.
 *
 * @param str        the original string; {@code null} is returned unchanged
 * @param charset    the charset used to measure the byte length
 * @param maxBytes   the maximum number of bytes
 * @param factor     quick-check factor: the maximum number of bytes a single character can
 *                   take in {@code charset}; values less than 1 disable the quick check
 * @param appendDots whether to append an ellipsis ({@code ...}) after truncation
 * @return the truncated string, or {@code str} itself when it already fits
 */
public static String truncateByByteLength(String str, Charset charset, int maxBytes, int factor,
		boolean appendDots) {
	if (str == null) {
		return null;
	}
	// Quick check: chars * factor <= maxBytes means the string certainly fits.
	// Computed in long so that a large length/factor cannot overflow into a false positive.
	if (factor > 0 && (long) str.length() * factor <= maxBytes) {
		return str;
	}
	final byte[] encoded = str.getBytes(charset);
	if (encoded.length <= maxBytes) {
		return str;
	}

	// Work out how many bytes remain for the text itself.
	final String dots = "...";
	int limitBytes = maxBytes;
	boolean withDots = false;
	if (appendDots) {
		final int dotsLength = dots.getBytes(charset).length;
		// Only reserve room for the ellipsis when it actually fits into the budget.
		if (maxBytes >= dotsLength) {
			limitBytes = maxBytes - dotsLength;
			withDots = true;
		}
	}
	if (limitBytes <= 0) {
		// Nothing of the original text fits; avoids negative sizes for the buffers below.
		return withDots ? dots : "";
	}

	final ByteBuffer byteBuffer = ByteBuffer.wrap(encoded, 0, limitBytes);
	final CharBuffer charBuffer = CharBuffer.allocate(limitBytes);
	final CharsetDecoder decoder = charset.newDecoder();
	// Silently drop the trailing character if the byte limit cut it in half.
	decoder.onMalformedInput(CodingErrorAction.IGNORE);
	decoder.decode(byteBuffer, charBuffer, true);
	decoder.flush(charBuffer);

	final String result = new String(charBuffer.array(), 0, charBuffer.position());
	return withDots ? result + dots : result;
}
}

View File

@ -1,11 +1,12 @@
package cn.hutool.core.util;
import java.nio.charset.StandardCharsets;
import java.util.List;
import cn.hutool.core.lang.Dict;
import org.junit.Assert;
import org.junit.Test;
import java.util.List;
/**
* 字符串工具类单元测试
*
@ -648,4 +649,43 @@ public class StrUtilTest {
final String result = StrUtil.indexedFormat(template, 10);
Assert.assertEquals("I'm 10 years old.", result);
}
@Test
public void truncateUtf8Test() {
	final String text = "这是This一段中英文";

	// 12 bytes: 9 remain after the ellipsis, enough for the two CJK chars plus "Thi".
	Assert.assertEquals("这是Thi...", StrUtil.truncateUtf8(text, 12));
	// 13 bytes: one more byte lets the full "This" through.
	Assert.assertEquals("这是This...", StrUtil.truncateUtf8(text, 13));
	// 14 bytes: the extra byte cannot hold the next CJK char, so the result is unchanged.
	Assert.assertEquals("这是This...", StrUtil.truncateUtf8(text, 14));
	// A generous limit leaves the string untouched.
	Assert.assertEquals(text, StrUtil.truncateUtf8(text, 999));
}
@Test
public void truncateGb18030Test() {
	final String text = "这是This一段中英文";

	// 12 bytes: 9 remain after the ellipsis; the ninth byte cannot hold a CJK char.
	Assert.assertEquals("这是This...", StrUtil.truncateGb18030(text, 12));
	// 13 bytes: 10 remain, so one more CJK char fits.
	Assert.assertEquals("这是This一...", StrUtil.truncateGb18030(text, 13));
	// 14 bytes: the extra byte is not enough for another CJK char.
	Assert.assertEquals("这是This一...", StrUtil.truncateGb18030(text, 14));
	// A generous limit leaves the string untouched.
	Assert.assertEquals(text, StrUtil.truncateGb18030(text, 999));
}
@Test
public void truncateByByteLengthTest() {
	// Single-byte charset, no ellipsis: the first 10 bytes are the first 10 characters.
	final String truncated = StrUtil.truncateByByteLength(
			"This is English", StandardCharsets.ISO_8859_1, 10, 1, false);
	Assert.assertEquals("This is En", truncated);
}
}