fix #I2BMP1

This commit is contained in:
Looly 2021-01-03 12:12:02 +08:00
parent bcaf05c589
commit 3c2f0e46b0
7 changed files with 202 additions and 75 deletions

View File

@ -3,7 +3,7 @@
-------------------------------------------------------------------------------------------------------------
# 5.5.7 (2021-01-02)
# 5.5.7 (2021-01-03)
### 新特性
* 【core 】 DynaBean.create增加重载方法（pr#245@Gitee）
@ -13,6 +13,7 @@
* 【extra 】 MailUtil增加getSession方法
### Bug修复
* 【core 】 修复CsvReader读取双引号未转义问题（issue#I2BMP1@Gitee）
-------------------------------------------------------------------------------------------------------------

View File

@ -1,26 +1,34 @@
package cn.hutool.core.text.csv;
import java.io.Serializable;
import cn.hutool.core.util.CharUtil;
import java.io.Serializable;
/**
* CSV基础配置项
*
* CSV基础配置项此配置项可用于读取和写出CSV定义了包括字段分隔符文本包装符等符号
*
* @author looly
* @since 4.0.5
*/
public class CsvConfig implements Serializable{
public class CsvConfig implements Serializable {
private static final long serialVersionUID = -8069578249066158459L;
/** 字段分隔符,默认逗号',' */
/**
* 字段分隔符默认逗号','
*/
protected char fieldSeparator = CharUtil.COMMA;
/** 文本分隔符,文本包装符,默认双引号'"' */
/**
* 文本包装符默认双引号'"'
*/
protected char textDelimiter = CharUtil.DOUBLE_QUOTES;
/**
* 注释符号用于区分注释行默认'#'
*/
protected char commentCharacter = '#';
/**
* 设置字段分隔符默认逗号','
*
*
* @param fieldSeparator 字段分隔符默认逗号','
*/
public void setFieldSeparator(final char fieldSeparator) {
@ -29,10 +37,20 @@ public class CsvConfig implements Serializable{
/**
 * Sets the text delimiter (text wrapper character).
 * The default is the double quote {@code '"'}.
 *
 * @param textDelimiter the text delimiter (text wrapper character), double quote {@code '"'} by default
 */
public void setTextDelimiter(final char textDelimiter) {
    this.textDelimiter = textDelimiter;
}
/**
 * Sets the comment character, which marks a line as a comment line.
 *
 * @param commentCharacter the comment character that marks comment lines
 * @since 5.5.7
 */
public void setCommentCharacter(final char commentCharacter) {
    this.commentCharacter = commentCharacter;
}
}

View File

@ -32,33 +32,55 @@ public final class CsvParser implements Closeable, Serializable {
private final CsvReadConfig config;
private final char[] buf = new char[IoUtil.DEFAULT_LARGE_BUFFER_SIZE];
/** 当前位置 */
/**
* 当前位置
*/
private int bufPos;
/** 读取一段后数据长度 */
/**
* 读取一段后数据长度
*/
private int bufLen;
/** 拷贝开始的位置,一般为上一行的结束位置 */
/**
* 拷贝开始的位置一般为上一行的结束位置
*/
private int copyStart;
/** 前一个特殊分界字符 */
/**
* 前一个特殊分界字符
*/
private int preChar = -1;
/** 是否在引号包装内 */
/**
* 是否在引号包装内
*/
private boolean inQuotes;
/** 当前读取字段 */
/**
* 当前读取字段
*/
private final StrBuilder currentField = new StrBuilder(512);
/** 标题行 */
/**
* 标题行
*/
private CsvRow header;
/** 当前行号 */
/**
* 当前行号
*/
private long lineNo;
/** 第一行字段数,用于检查每行字段数是否一致 */
/**
* 第一行字段数用于检查每行字段数是否一致
*/
private int firstLineFieldCount = -1;
/** 最大字段数量 */
/**
* 最大字段数量
*/
private int maxFieldCount;
/** 是否读取结束 */
/**
* 是否读取结束
*/
private boolean finished;
/**
* CSV解析器
*
*
* @param reader Reader
* @param config 配置null则为默认配置
*/
@ -84,7 +106,7 @@ public final class CsvParser implements Closeable, Serializable {
}
/**
*读取下一行数据
* 读取下一行数据
*
* @return CsvRow
* @throws IORuntimeException IO读取异常
@ -97,7 +119,7 @@ public final class CsvParser implements Closeable, Serializable {
startingLineNo = ++lineNo;
currentFields = readLine();
fieldCount = currentFields.size();
if(fieldCount < 1){
if (fieldCount < 1) {
break;
}
@ -135,24 +157,24 @@ public final class CsvParser implements Closeable, Serializable {
/**
* 当前行做为标题行
*
*
* @param currentFields 当前行字段列表
*/
private void initHeader(final List<String> currentFields) {
final Map<String, Integer> localHeaderMap = new LinkedHashMap<>(currentFields.size());
for (int i = 0; i < currentFields.size(); i++) {
final String field = currentFields.get(i);
if (StrUtil.isNotEmpty(field) && false ==localHeaderMap.containsKey(field)) {
if (StrUtil.isNotEmpty(field) && false == localHeaderMap.containsKey(field)) {
localHeaderMap.put(field, i);
}
}
header = new CsvRow(this.lineNo, Collections.unmodifiableMap(localHeaderMap), Collections.unmodifiableList(currentFields));
header = new CsvRow(this.lineNo, Collections.unmodifiableMap(localHeaderMap), Collections.unmodifiableList(currentFields));
}
/**
* 读取一行数据
*
*
* @return 一行数据
* @throws IORuntimeException IO异常
*/
@ -185,7 +207,7 @@ public final class CsvParser implements Closeable, Serializable {
if (localPreChar == config.fieldSeparator || localCurrentField.hasContent()) {
//剩余部分作为一个字段
currentFields.add(StrUtil.unWrap(localCurrentField.toStringAndReset(), config.textDelimiter));
addField(currentFields, localCurrentField.toStringAndReset());
}
break;
}
@ -208,36 +230,40 @@ public final class CsvParser implements Closeable, Serializable {
}
copyLen++;
} else {
// 非引号内
if (c == config.fieldSeparator) {
//一个字段结束
if (copyLen > 0) {
localCurrentField.append(localBuf, localCopyStart, copyLen);
copyLen = 0;
}
currentFields.add(StrUtil.unWrap(localCurrentField.toStringAndReset(), config.textDelimiter));
addField(currentFields, localCurrentField.toStringAndReset());
localCopyStart = localBufPos;
} else if (c == config.textDelimiter) {
// 引号开始
inQuotes = true;
copyLen++;
} else if (c == CharUtil.CR) {
// \r直接结束
if (copyLen > 0) {
localCurrentField.append(localBuf, localCopyStart, copyLen);
}
currentFields.add(StrUtil.unWrap(localCurrentField.toStringAndReset(), config.textDelimiter));
addField(currentFields, localCurrentField.toStringAndReset());
localPreChar = c;
localCopyStart = localBufPos;
break;
} else if (c == CharUtil.LF) {
// \n
if (localPreChar != CharUtil.CR) {
if (copyLen > 0) {
localCurrentField.append(localBuf, localCopyStart, copyLen);
}
currentFields.add(StrUtil.unWrap(localCurrentField.toStringAndReset(), config.textDelimiter));
addField(currentFields, localCurrentField.toStringAndReset());
localPreChar = c;
localCopyStart = localBufPos;
break;
}
// 前一个字符是\r已经处理过这个字段了此处直接跳过
localCopyStart = localBufPos;
} else {
copyLen++;
@ -254,9 +280,22 @@ public final class CsvParser implements Closeable, Serializable {
return currentFields;
}
/**
 * Closes this parser by closing its reader.
 *
 * @throws IOException if closing the reader fails
 */
@Override
public void close() throws IOException {
    this.reader.close();
}
/**
 * Adds a field to the field list of the current row, after removing the
 * surrounding text delimiters once and un-escaping doubled text delimiters.
 *
 * @param currentFields the fields collected so far for the current row
 * @param field         the raw field text
 */
private void addField(List<String> currentFields, String field) {
    final char textDelimiter = this.config.textDelimiter;
    // Strip the wrapping delimiters exactly once. Unwrapping again after un-escaping
    // would also remove delimiters that are part of the content:
    // the raw field """a""" has to yield "a", not a.
    field = StrUtil.unWrap(field, textDelimiter);
    // A doubled delimiter is the escaped form of a single literal delimiter.
    field = StrUtil.replace(field, "" + textDelimiter + textDelimiter, textDelimiter + "");
    currentFields.add(field);
}
}

View File

@ -29,17 +29,24 @@ import java.util.Collection;
public final class CsvWriter implements Closeable, Flushable, Serializable {
private static final long serialVersionUID = 1L;
/** 写出器 */
/**
* 写出器
*/
private final Writer writer;
/** 写出配置 */
/**
* 写出配置
*/
private final CsvWriteConfig config;
/** 是否处于新行开始 */
/**
* 是否处于新行开始
*/
private boolean newline = true;
// --------------------------------------------------------------------------------------------------- Constructor start
/**
* 构造覆盖已有文件如果存在默认编码UTF-8
*
*
* @param filePath File CSV文件路径
*/
public CsvWriter(String filePath) {
@ -48,7 +55,7 @@ public final class CsvWriter implements Closeable, Flushable, Serializable {
/**
* 构造覆盖已有文件如果存在默认编码UTF-8
*
*
* @param file File CSV文件
*/
public CsvWriter(File file) {
@ -57,9 +64,9 @@ public final class CsvWriter implements Closeable, Flushable, Serializable {
/**
* 构造覆盖已有文件如果存在
*
*
* @param filePath File CSV文件路径
* @param charset 编码
* @param charset 编码
*/
public CsvWriter(String filePath, Charset charset) {
this(FileUtil.file(filePath), charset);
@ -67,8 +74,8 @@ public final class CsvWriter implements Closeable, Flushable, Serializable {
/**
* 构造覆盖已有文件如果存在
*
* @param file File CSV文件
*
* @param file File CSV文件
* @param charset 编码
*/
public CsvWriter(File file, Charset charset) {
@ -77,9 +84,9 @@ public final class CsvWriter implements Closeable, Flushable, Serializable {
/**
* 构造
*
*
* @param filePath File CSV文件路径
* @param charset 编码
* @param charset 编码
* @param isAppend 是否追加
*/
public CsvWriter(String filePath, Charset charset, boolean isAppend) {
@ -88,9 +95,9 @@ public final class CsvWriter implements Closeable, Flushable, Serializable {
/**
* 构造
*
* @param file CSV文件
* @param charset 编码
*
* @param file CSV文件
* @param charset 编码
* @param isAppend 是否追加
*/
public CsvWriter(File file, Charset charset, boolean isAppend) {
@ -99,11 +106,11 @@ public final class CsvWriter implements Closeable, Flushable, Serializable {
/**
* 构造
*
*
* @param filePath CSV文件路径
* @param charset 编码
* @param charset 编码
* @param isAppend 是否追加
* @param config 写出配置null则使用默认配置
* @param config 写出配置null则使用默认配置
*/
public CsvWriter(String filePath, Charset charset, boolean isAppend, CsvWriteConfig config) {
this(FileUtil.file(filePath), charset, isAppend, config);
@ -111,11 +118,11 @@ public final class CsvWriter implements Closeable, Flushable, Serializable {
/**
* 构造
*
* @param file CSV文件
* @param charset 编码
*
* @param file CSV文件
* @param charset 编码
* @param isAppend 是否追加
* @param config 写出配置null则使用默认配置
* @param config 写出配置null则使用默认配置
*/
public CsvWriter(File file, Charset charset, boolean isAppend, CsvWriteConfig config) {
this(FileUtil.getWriter(file, charset, isAppend), config);
@ -123,7 +130,7 @@ public final class CsvWriter implements Closeable, Flushable, Serializable {
/**
* 构造使用默认配置
*
*
* @param writer {@link Writer}
*/
public CsvWriter(Writer writer) {
@ -132,7 +139,7 @@ public final class CsvWriter implements Closeable, Flushable, Serializable {
/**
* 构造
*
*
* @param writer Writer
* @param config 写出配置null则使用默认配置
*/
@ -144,7 +151,7 @@ public final class CsvWriter implements Closeable, Flushable, Serializable {
/**
* 设置是否始终使用文本分隔符文本包装符默认false按需添加
*
*
* @param alwaysDelimitText 是否始终使用文本分隔符文本包装符默认false按需添加
* @return this
*/
@ -155,7 +162,7 @@ public final class CsvWriter implements Closeable, Flushable, Serializable {
/**
* 设置换行符
*
*
* @param lineDelimiter 换行符
* @return this
*/
@ -166,7 +173,7 @@ public final class CsvWriter implements Closeable, Flushable, Serializable {
/**
* 将多行写出到Writer
*
*
* @param lines 多行数据
* @return this
* @throws IORuntimeException IO异常
@ -183,7 +190,7 @@ public final class CsvWriter implements Closeable, Flushable, Serializable {
/**
* 将多行写出到Writer
*
*
* @param lines 多行数据每行数据可以是集合或者数组
* @return this
* @throws IORuntimeException IO异常
@ -198,18 +205,55 @@ public final class CsvWriter implements Closeable, Flushable, Serializable {
return this;
}
/**
 * Writes a single line made up of the given fields.
 * When no fields are given, the no-argument {@code writeLine()} is called instead.
 *
 * @param fields the fields of the line ({@code null} values are appended as empty values)
 * @return this
 * @throws IORuntimeException on I/O failure
 * @since 5.5.7
 */
public CsvWriter writeLine(String... fields) throws IORuntimeException {
    if (false == ArrayUtil.isEmpty(fields)) {
        appendLine(fields);
        return this;
    }
    // No fields to write: delegate to the no-argument overload.
    return writeLine();
}
/**
* 追加新行换行
*
* @throws IORuntimeException IO异常
*/
public void writeLine() throws IORuntimeException {
public CsvWriter writeLine() throws IORuntimeException {
try {
writer.write(config.lineDelimiter);
} catch (IOException e) {
throw new IORuntimeException(e);
}
newline = true;
return this;
}
/**
 * Writes one comment line: the configured comment character, followed by
 * the comment text and the line delimiter.
 *
 * @param comment the comment text; {@code null} is written as an empty comment
 * @return this
 * @throws IORuntimeException on I/O failure
 * @see CsvConfig#commentCharacter
 * @since 5.5.7
 */
public CsvWriter writeComment(String comment) {
    try {
        writer.write(this.config.commentCharacter);
        // Writer.write(String) throws NullPointerException for null; at this point the
        // comment character is already written, so skip the text instead of failing mid-line.
        if (null != comment) {
            writer.write(comment);
        }
        writer.write(this.config.lineDelimiter);
        newline = true;
    } catch (IOException e) {
        throw new IORuntimeException(e);
    }
    return this;
}
@Override
@ -227,13 +271,14 @@ public final class CsvWriter implements Closeable, Flushable, Serializable {
}
// --------------------------------------------------------------------------------------------------- Private method start
/**
* 追加一行末尾会自动换行但是追加前不会换行
*
* @param fields 字段列表 ({@code null} 值会被做为空值追加)
* @throws IORuntimeException IO异常
*/
private void appendLine(final String... fields) throws IORuntimeException {
private void appendLine(String... fields) throws IORuntimeException {
try {
doAppendLine(fields);
} catch (IOException e) {
@ -276,7 +321,7 @@ public final class CsvWriter implements Closeable, Flushable, Serializable {
if (null == value) {
if (alwaysDelimitText) {
writer.write(new char[] { textDelimiter, textDelimiter });
writer.write(new char[]{textDelimiter, textDelimiter});
}
return;
}

View File

@ -47,7 +47,7 @@ public class CharUtil {
public static final char AMP = '&';
/** 字符常量:冒号 {@code ':'} */
public static final char COLON = ':';
/** 字符常量:艾特 <code>'@'</code> */
/** 字符常量:艾特 {@code '@'} */
public static final char AT = '@';
/**

View File

@ -1,8 +1,9 @@
package cn.hutool.core.text.csv;
import cn.hutool.core.io.FileUtil;
import cn.hutool.core.lang.Assert;
import cn.hutool.core.lang.Console;
import cn.hutool.core.util.CharsetUtil;
import org.junit.Assert;
import org.junit.Ignore;
import org.junit.Test;
@ -16,26 +17,49 @@ public class CsvUtilTest {
//从文件中读取CSV数据
CsvData data = reader.read(FileUtil.file("test.csv"));
List<CsvRow> rows = data.getRows();
for (CsvRow csvRow : rows) {
Assert.notEmpty(csvRow.getRawList());
}
final CsvRow row0 = rows.get(0);
Assert.assertEquals("sss,sss", row0.get(0));
Assert.assertEquals("姓名", row0.get(1));
Assert.assertEquals("性别", row0.get(2));
Assert.assertEquals("关注\"对象\"", row0.get(3));
Assert.assertEquals("年龄", row0.get(4));
Assert.assertEquals("", row0.get(5));
Assert.assertEquals("\"", row0.get(6));
}
@Test
public void readTest2() {
CsvReader reader = CsvUtil.getReader();
reader.read(FileUtil.getUtf8Reader("test.csv"), (csvRow)-> Assert.notEmpty(csvRow.getRawList()));
reader.read(FileUtil.getUtf8Reader("test.csv"), (csvRow)-> {
// 只有一行所以直接判断
Assert.assertEquals("sss,sss", csvRow.get(0));
Assert.assertEquals("姓名", csvRow.get(1));
Assert.assertEquals("性别", csvRow.get(2));
Assert.assertEquals("关注\"对象\"", csvRow.get(3));
Assert.assertEquals("年龄", csvRow.get(4));
Assert.assertEquals("", csvRow.get(5));
Assert.assertEquals("\"", csvRow.get(6));
});
}
@Test
@Ignore
public void writeTest() {
CsvWriter writer = CsvUtil.getWriter("e:/testWrite.csv", CharsetUtil.CHARSET_UTF_8);
CsvWriter writer = CsvUtil.getWriter("d:/test/testWrite.csv", CharsetUtil.CHARSET_UTF_8);
writer.write(
new String[] {"a1", "b1", "c1", "123345346456745756756785656"},
new String[] {"a2", "b2", "c2"},
new String[] {"a3", "b3", "c3"}
);
}
/**
 * Manual check, ignored by default: reads a CSV file from a fixed local path
 * and passes every row to {@code Console.log}.
 */
@Test
@Ignore
public void readLfTest() {
    final CsvData data = CsvUtil.getReader().read(FileUtil.file("d:/test/rw_test.csv"));
    data.forEach(row -> Console.log(row));
}
}

View File

@ -1 +1 @@
"sss,sss",姓名,"性别",关注"对象",年龄
"sss,sss",姓名,"性别",关注"对象",年龄,"","""
Can't render this file because it contains an unexpected character in line 1 and column 33.