FileUtil.getTotalLines()支持CR换行符

This commit is contained in:
Looly 2024-08-29 09:49:54 +08:00
parent 5bec26be49
commit 54c858b8f1
4 changed files with 42 additions and 6 deletions

View File

@ -70,8 +70,8 @@ public class LineCounter implements Closeable {
}
private int count() throws IOException {
final byte[] c = new byte[bufferSize];
int readChars = is.read(c);
final byte[] buf = new byte[bufferSize];
int readChars = is.read(buf);
if (readChars == -1) {
// 空文件返回0
return 0;
@ -82,23 +82,35 @@ public class LineCounter implements Closeable {
// 如果多行最后一行无换行符最后一行需要单独计数
// 如果多行最后一行有换行符则空行算作一行
int count = 1;
byte pre;
byte c = 0;
while (readChars == bufferSize) {
for (int i = 0; i < bufferSize; i++) {
if (c[i] == CharUtil.LF) {
pre = c;
c = buf[i];
// 换行符兼容MAC
if (c == CharUtil.LF || pre == CharUtil.CR) {
++count;
}
}
readChars = is.read(c);
readChars = is.read(buf);
}
// count remaining characters
while (readChars != -1) {
for (int i = 0; i < readChars; i++) {
if (c[i] == CharUtil.LF) {
pre = c;
c = buf[i];
if (c == CharUtil.LF || pre == CharUtil.CR) {
++count;
}
}
readChars = is.read(c);
readChars = is.read(buf);
}
// 最后一个字符为换行符则单独计数行
if(c == CharUtil.CR){
++count;
}
return count;

View File

@ -27,6 +27,8 @@ import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.List;
import static org.junit.jupiter.api.Assertions.assertEquals;
/**
* {@link FileUtil} 单元测试类
*
@ -501,6 +503,20 @@ public class FileUtilTest {
Assertions.assertEquals(8, totalLines);
}
@Test
public void getTotalLinesCrTest() {
	// The fixture's last line ends with a line break, so the trailing empty line
	// is counted as a line of its own.
	// NOTE(review): the file name suggests bare CR line endings — confirm against the fixture bytes.
	final int expectedLines = 8;
	assertEquals(expectedLines, FileUtil.getTotalLines(FileUtil.file("test_lines_cr.csv")));
}
@Test
public void getTotalLinesCrlfTest() {
	// The fixture's last line ends with a line break, so the trailing empty line
	// is counted as a line of its own.
	// NOTE(review): the file name suggests CRLF line endings — confirm against the fixture bytes.
	final int expectedLines = 8;
	assertEquals(expectedLines, FileUtil.getTotalLines(FileUtil.file("test_lines_crlf.csv")));
}
@Test
public void issue3591Test() {
// 此文件最后一行末尾无换行符

View File

@ -0,0 +1 @@
# 这是一行注释,读取时应忽略 a,b,c,d 1,2,3,4 # 这是一行注释,读取时应忽略 q,w,e,r,"我是一段 带换行的内容" a,s,d,f
Can't render this file because it contains an unexpected character in line 1 and column 141.

View File

@ -0,0 +1,7 @@
# 这是一行注释,读取时应忽略
a,b,c,d
1,2,3,4
# 这是一行注释,读取时应忽略
q,w,e,r,"我是一段
带换行的内容"
a,s,d,f
Can't render this file because it has a wrong number of fields in line 2.