Skip to content

Commit fb62410

Browse files
committed
Add full support of quoting
1 parent dc5d73c commit fb62410

File tree

8 files changed

+364
-242
lines changed

8 files changed

+364
-242
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1010
### Added
1111

1212
- Add buffer relocation in reader to stabilize performance
13+
- Add full support of quoting [#211](https://github.com/nbbrd/picocsv/issues/211)
1314

1415
### Changed
1516

src/main/java/nbbrd/picocsv/Csv.java

Lines changed: 72 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -566,6 +566,13 @@ public interface LineReader extends CharSequence {
566566
* @return <code>true</code> if the current line is a comment, <code>false</code> otherwise
567567
*/
568568
boolean isComment();
569+
570+
/**
571+
* Checks whether the current field is quoted.
572+
*
573+
* @return <code>true</code> if the current field is quoted, <code>false</code> otherwise
574+
*/
575+
boolean isQuoted();
569576
}
570577

571578
/**
@@ -801,6 +808,11 @@ public boolean isComment() {
801808
return fieldType == FIELD_TYPE_COMMENTED;
802809
}
803810

811+
@Override
812+
public boolean isQuoted() {
813+
return fieldType == FIELD_TYPE_QUOTED;
814+
}
815+
804816
/**
805817
* Closes the {@link java.io.Reader character stream} used by this reader.
806818
*
@@ -1171,6 +1183,14 @@ public interface LineWriter {
11711183
* @throws IOException if an I/O error occurs
11721184
*/
11731185
void writeField(CharSequence field) throws IOException;
1186+
1187+
/**
1188+
* Writes a new quoted field. Null is handled as empty.
1189+
*
1190+
* @param field a nullable field
1191+
* @throws IOException if an I/O error occurs
1192+
*/
1193+
void writeQuotedField(CharSequence field) throws IOException;
11741194
}
11751195

11761196
/**
@@ -1313,6 +1333,40 @@ public void writeField(CharSequence field) throws IOException {
13131333
}
13141334
}
13151335

1336+
@Override
1337+
public void writeQuotedField(CharSequence field) throws IOException {
1338+
boolean notEmpty = field != null && field.length() != 0;
1339+
switch (state) {
1340+
case STATE_0_NO_FIELD: {
1341+
state = STATE_2_MULTI_FIELD;
1342+
if (notEmpty) {
1343+
if (field.charAt(0) == comment) {
1344+
formatField(field, QUOTING_FULL, true);
1345+
} else {
1346+
formatField(field, parseForcedQuoting(field), true);
1347+
}
1348+
} else
1349+
formatSingleEmptyField();
1350+
break;
1351+
}
1352+
case STATE_1_SINGLE_EMPTY_FIELD: {
1353+
state = STATE_2_MULTI_FIELD;
1354+
if (notEmpty) {
1355+
formatField(field, parseForcedQuoting(field), false);
1356+
} else
1357+
formatEmptyQuotedField();
1358+
break;
1359+
}
1360+
case STATE_2_MULTI_FIELD: {
1361+
if (notEmpty) {
1362+
formatField(field, parseForcedQuoting(field), false);
1363+
} else
1364+
formatEmptyQuotedField();
1365+
break;
1366+
}
1367+
}
1368+
}
1369+
13161370
/**
13171371
* Writes an end of line.
13181372
*
@@ -1411,6 +1465,12 @@ private void formatEmptyField() throws IOException {
14111465
appendChar(delimiter);
14121466
}
14131467

1468+
private void formatEmptyQuotedField() throws IOException {
1469+
appendChar(delimiter);
1470+
appendChar(quote);
1471+
appendChar(quote);
1472+
}
1473+
14141474
private void formatField(final CharSequence field, final int quoting, final boolean firstField) throws IOException {
14151475
if (!firstField) {
14161476
appendChar(delimiter);
@@ -1492,6 +1552,18 @@ private int parseQuoting(final CharSequence field) {
14921552
}
14931553
}
14941554

1555+
private int parseForcedQuoting(final CharSequence field) {
1556+
final char quote = this.quote;
1557+
final int length = field.length();
1558+
1559+
for (int p = 0; p < length; p++) {
1560+
if (field.charAt(p) == quote) {
1561+
return QUOTING_FULL;
1562+
}
1563+
}
1564+
return QUOTING_PARTIAL;
1565+
}
1566+
14951567
private void flushBuffer() throws IOException {
14961568
charWriter.write(buffer, 0, bufferLength);
14971569
bufferLength = 0;

src/test/java/_demo/Cookbook.java

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -126,6 +126,11 @@ public boolean isComment() {
126126
return false;
127127
}
128128

129+
@Override
130+
public boolean isQuoted() {
131+
return false;
132+
}
133+
129134
@Override
130135
public int length() {
131136
return fields[cursor].length();

src/test/java/_test/Row.java

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -48,11 +48,11 @@ public static class Fields extends Row {
4848
public static final Fields EMPTY_FIELD = new Fields(Collections.singletonList(""));
4949

5050
@lombok.NonNull
51-
List<String> fields;
51+
List<? extends CharSequence> fields;
5252

5353
@Override
5454
public String toString() {
55-
return "{" + fields.stream().map(StringEscapeUtils::escapeJava).collect(Collectors.joining("|")) + "}";
55+
return "{" + fields.stream().map(Object::toString).map(StringEscapeUtils::escapeJava).collect(Collectors.joining("|")) + "}";
5656
}
5757
}
5858

@@ -83,7 +83,7 @@ public static void writeAll(List<Row> rows, Csv.Writer writer) throws IOExceptio
8383
} else if (row instanceof Comment) {
8484
writer.writeComment(((Comment) row).getComment());
8585
} else if (row instanceof Fields) {
86-
for (String field : ((Fields) row).getFields()) {
86+
for (CharSequence field : ((Fields) row).getFields()) {
8787
writer.writeField(field);
8888
}
8989
writer.writeEndOfLine();

src/test/java/_test/Sample.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -81,7 +81,7 @@ public Builder rowComment(String comment) {
8181
return row(new Row.Comment(comment));
8282
}
8383

84-
public Builder rowFields(String... fields) {
84+
public Builder rowFields(CharSequence... fields) {
8585
return row(new Row.Fields(Arrays.asList(fields)));
8686
}
8787
}

src/test/java/_test/fastcsv/FastCsvEntryConverter.java

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -39,8 +39,9 @@ private static String toContent(String text) {
3939
.replace('␊', '\n');
4040
}
4141

42-
private static String fromContent(String text) {
42+
private static String fromContent(CharSequence text) {
4343
return text
44+
.toString()
4445
.replace(' ', '␣')
4546
.replace('\r', '␍')
4647
.replace('\n', '␊');
@@ -85,7 +86,7 @@ private static String fromRows(List<Row.Fields> rows) {
8586
private static String fromRow(Row.Fields row) {
8687
return row.getFields()
8788
.stream()
88-
.map(field -> field.isEmpty() ? "◯" : fromContent(field))
89+
.map(field -> field.length() == 0 ? "◯" : fromContent(field))
8990
.collect(Collectors.joining("↷"));
9091
}
9192
}

src/test/java/nbbrd/picocsv/CsvReaderTest.java

Lines changed: 52 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,6 @@
2525
import _test.fastcsv.FastCsvEntryConverter;
2626
import _test.fastcsv.FastCsvEntryRowsParser;
2727
import lombok.NonNull;
28-
import lombok.Value;
2928
import org.assertj.core.api.Condition;
3029
import org.assertj.core.condition.VerboseCondition;
3130
import org.junit.jupiter.api.Test;
@@ -35,6 +34,7 @@
3534

3635
import java.io.IOException;
3736
import java.io.StringReader;
37+
import java.io.StringWriter;
3838
import java.io.UncheckedIOException;
3939
import java.util.List;
4040
import java.util.function.Function;
@@ -89,6 +89,43 @@ public void testAllSamples(Sample sample) {
8989
.is(validWithLenient);
9090
}
9191

92+
@ParameterizedTest
93+
@MethodSource("_test.Sample#getAllSamples")
94+
public void testFullSupportOfQuoting(Sample sample) throws IOException {
95+
try (Csv.Reader reader = Csv.Reader.of(sample.getFormat(), Csv.ReaderOptions.DEFAULT, new StringReader(sample.getContent()))) {
96+
StringWriter actual = new StringWriter();
97+
try (Csv.Writer writer = Csv.Writer.of(sample.getFormat(), Csv.WriterOptions.DEFAULT, actual)) {
98+
if (reader.readLine()) {
99+
boolean wasComment = false;
100+
if (reader.isComment()) {
101+
wasComment = true;
102+
writer.writeComment(reader);
103+
} else {
104+
while (reader.readField()) {
105+
if (reader.isQuoted()) writer.writeQuotedField(reader);
106+
else writer.writeField(reader);
107+
}
108+
}
109+
while (reader.readLine()) {
110+
if (!wasComment) writer.writeEndOfLine();
111+
if (reader.isComment()) {
112+
wasComment = true;
113+
writer.writeComment(reader);
114+
} else {
115+
wasComment = false;
116+
while (reader.readField()) {
117+
if (reader.isQuoted()) writer.writeQuotedField(reader);
118+
else writer.writeField(reader);
119+
}
120+
}
121+
}
122+
if (!sample.isWithoutEOL()) writer.writeEndOfLine();
123+
}
124+
}
125+
assertThat(actual.toString()).isEqualTo(sample.getContent());
126+
}
127+
}
128+
92129
@ParameterizedTest
93130
@MethodSource("_test.Sample#getAllSamples")
94131
public void testSkip(Sample sample) throws IOException {
@@ -414,6 +451,20 @@ public void testIsComment() throws IOException {
414451
.isTrue();
415452
}
416453

454+
@Test
455+
public void testIsQuoted() throws IOException {
456+
String sample = "\"A\",B";
457+
QuickReader.read(r->{
458+
assertThat(r.readLine()).isTrue();
459+
assertThat(r.readField()).isTrue();
460+
assertThat(r.isQuoted()).isTrue();
461+
assertThat(r.readField()).isTrue();
462+
assertThat(r.isQuoted()).isFalse();
463+
assertThat(r.readField()).isFalse();
464+
assertThat(r.readLine()).isFalse();
465+
}, sample, RFC4180, Csv.ReaderOptions.DEFAULT);
466+
}
467+
417468
@Test
418469
public void testSurrogatePair() {
419470
String grinning = "😀";

0 commit comments

Comments
 (0)