Skip to content
This repository has been archived by the owner on Oct 16, 2023. It is now read-only.

Commit

Permalink
Fixes for mismatches between entity and CSV headers, plus some more tests
Browse files Browse the repository at this point in the history
  • Loading branch information
m.kluehspies committed Mar 10, 2022
1 parent 6d0a65c commit 6a194c5
Show file tree
Hide file tree
Showing 10 changed files with 185 additions and 35 deletions.
51 changes: 51 additions & 0 deletions .idea/csv-plugin.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion .idea/misc.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 4 additions & 1 deletion build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ plugins {
scmVersion {
versionCreator 'versionWithBranch'
useHighestVersion = true
localOnly = true
repository {
pushTagsOnly = true
}
Expand All @@ -22,7 +23,9 @@ scmVersion {
}

group 'com.github.mklueh'
project.version = scmVersion.version
//project.version = scmVersion.version
sourceCompatibility = '11'
targetCompatibility = '11'

repositories {
mavenCentral()
Expand Down
2 changes: 1 addition & 1 deletion gradle/wrapper/gradle-wrapper.properties
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-6.7-bin.zip
distributionUrl=https\://services.gradle.org/distributions/gradle-7.2-bin.zip
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists
85 changes: 62 additions & 23 deletions src/main/java/com/github/mklueh/sexycsv/CsvParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Arrays;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Function;
import java.util.function.Predicate;
Expand Down Expand Up @@ -40,7 +39,15 @@ public class CsvParser {
*/
private boolean hasHeaderRow;

private HeaderBuilder.Header header;
/**
* Header loaded from entity if provided
*/
private HeaderBuilder.Header entityHeader;

/**
* Header loaded from CSV if provided
*/
private HeaderBuilder.Header csvHeader;

/**
* Filter corrupt rows
Expand All @@ -52,20 +59,29 @@ public class CsvParser {
*/
private Function<? super String, String[]> tokenizer;

public <T> Stream<T> parse(Path path, Class<T> clazz) throws IOException {

public <T> Stream<T> parse(Path path, Class<T> clazz) throws Exception {
Stream<Row> rows = parseFile(path, clazz);
return mapRowsToEntities(clazz, rows);

}

private <T> Stream<T> mapRowsToEntities(Class<T> clazz, Stream<Row> rows) {
return rows.map(row -> {
try {
T o = (T) clazz.getDeclaredConstructor().newInstance();
Field[] fields = clazz.getDeclaredFields();
for (Field field : fields) {
field.setAccessible(true);
String fieldName = field.getName();

Type type = field.getGenericType();
CSVColumn annotation = field.getAnnotation(CSVColumn.class);

if (annotation != null) {
//lookup value in row by columnName
String columnName = annotation.value();
String value = row.get(columnName);

if (type.getTypeName().equals(String.class.getName())) {
field.set(o, value);
}
Expand All @@ -76,47 +92,70 @@ public <T> Stream<T> parse(Path path, Class<T> clazz) throws IOException {
throw new RuntimeException(e);
}
});

}


public Stream<Row> parse(Path path) throws IOException {
public Stream<Row> parse(Path path) throws Exception {
return parseFile(path, null);
}

private <T> Stream<Row> parseFile(Path path, Class<T> entityClass) throws IOException {
private <T> Stream<Row> parseFile(Path path, Class<T> entityClass) throws Exception {
HeaderBuilder headerBuilder = new HeaderBuilder(charset());

//HeaderBuilder.Header entityHeader = headerBuilder.fromEntity(entityClass);

header = loadHeader(path, entityClass);
csvHeader = hasHeaderRow ? headerBuilder.fromFile(path, skipRows, delimiter, tokenizer) : null;

this.entityHeader = loadHeader(path, entityClass);

AtomicInteger line = new AtomicInteger();

return Files.lines(path, charset())
.skip(skipRows + (hasHeaderRow ? 1 : 0))
.filter(rowFilter != null ? rowFilter : s -> true)
.map(tokenizer != null ? tokenizer : s -> s.split(delimiter))
.map(cells -> {
Row row = new Row(line.getAndIncrement());
for (int i = 0; i < cells.length; i++) {
String h = null;
if (header != null) h = header.headers.get(i);
row.addCell(h, cells[i]);
}
return row;
});
.skip(skipRows + (hasHeaderRow ? 1 : 0))
.filter(rowFilter != null ? rowFilter : s -> true)
.map(tokenizer != null ? tokenizer : s -> s.split(delimiter))
.map(cells -> {
Row row = new Row(line.getAndIncrement());

for (int i = 0; i < cells.length; i++) {
String value = cells[i];
String headerName = null;

if (entityHeader != null && csvHeader != null) {
headerName = csvHeader.headers.get(i);
}

row.addCell(headerName, value);
}

return row;
});
}

protected <T> HeaderBuilder.Header loadHeader(Path path, Class<T> entityClass) throws IOException {
HeaderBuilder headerBuilder = new HeaderBuilder(charset());
if (header != null) {
return header;

if (entityHeader != null) {
return entityHeader;
}

/*
* If we do this, there might be a mismatch between entity and file,
* and we will parse by index anyway.
*
* parsing: expected header
* creating: target header
*/
if (entityClass != null) {
return headerBuilder.fromEntity(entityClass);
}

if (hasHeaderRow)
/*
* parsing: given header
*/
if (hasHeaderRow) {
return headerBuilder.fromFile(path, skipRows, delimiter, tokenizer);
}

return new HeaderBuilder.Header();
}
Expand Down
4 changes: 4 additions & 0 deletions src/main/java/com/github/mklueh/sexycsv/HeaderBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@ public static class Header {
Map<String, Integer> headerPositionMap = new HashMap<>();
Map<Integer, String> headerPositionMapInverse = new HashMap<>();
List<String> headers = new ArrayList<>();

/**
 * Reports whether this header is covered by {@code otherHeader}.
 *
 * NOTE(review): this is currently a placeholder. The body ignores {@code otherHeader}
 * and unconditionally returns {@code false}. Presumably it is meant to check that the
 * other header contains this header's columns; confirm the intended semantics before
 * relying on the result.
 *
 * @param otherHeader the header to compare against (unused by the current body)
 * @return always {@code false} in the current implementation
 */
public boolean coveredBy(Header otherHeader) {
return false;
}
}

/**
Expand Down
56 changes: 47 additions & 9 deletions src/test/java/com/github/mklueh/sexycsv/CsvParserTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
class CsvParserTest {

@Test
void testParsingWithEntityHeaders() throws IOException {
void testParsingWithEntityHeaders() throws Exception {
Path path = getCsv("sample-data-comma.csv");

SexyCSV.Parser parser = SexyCSV.Parser
Expand All @@ -30,7 +30,7 @@ void testParsingWithEntityHeaders() throws IOException {

@Test
//.header(Arrays.asList("id", "name", "age", "country")) set optional header
void testParsingWithCustomTokenizer() throws IOException {
void testParsingWithCustomTokenizer() throws Exception {
Path path = getCsv("sample-data-comma.csv");

SexyCSV.Parser parser = SexyCSV.Parser
Expand All @@ -53,7 +53,7 @@ void testParsingWithCustomTokenizer() throws IOException {
}

@Test
void testParsingCorruptedWithCustomTokenizerAndPreHeader() throws IOException {
void testParsingCorruptedWithCustomTokenizerAndPreHeader() throws Exception {
Path path = getCsv("sample-data-pre-header-comma-corrupted.csv");

SexyCSV.Parser parser = SexyCSV.Parser
Expand All @@ -71,15 +71,15 @@ void testParsingCorruptedWithCustomTokenizerAndPreHeader() throws IOException {
}

@Test
void testParsingCorruptedWithTabAndPreHeader() throws IOException {
void testParsingCorruptedWithTabAndPreHeader() throws Exception {
Path path = getCsv("sample-data-pre-header-tab-corrupted.csv");

SexyCSV.Parser parser = SexyCSV.Parser.builder()
.delimiter("\t")
.hasHeaderRow(true) //auto-use of the given header
.skipRows(3)
.rowFilter(s -> s.matches("^\\d.*")) //we are only interested in rows that start with a number
.build();
.delimiter("\t")
.hasHeaderRow(true) //auto-use of the given header
.skipRows(3)
.rowFilter(s -> s.matches("^\\d.*")) //we are only interested in rows that start with a number
.build();

List<Row> data = parser.parse(path).toList();

Expand All @@ -96,6 +96,44 @@ void testParsingCorruptedWithTabAndPreHeader() throws IOException {

}

/**
 * Parses the "unrelated columns" sample file into {@code TestEntity} instances using a
 * comma delimiter and a header row, prints each entity, and expects four entities that
 * all report {@code allNull()}.
 */
@Test
void testParsingUnrelatedFile() throws Exception {
    Path csvFile = getCsv("sample-data-comma-unrelated-columns.csv");

    SexyCSV.Parser csvParser = SexyCSV.Parser.builder()
            .hasHeaderRow(true)
            .delimiter(",")
            .build();

    List<TestEntity> entities = csvParser.parse(csvFile, TestEntity.class).toList();

    // Dump the parsed entities for manual inspection of the test output.
    for (TestEntity entity : entities) {
        System.out.println(entity.toString());
    }

    assertEquals(4, entities.size());
    for (TestEntity entity : entities) {
        assertTrue(entity.allNull());
    }
}


/**
 * Parses the "one related column" sample file into {@code TestEntity} instances using a
 * comma delimiter and a header row, prints each entity, and expects four entities, none
 * of which reports {@code allNull()} and each of which has a non-null id.
 */
@Test
void testParsingOneRelatedFile() throws Exception {
    Path csvFile = getCsv("sample-data-comma-one-related-column.csv");

    SexyCSV.Parser csvParser = SexyCSV.Parser.builder()
            .hasHeaderRow(true)
            .delimiter(",")
            .build();

    List<TestEntity> entities = csvParser.parse(csvFile, TestEntity.class).toList();

    // Dump the parsed entities for manual inspection of the test output.
    for (TestEntity entity : entities) {
        System.out.println(entity.toString());
    }

    assertEquals(4, entities.size());

    // Two separate passes: every entity must be non-empty before ids are checked.
    for (TestEntity entity : entities) {
        assertFalse(entity.allNull());
    }
    for (TestEntity entity : entities) {
        assertNotNull(entity.getId());
    }
}

/**
 * Builds the path of a test fixture under {@code src/test/resources}.
 *
 * @param file file name of the fixture
 * @return the path {@code src/test/resources/<file>}, relative to the working directory
 */
private Path getCsv(String file) {
    final Path fixture = Paths.get("src", "test", "resources", file);
    return fixture;
}
Expand Down
5 changes: 5 additions & 0 deletions src/test/java/com/github/mklueh/sexycsv/TestEntity.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,9 @@ public class TestEntity {

@CSVColumn(value = "country")
private String country;


/**
 * Tells whether none of the entity's fields has been populated.
 *
 * @return {@code true} only if id, name, age and country are all {@code null}
 */
public boolean allNull() {
    final Object[] columns = {id, name, age, country};
    for (Object column : columns) {
        if (column != null) {
            return false;
        }
    }
    return true;
}
}
5 changes: 5 additions & 0 deletions src/test/resources/sample-data-comma-one-related-column.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
id,b ,c ,d ,e
1,Peter ,32,DE,
2,Adam ,22,DE,
3,Lisa ,23,EN,
4,Arthur,43,US,
5 changes: 5 additions & 0 deletions src/test/resources/sample-data-comma-unrelated-columns.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
a,b ,c ,d ,e
1,Peter ,32,DE,
2,Adam ,22,DE,
3,Lisa ,23,EN,
4,Arthur,43,US,

0 comments on commit 6a194c5

Please sign in to comment.