From 6a194c5029283c56de625b36a2a23798c612430d Mon Sep 17 00:00:00 2001 From: "m.kluehspies" Date: Thu, 10 Mar 2022 17:09:20 +0100 Subject: [PATCH] fixes for mismatching entity vs csv header including some more tests --- .idea/csv-plugin.xml | 51 +++++++++++ .idea/misc.xml | 2 +- build.gradle | 5 +- gradle/wrapper/gradle-wrapper.properties | 2 +- .../com/github/mklueh/sexycsv/CsvParser.java | 85 ++++++++++++++----- .../github/mklueh/sexycsv/HeaderBuilder.java | 4 + .../github/mklueh/sexycsv/CsvParserTest.java | 56 ++++++++++-- .../com/github/mklueh/sexycsv/TestEntity.java | 5 ++ .../sample-data-comma-one-related-column.csv | 5 ++ .../sample-data-comma-unrelated-columns.csv | 5 ++ 10 files changed, 185 insertions(+), 35 deletions(-) create mode 100644 .idea/csv-plugin.xml create mode 100644 src/test/resources/sample-data-comma-one-related-column.csv create mode 100644 src/test/resources/sample-data-comma-unrelated-columns.csv diff --git a/.idea/csv-plugin.xml b/.idea/csv-plugin.xml new file mode 100644 index 0000000..b386dfe --- /dev/null +++ b/.idea/csv-plugin.xml @@ -0,0 +1,51 @@ + + + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml index 7142580..5c1ca7a 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -4,7 +4,7 @@ - + diff --git a/build.gradle b/build.gradle index 507d74d..b5d9a6b 100644 --- a/build.gradle +++ b/build.gradle @@ -7,6 +7,7 @@ plugins { scmVersion { versionCreator 'versionWithBranch' useHighestVersion = true + localOnly = true repository { pushTagsOnly = true } @@ -22,7 +23,9 @@ scmVersion { } group 'com.github.mklueh' -project.version = scmVersion.version +//project.version = scmVersion.version +sourceCompatibility = '11' +targetCompatibility = '11' repositories { mavenCentral() diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties index be52383..ffed3a2 100644 --- a/gradle/wrapper/gradle-wrapper.properties +++ b/gradle/wrapper/gradle-wrapper.properties @@ -1,5 +1,5 @@ distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists -distributionUrl=https\://services.gradle.org/distributions/gradle-6.7-bin.zip +distributionUrl=https\://services.gradle.org/distributions/gradle-7.2-bin.zip zipStoreBase=GRADLE_USER_HOME zipStorePath=wrapper/dists diff --git a/src/main/java/com/github/mklueh/sexycsv/CsvParser.java b/src/main/java/com/github/mklueh/sexycsv/CsvParser.java index 100bbff..99e9bda 100644 --- a/src/main/java/com/github/mklueh/sexycsv/CsvParser.java +++ b/src/main/java/com/github/mklueh/sexycsv/CsvParser.java @@ -11,7 +11,6 @@ import java.nio.charset.Charset; import java.nio.file.Files; import java.nio.file.Path; -import java.util.Arrays; import java.util.concurrent.atomic.AtomicInteger; import java.util.function.Function; import java.util.function.Predicate; @@ -40,7 +39,15 @@ public class CsvParser { */ private boolean hasHeaderRow; - private HeaderBuilder.Header header; + /** + * Header loaded from entity if provided + */ + private HeaderBuilder.Header entityHeader; + + /** + * Header loaded from CSV if provided + */ + private HeaderBuilder.Header csvHeader; /** * Filter corrupt rows @@ -52,20 +59,29 @@ public class CsvParser { */ private Function tokenizer; - public Stream parse(Path path, Class clazz) throws IOException { + + public Stream parse(Path path, Class clazz) throws Exception { Stream rows = parseFile(path, clazz); + return mapRowsToEntities(clazz, rows); + + } + + private Stream mapRowsToEntities(Class clazz, Stream rows) { return rows.map(row -> { try { T o = (T) clazz.getDeclaredConstructor().newInstance(); Field[] fields = clazz.getDeclaredFields(); for (Field field : fields) { field.setAccessible(true); - String fieldName = field.getName(); + Type type = field.getGenericType(); CSVColumn annotation = field.getAnnotation(CSVColumn.class); + if (annotation != null) { + //lookup value in row by columnName String columnName = annotation.value(); String value = row.get(columnName); + if (type.getTypeName().equals(String.class.getName())) { field.set(o, value); } @@ -76,47 +92,70 @@ public Stream parse(Path path, Class clazz) throws IOException { throw new RuntimeException(e); } }); - } - public Stream parse(Path path) throws IOException { + public Stream parse(Path path) throws Exception { return parseFile(path, null); } - private Stream parseFile(Path path, Class entityClass) throws IOException { + private Stream parseFile(Path path, Class entityClass) throws Exception { + HeaderBuilder headerBuilder = new HeaderBuilder(charset()); + + //HeaderBuilder.Header entityHeader = headerBuilder.fromEntity(entityClass); - header = loadHeader(path, entityClass); + csvHeader = hasHeaderRow ? headerBuilder.fromFile(path, skipRows, delimiter, tokenizer) : null; + + this.entityHeader = loadHeader(path, entityClass); AtomicInteger line = new AtomicInteger(); return Files.lines(path, charset()) - .skip(skipRows + (hasHeaderRow ? 1 : 0)) - .filter(rowFilter != null ? rowFilter : s -> true) - .map(tokenizer != null ? tokenizer : s -> s.split(delimiter)) - .map(cells -> { - Row row = new Row(line.getAndIncrement()); - for (int i = 0; i < cells.length; i++) { - String h = null; - if (header != null) h = header.headers.get(i); - row.addCell(h, cells[i]); - } - return row; - }); + .skip(skipRows + (hasHeaderRow ? 1 : 0)) + .filter(rowFilter != null ? rowFilter : s -> true) + .map(tokenizer != null ? tokenizer : s -> s.split(delimiter)) + .map(cells -> { + Row row = new Row(line.getAndIncrement()); + + for (int i = 0; i < cells.length; i++) { + String value = cells[i]; + String headerName = null; + + if (entityHeader != null && csvHeader != null) { + headerName = csvHeader.headers.get(i); + } + + row.addCell(headerName, value); + } + + return row; + }); } protected HeaderBuilder.Header loadHeader(Path path, Class entityClass) throws IOException { HeaderBuilder headerBuilder = new HeaderBuilder(charset()); - if (header != null) { - return header; + + if (entityHeader != null) { + return entityHeader; } + /* + * If we do this, there might be a mismatch between entity and file, + * and we will parse by index anyway. + * + * parsing: expected header + * creating: target header + */ if (entityClass != null) { return headerBuilder.fromEntity(entityClass); } - if (hasHeaderRow) + /* + * parsing: given header + */ + if (hasHeaderRow) { return headerBuilder.fromFile(path, skipRows, delimiter, tokenizer); + } return new HeaderBuilder.Header(); } diff --git a/src/main/java/com/github/mklueh/sexycsv/HeaderBuilder.java b/src/main/java/com/github/mklueh/sexycsv/HeaderBuilder.java index 0f7e70e..6b003ba 100644 --- a/src/main/java/com/github/mklueh/sexycsv/HeaderBuilder.java +++ b/src/main/java/com/github/mklueh/sexycsv/HeaderBuilder.java @@ -27,6 +27,10 @@ public static class Header { Map headerPositionMap = new HashMap<>(); Map headerPositionMapInverse = new HashMap<>(); List headers = new ArrayList<>(); + + public boolean coveredBy(Header otherHeader) { + return false; + } } /** diff --git a/src/test/java/com/github/mklueh/sexycsv/CsvParserTest.java b/src/test/java/com/github/mklueh/sexycsv/CsvParserTest.java index 8b61177..54aa0f4 100644 --- a/src/test/java/com/github/mklueh/sexycsv/CsvParserTest.java +++ b/src/test/java/com/github/mklueh/sexycsv/CsvParserTest.java @@ -13,7 +13,7 @@ class CsvParserTest { @Test - void testParsingWithEntityHeaders() throws IOException { + void testParsingWithEntityHeaders() throws Exception { Path path = getCsv("sample-data-comma.csv"); SexyCSV.Parser parser = SexyCSV.Parser @@ -30,7 +30,7 @@ void testParsingWithEntityHeaders() throws IOException { @Test //.header(Arrays.asList("id", "name", "age", "country")) set optional header - void testParsingWithCustomTokenizer() throws IOException { + void testParsingWithCustomTokenizer() throws Exception { Path path = getCsv("sample-data-comma.csv"); SexyCSV.Parser parser = SexyCSV.Parser @@ -53,7 +53,7 @@ void testParsingWithCustomTokenizer() throws IOException { } @Test - void testParsingCorruptedWithCustomTokenizerAndPreHeader() throws IOException { + void testParsingCorruptedWithCustomTokenizerAndPreHeader() throws Exception { Path path = getCsv("sample-data-pre-header-comma-corrupted.csv"); SexyCSV.Parser parser = SexyCSV.Parser @@ -71,15 +71,15 @@ void testParsingCorruptedWithCustomTokenizerAndPreHeader() throws IOException { } @Test - void testParsingCorruptedWithTabAndPreHeader() throws IOException { + void testParsingCorruptedWithTabAndPreHeader() throws Exception { Path path = getCsv("sample-data-pre-header-tab-corrupted.csv"); SexyCSV.Parser parser = SexyCSV.Parser.builder() - .delimiter("\t") - .hasHeaderRow(true) //auto-use of the given header - .skipRows(3) - .rowFilter(s -> s.matches("^\\d.*")) //we are only interested in rows that start with a number - .build(); + .delimiter("\t") + .hasHeaderRow(true) //auto-use of the given header + .skipRows(3) + .rowFilter(s -> s.matches("^\\d.*")) //we are only interested in rows that start with a number + .build(); List data = parser.parse(path).toList(); @@ -96,6 +96,44 @@ void testParsingCorruptedWithTabAndPreHeader() throws IOException { } + @Test + void testParsingUnrelatedFile() throws Exception { + Path path = getCsv("sample-data-comma-unrelated-columns.csv"); + + SexyCSV.Parser parser = SexyCSV.Parser + .builder() + .hasHeaderRow(true) + .delimiter(",") + .build(); + + List data = parser.parse(path, TestEntity.class).toList(); + + data.forEach(row -> System.out.println(row.toString())); + + assertEquals(4, data.size()); + data.forEach(testEntity -> assertTrue(testEntity.allNull())); + } + + + @Test + void testParsingOneRelatedFile() throws Exception { + Path path = getCsv("sample-data-comma-one-related-column.csv"); + + SexyCSV.Parser parser = SexyCSV.Parser + .builder() + .hasHeaderRow(true) + .delimiter(",") + .build(); + + List data = parser.parse(path, TestEntity.class).toList(); + + data.forEach(row -> System.out.println(row.toString())); + + assertEquals(4, data.size()); + data.forEach(testEntity -> assertFalse(testEntity.allNull())); + data.forEach(testEntity -> assertNotNull(testEntity.getId())); + } + private Path getCsv(String file) { return Paths.get("src", "test", "resources", file); } diff --git a/src/test/java/com/github/mklueh/sexycsv/TestEntity.java b/src/test/java/com/github/mklueh/sexycsv/TestEntity.java index a1fe78f..98d760e 100644 --- a/src/test/java/com/github/mklueh/sexycsv/TestEntity.java +++ b/src/test/java/com/github/mklueh/sexycsv/TestEntity.java @@ -21,4 +21,9 @@ public class TestEntity { @CSVColumn(value = "country") private String country; + + + public boolean allNull() { + return id == null && name == null && age == null && country == null; + } } \ No newline at end of file diff --git a/src/test/resources/sample-data-comma-one-related-column.csv b/src/test/resources/sample-data-comma-one-related-column.csv new file mode 100644 index 0000000..76af58d --- /dev/null +++ b/src/test/resources/sample-data-comma-one-related-column.csv @@ -0,0 +1,5 @@ +id,b ,c ,d ,e +1,Peter ,32,DE, +2,Adam ,22,DE, +3,Lisa ,23,EN, +4,Arthur,43,US, diff --git a/src/test/resources/sample-data-comma-unrelated-columns.csv b/src/test/resources/sample-data-comma-unrelated-columns.csv new file mode 100644 index 0000000..51bb5bf --- /dev/null +++ b/src/test/resources/sample-data-comma-unrelated-columns.csv @@ -0,0 +1,5 @@ +a,b ,c ,d ,e +1,Peter ,32,DE, +2,Adam ,22,DE, +3,Lisa ,23,EN, +4,Arthur,43,US,