From 6a194c5029283c56de625b36a2a23798c612430d Mon Sep 17 00:00:00 2001
From: "m.kluehspies" <m.kluehspies@mainfirst-invest.com>
Date: Thu, 10 Mar 2022 17:09:20 +0100
Subject: [PATCH] Fix mismatch between entity and CSV headers; add more
 tests

---
 .idea/csv-plugin.xml                          | 51 +++++++++++
 .idea/misc.xml                                |  2 +-
 build.gradle                                  |  5 +-
 gradle/wrapper/gradle-wrapper.properties      |  2 +-
 .../com/github/mklueh/sexycsv/CsvParser.java  | 85 ++++++++++++++-----
 .../github/mklueh/sexycsv/HeaderBuilder.java  |  4 +
 .../github/mklueh/sexycsv/CsvParserTest.java  | 56 ++++++++++--
 .../com/github/mklueh/sexycsv/TestEntity.java |  5 ++
 .../sample-data-comma-one-related-column.csv  |  5 ++
 .../sample-data-comma-unrelated-columns.csv   |  5 ++
 10 files changed, 185 insertions(+), 35 deletions(-)
 create mode 100644 .idea/csv-plugin.xml
 create mode 100644 src/test/resources/sample-data-comma-one-related-column.csv
 create mode 100644 src/test/resources/sample-data-comma-unrelated-columns.csv
diff --git a/.idea/csv-plugin.xml b/.idea/csv-plugin.xml
new file mode 100644
index 0000000..b386dfe
--- /dev/null
+++ b/.idea/csv-plugin.xml
@@ -0,0 +1,51 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="CsvFileAttributes">
+    <option name="attributeMap">
+      <map>
+        <entry key="\src\test\resources\sample-data-comma-one-related-column.csv">
+          <value>
+            <Attribute>
+              <option name="separator" value="," />
+            </Attribute>
+          </value>
+        </entry>
+        <entry key="\src\test\resources\sample-data-comma-unrelated-columns.csv">
+          <value>
+            <Attribute>
+              <option name="separator" value="," />
+            </Attribute>
+          </value>
+        </entry>
+        <entry key="\src\test\resources\sample-data-comma.csv">
+          <value>
+            <Attribute>
+              <option name="separator" value="," />
+            </Attribute>
+          </value>
+        </entry>
+        <entry key="\src\test\resources\sample-data-pre-header-comma-corrupted.csv">
+          <value>
+            <Attribute>
+              <option name="separator" value="," />
+            </Attribute>
+          </value>
+        </entry>
+        <entry key="\src\test\resources\sample-data-pre-header-semicolon-corrupted.csv">
+          <value>
+            <Attribute>
+              <option name="separator" value=";" />
+            </Attribute>
+          </value>
+        </entry>
+        <entry key="\src\test\resources\sample-data-pre-header-tab-corrupted.csv">
+          <value>
+            <Attribute>
+              <option name="separator" value="&#9;" />
+            </Attribute>
+          </value>
+        </entry>
+      </map>
+    </option>
+  </component>
+</project>
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 7142580..5c1ca7a 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -4,7 +4,7 @@
   <component name="FrameworkDetectionExcludesConfiguration">
     <file type="web" url="file://$PROJECT_DIR$" />
   </component>
-  <component name="ProjectRootManager" version="2" languageLevel="JDK_17" default="true" project-jdk-name="openjdk-17" project-jdk-type="JavaSDK">
+  <component name="ProjectRootManager" version="2" languageLevel="JDK_11" project-jdk-name="openjdk-17" project-jdk-type="JavaSDK">
     <output url="file://$PROJECT_DIR$/out" />
   </component>
   <component name="ProjectType">
diff --git a/build.gradle b/build.gradle
index 507d74d..b5d9a6b 100644
--- a/build.gradle
+++ b/build.gradle
@@ -7,6 +7,7 @@ plugins {
 scmVersion {
     versionCreator 'versionWithBranch'
     useHighestVersion = true
+    localOnly = true
     repository {
         pushTagsOnly = true
     }
@@ -22,7 +23,9 @@ scmVersion {
 }
 
 group 'com.github.mklueh'
-project.version = scmVersion.version
+//project.version = scmVersion.version
+sourceCompatibility = '11'
+targetCompatibility = '11'
 
 repositories {
     mavenCentral()
diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties
index be52383..ffed3a2 100644
--- a/gradle/wrapper/gradle-wrapper.properties
+++ b/gradle/wrapper/gradle-wrapper.properties
@@ -1,5 +1,5 @@
 distributionBase=GRADLE_USER_HOME
 distributionPath=wrapper/dists
-distributionUrl=https\://services.gradle.org/distributions/gradle-6.7-bin.zip
+distributionUrl=https\://services.gradle.org/distributions/gradle-7.2-bin.zip
 zipStoreBase=GRADLE_USER_HOME
 zipStorePath=wrapper/dists
diff --git a/src/main/java/com/github/mklueh/sexycsv/CsvParser.java b/src/main/java/com/github/mklueh/sexycsv/CsvParser.java
index 100bbff..99e9bda 100644
--- a/src/main/java/com/github/mklueh/sexycsv/CsvParser.java
+++ b/src/main/java/com/github/mklueh/sexycsv/CsvParser.java
@@ -11,7 +11,6 @@
 import java.nio.charset.Charset;
 import java.nio.file.Files;
 import java.nio.file.Path;
-import java.util.Arrays;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.function.Function;
 import java.util.function.Predicate;
@@ -40,7 +39,15 @@ public class CsvParser {
      */
     private boolean hasHeaderRow;
 
-    private HeaderBuilder.Header header;
+    /**
+     * Header loaded from entity if provided
+     */
+    private HeaderBuilder.Header entityHeader;
+
+    /**
+     * Header read from the CSV header row; null when hasHeaderRow is false
+     */
+    private HeaderBuilder.Header csvHeader;
 
     /**
      * Filter corrupt rows
@@ -52,20 +59,29 @@ public class CsvParser {
      */
     private Function<? super String, String[]> tokenizer;
 
-    public <T> Stream<T> parse(Path path, Class<T> clazz) throws IOException {
+
+    public <T> Stream<T> parse(Path path, Class<T> clazz) throws Exception {
         Stream<Row> rows = parseFile(path, clazz);
+        return mapRowsToEntities(clazz, rows);
+
+    }
+
+    private <T> Stream<T> mapRowsToEntities(Class<T> clazz, Stream<Row> rows) {
         return rows.map(row -> {
             try {
                 T o = (T) clazz.getDeclaredConstructor().newInstance();
                 Field[] fields = clazz.getDeclaredFields();
                 for (Field field : fields) {
                     field.setAccessible(true);
-                    String fieldName = field.getName();
+
                     Type type = field.getGenericType();
                     CSVColumn annotation = field.getAnnotation(CSVColumn.class);
+
                     if (annotation != null) {
+                        //lookup value in row by columnName
                         String columnName = annotation.value();
                         String value = row.get(columnName);
+
                         if (type.getTypeName().equals(String.class.getName())) {
                             field.set(o, value);
                         }
@@ -76,47 +92,70 @@ public <T> Stream<T> parse(Path path, Class<T> clazz) throws IOException {
                 throw new RuntimeException(e);
             }
         });
-
     }
 
 
-    public Stream<Row> parse(Path path) throws IOException {
+    public Stream<Row> parse(Path path) throws Exception {
         return parseFile(path, null);
     }
 
-    private <T> Stream<Row> parseFile(Path path, Class<T> entityClass) throws IOException {
+    private <T> Stream<Row> parseFile(Path path, Class<T> entityClass) throws Exception {
+        HeaderBuilder headerBuilder = new HeaderBuilder(charset());
+
+        //HeaderBuilder.Header entityHeader = headerBuilder.fromEntity(entityClass);
 
-        header = loadHeader(path, entityClass);
+        csvHeader = hasHeaderRow ? headerBuilder.fromFile(path, skipRows, delimiter, tokenizer) : null;
+
+        this.entityHeader = loadHeader(path, entityClass);
 
         AtomicInteger line = new AtomicInteger();
 
         return Files.lines(path, charset())
-                .skip(skipRows + (hasHeaderRow ? 1 : 0))
-                .filter(rowFilter != null ? rowFilter : s -> true)
-                .map(tokenizer != null ? tokenizer : s -> s.split(delimiter))
-                .map(cells -> {
-                    Row row = new Row(line.getAndIncrement());
-                    for (int i = 0; i < cells.length; i++) {
-                        String h = null;
-                        if (header != null) h = header.headers.get(i);
-                        row.addCell(h, cells[i]);
-                    }
-                    return row;
-                });
+                    .skip(skipRows + (hasHeaderRow ? 1 : 0))
+                    .filter(rowFilter != null ? rowFilter : s -> true)
+                    .map(tokenizer != null ? tokenizer : s -> s.split(delimiter))
+                    .map(cells -> {
+                        Row row = new Row(line.getAndIncrement());
+
+                        for (int i = 0; i < cells.length; i++) {
+                            String value = cells[i];
+                            String headerName = null;
+
+                            if (entityHeader != null && csvHeader != null) {
+                                headerName = csvHeader.headers.get(i);
+                            }
+
+                            row.addCell(headerName, value);
+                        }
+
+                        return row;
+                    });
     }
 
     protected <T> HeaderBuilder.Header loadHeader(Path path, Class<T> entityClass) throws IOException {
         HeaderBuilder headerBuilder = new HeaderBuilder(charset());
-        if (header != null) {
-            return header;
+
+        if (entityHeader != null) {
+            return entityHeader;
         }
 
+        /*
+         * Returning the entity-derived header here may not match the file's
+         * actual columns; rows are parsed by index anyway.
+         *
+         * parsing: expected header
+         * creating: target header
+         */
         if (entityClass != null) {
             return headerBuilder.fromEntity(entityClass);
         }
 
-        if (hasHeaderRow)
+        /*
+         * parsing: given header
+         */
+        if (hasHeaderRow) {
             return headerBuilder.fromFile(path, skipRows, delimiter, tokenizer);
+        }
 
         return new HeaderBuilder.Header();
     }
diff --git a/src/main/java/com/github/mklueh/sexycsv/HeaderBuilder.java b/src/main/java/com/github/mklueh/sexycsv/HeaderBuilder.java
index 0f7e70e..6b003ba 100644
--- a/src/main/java/com/github/mklueh/sexycsv/HeaderBuilder.java
+++ b/src/main/java/com/github/mklueh/sexycsv/HeaderBuilder.java
@@ -27,6 +27,10 @@ public static class Header {
         Map<String, Integer> headerPositionMap = new HashMap<>();
         Map<Integer, String> headerPositionMapInverse = new HashMap<>();
         List<String> headers = new ArrayList<>();
+
+        public boolean coveredBy(Header otherHeader) {
+            return false;
+        }
     }
 
     /**
diff --git a/src/test/java/com/github/mklueh/sexycsv/CsvParserTest.java b/src/test/java/com/github/mklueh/sexycsv/CsvParserTest.java
index 8b61177..54aa0f4 100644
--- a/src/test/java/com/github/mklueh/sexycsv/CsvParserTest.java
+++ b/src/test/java/com/github/mklueh/sexycsv/CsvParserTest.java
@@ -13,7 +13,7 @@
 class CsvParserTest {
 
     @Test
-    void testParsingWithEntityHeaders() throws IOException {
+    void testParsingWithEntityHeaders() throws Exception {
         Path path = getCsv("sample-data-comma.csv");
 
         SexyCSV.Parser parser = SexyCSV.Parser
@@ -30,7 +30,7 @@ void testParsingWithEntityHeaders() throws IOException {
 
     @Test
 //.header(Arrays.asList("id", "name", "age", "country")) set optional header
-    void testParsingWithCustomTokenizer() throws IOException {
+    void testParsingWithCustomTokenizer() throws Exception {
         Path path = getCsv("sample-data-comma.csv");
 
         SexyCSV.Parser parser = SexyCSV.Parser
@@ -53,7 +53,7 @@ void testParsingWithCustomTokenizer() throws IOException {
     }
 
     @Test
-    void testParsingCorruptedWithCustomTokenizerAndPreHeader() throws IOException {
+    void testParsingCorruptedWithCustomTokenizerAndPreHeader() throws Exception {
         Path path = getCsv("sample-data-pre-header-comma-corrupted.csv");
 
         SexyCSV.Parser parser = SexyCSV.Parser
@@ -71,15 +71,15 @@ void testParsingCorruptedWithCustomTokenizerAndPreHeader() throws IOException {
     }
 
     @Test
-    void testParsingCorruptedWithTabAndPreHeader() throws IOException {
+    void testParsingCorruptedWithTabAndPreHeader() throws Exception {
         Path path = getCsv("sample-data-pre-header-tab-corrupted.csv");
 
         SexyCSV.Parser parser = SexyCSV.Parser.builder()
-                .delimiter("\t")
-                .hasHeaderRow(true) //auto-use of the given header
-                .skipRows(3)
-                .rowFilter(s -> s.matches("^\\d.*")) //we are only interested in rows that start with a number
-                .build();
+                                              .delimiter("\t")
+                                              .hasHeaderRow(true) //auto-use of the given header
+                                              .skipRows(3)
+                                              .rowFilter(s -> s.matches("^\\d.*")) //we are only interested in rows that start with a number
+                                              .build();
 
         List<Row> data = parser.parse(path).toList();
 
@@ -96,6 +96,44 @@ void testParsingCorruptedWithTabAndPreHeader() throws IOException {
 
     }
 
+    @Test
+    void testParsingUnrelatedFile() throws Exception {
+        Path path = getCsv("sample-data-comma-unrelated-columns.csv");
+
+        SexyCSV.Parser parser = SexyCSV.Parser
+                .builder()
+                .hasHeaderRow(true)
+                .delimiter(",")
+                .build();
+
+        List<TestEntity> data = parser.parse(path, TestEntity.class).toList();
+
+        data.forEach(row -> System.out.println(row.toString()));
+
+        assertEquals(4, data.size());
+        data.forEach(testEntity -> assertTrue(testEntity.allNull()));
+    }
+
+
+    @Test
+    void testParsingOneRelatedFile() throws Exception {
+        Path path = getCsv("sample-data-comma-one-related-column.csv");
+
+        SexyCSV.Parser parser = SexyCSV.Parser
+                .builder()
+                .hasHeaderRow(true)
+                .delimiter(",")
+                .build();
+
+        List<TestEntity> data = parser.parse(path, TestEntity.class).toList();
+
+        data.forEach(row -> System.out.println(row.toString()));
+
+        assertEquals(4, data.size());
+        data.forEach(testEntity -> assertFalse(testEntity.allNull()));
+        data.forEach(testEntity -> assertNotNull(testEntity.getId()));
+    }
+
     private Path getCsv(String file) {
         return Paths.get("src", "test", "resources", file);
     }
diff --git a/src/test/java/com/github/mklueh/sexycsv/TestEntity.java b/src/test/java/com/github/mklueh/sexycsv/TestEntity.java
index a1fe78f..98d760e 100644
--- a/src/test/java/com/github/mklueh/sexycsv/TestEntity.java
+++ b/src/test/java/com/github/mklueh/sexycsv/TestEntity.java
@@ -21,4 +21,9 @@ public class TestEntity {
 
     @CSVColumn(value = "country")
     private String country;
+
+
+    public boolean allNull() {
+        return id == null && name == null && age == null && country == null;
+    }
 }
\ No newline at end of file
diff --git a/src/test/resources/sample-data-comma-one-related-column.csv b/src/test/resources/sample-data-comma-one-related-column.csv
new file mode 100644
index 0000000..76af58d
--- /dev/null
+++ b/src/test/resources/sample-data-comma-one-related-column.csv
@@ -0,0 +1,5 @@
+id,b     ,c ,d ,e
+1,Peter ,32,DE,
+2,Adam  ,22,DE,
+3,Lisa  ,23,EN,
+4,Arthur,43,US,
diff --git a/src/test/resources/sample-data-comma-unrelated-columns.csv b/src/test/resources/sample-data-comma-unrelated-columns.csv
new file mode 100644
index 0000000..51bb5bf
--- /dev/null
+++ b/src/test/resources/sample-data-comma-unrelated-columns.csv
@@ -0,0 +1,5 @@
+a,b     ,c ,d ,e
+1,Peter ,32,DE,
+2,Adam  ,22,DE,
+3,Lisa  ,23,EN,
+4,Arthur,43,US,