From 130ff68856d8e52756bbebd611f9d6aa01f018be Mon Sep 17 00:00:00 2001 From: Landon Reed Date: Mon, 20 Apr 2020 16:08:14 -0400 Subject: [PATCH] fix(Table): do not skip empty lines in csv reader fix #280 --- src/main/java/com/conveyal/gtfs/loader/JdbcGtfsLoader.java | 2 +- src/main/java/com/conveyal/gtfs/loader/Table.java | 3 +++ src/test/java/com/conveyal/gtfs/GTFSTest.java | 7 +++++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsLoader.java b/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsLoader.java index fd7ba0277..37036657d 100644 --- a/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsLoader.java +++ b/src/main/java/com/conveyal/gtfs/loader/JdbcGtfsLoader.java @@ -332,7 +332,7 @@ private int loadInternal (Table table) throws Exception { int keyFieldIndex = table.getKeyFieldIndex(fields); // Create separate fields array with filtered list that does not include null values (for duplicate headers or // ID field). This is solely used to construct the table and array of values to load. - Field[] cleanFields = Arrays.stream(fields).filter(field -> field != null).toArray(Field[]::new); + Field[] cleanFields = Arrays.stream(fields).filter(Objects::nonNull).toArray(Field[]::new); if (cleanFields.length == 0) { // Do not create the table if there are no valid fields. errorStorage.storeError(NewGTFSError.forTable(table, TABLE_MISSING_COLUMN_HEADERS)); diff --git a/src/main/java/com/conveyal/gtfs/loader/Table.java b/src/main/java/com/conveyal/gtfs/loader/Table.java index 817a8abda..6d2a0a522 100644 --- a/src/main/java/com/conveyal/gtfs/loader/Table.java +++ b/src/main/java/com/conveyal/gtfs/loader/Table.java @@ -520,6 +520,9 @@ public CsvReader getCsvReader(ZipFile zipFile, SQLErrorStorage sqlErrorStorage) // but the GTFS spec says that "files that include the UTF byte order mark are acceptable". 
InputStream bomInputStream = new BOMInputStream(zipInputStream); CsvReader csvReader = new CsvReader(bomInputStream, ',', Charset.forName("UTF8")); + // Don't skip empty records (this is set to true by default on CsvReader). We want to check for empty records + // during table load, so that they are logged as validation issues (WRONG_NUMBER_OF_FIELDS). + csvReader.setSkipEmptyRecords(false); csvReader.readHeaders(); return csvReader; } catch (IOException e) { diff --git a/src/test/java/com/conveyal/gtfs/GTFSTest.java b/src/test/java/com/conveyal/gtfs/GTFSTest.java index a3da82bf1..42ebe0fe8 100644 --- a/src/test/java/com/conveyal/gtfs/GTFSTest.java +++ b/src/test/java/com/conveyal/gtfs/GTFSTest.java @@ -141,6 +141,13 @@ public void canLoadFeedWithBadDates () { new ErrorExpectation(NewGTFSErrorType.REFERENTIAL_INTEGRITY), new ErrorExpectation(NewGTFSErrorType.DATE_FORMAT), new ErrorExpectation(NewGTFSErrorType.DATE_FORMAT), + // The below "wrong number of fields" errors are for empty new lines + // found in the file. + new ErrorExpectation(NewGTFSErrorType.WRONG_NUMBER_OF_FIELDS), + new ErrorExpectation(NewGTFSErrorType.WRONG_NUMBER_OF_FIELDS), + new ErrorExpectation(NewGTFSErrorType.WRONG_NUMBER_OF_FIELDS), + new ErrorExpectation(NewGTFSErrorType.WRONG_NUMBER_OF_FIELDS), + new ErrorExpectation(NewGTFSErrorType.WRONG_NUMBER_OF_FIELDS), new ErrorExpectation(NewGTFSErrorType.REFERENTIAL_INTEGRITY), new ErrorExpectation(NewGTFSErrorType.ROUTE_LONG_NAME_CONTAINS_SHORT_NAME), new ErrorExpectation(NewGTFSErrorType.FEED_TRAVEL_TIMES_ROUNDED),