From bfe2260354f3b34ec3f85a7205a03a3e0fbda5db Mon Sep 17 00:00:00 2001 From: Emanuela Epure <67077116+emanuelaepure10@users.noreply.github.com> Date: Wed, 29 Nov 2023 23:00:19 +0100 Subject: [PATCH] bug: add instances for empty rows Add the correct number of instances also for the cases when a row contains empty cells as the result of a formula ING-4135 --- Review note: the no-skip loop in XLSInstanceReader.loadSheet now tolerates null rows, which AnalyseXLSSchemaTable.analyseRow stores for missing sheet rows; comment wording in AbstractAnalyseTable.analyseContent was corrected. .../hale/io/xls/ui/XLSSchemaTypePage.java | 3 +- .../hale/io/xls/AbstractAnalyseTable.java | 31 ++++++++++------- .../hale/io/xls/AnalyseXLSSchemaTable.java | 26 ++++++++------- .../eu/esdihumboldt/hale/io/xls/XLSUtil.java | 21 ++++-------- .../hale/io/xls/reader/XLSInstanceReader.java | 33 +++++++++++-------- .../hale/io/xls/reader/XLSSchemaReader.java | 2 +- 6 files changed, 65 insertions(+), 51 deletions(-) diff --git a/io/plugins/eu.esdihumboldt.hale.io.xls.ui/src/eu/esdihumboldt/hale/io/xls/ui/XLSSchemaTypePage.java b/io/plugins/eu.esdihumboldt.hale.io.xls.ui/src/eu/esdihumboldt/hale/io/xls/ui/XLSSchemaTypePage.java index 1cfb113deb..9f63544270 100644 --- a/io/plugins/eu.esdihumboldt.hale.io.xls.ui/src/eu/esdihumboldt/hale/io/xls/ui/XLSSchemaTypePage.java +++ b/io/plugins/eu.esdihumboldt.hale.io.xls.ui/src/eu/esdihumboldt/hale/io/xls/ui/XLSSchemaTypePage.java @@ -235,12 +235,13 @@ private void sheetSelectionChanged(int sheetNum) throws Exception { } // update whole page with current sheet number + // no lines should be skipped private void update(int sheetNum) throws Exception { // if the sheet is empty an Exception occurs AnalyseXLSSchemaTable analyser = new AnalyseXLSSchemaTable( getWizard().getProvider().getSource(), - ReaderSettings.isXlsxContentType(getWizard().getContentType()), sheetNum); + ReaderSettings.isXlsxContentType(getWizard().getContentType()), sheetNum, 0); setHeader(analyser.getHeader().toArray(new String[0])); diff --git a/io/plugins/eu.esdihumboldt.hale.io.xls/src/eu/esdihumboldt/hale/io/xls/AbstractAnalyseTable.java 
b/io/plugins/eu.esdihumboldt.hale.io.xls/src/eu/esdihumboldt/hale/io/xls/AbstractAnalyseTable.java index 482e5b500e..56470330f8 100644 --- a/io/plugins/eu.esdihumboldt.hale.io.xls/src/eu/esdihumboldt/hale/io/xls/AbstractAnalyseTable.java +++ b/io/plugins/eu.esdihumboldt.hale.io.xls/src/eu/esdihumboldt/hale/io/xls/AbstractAnalyseTable.java @@ -50,7 +50,7 @@ public abstract class AbstractAnalyseTable { */ protected void analyse(LocatableInputSupplier source, boolean xlsx) throws Exception { - analyse(source, xlsx, 0); + analyse(source, xlsx, 0, 0); } /** @@ -58,10 +58,11 @@ protected void analyse(LocatableInputSupplier source, boolean xlsx) * * @param source the source to load the file from * @param sheetNum number of the sheet that should be loaded (0-based) + * @param skipNlines number of lines to skip * @throws Exception if an error occurs loading the file */ protected void analyse(LocatableInputSupplier source, boolean xlsx, - int sheetNum) throws Exception { + int sheetNum, int skipNlines) throws Exception { try (InputStream inp = new BufferedInputStream(source.getInput());) { // https://poi.apache.org/components/spreadsheet/quick-guide.html#FileInputStream URI location = source.getLocation(); @@ -71,10 +72,12 @@ protected void analyse(LocatableInputSupplier source, boo evaluator = wb.getCreationHelper().createFormulaEvaluator(); // the first row represents the header - analyseHeader(sheet); + if (skipNlines <= 0) { + analyseHeader(sheet); + } // load configuration entries - analyseContent(sheet); + analyseContent(sheet, skipNlines); } finally { // reset evaluator reference evaluator = null; @@ -136,17 +139,23 @@ protected void analyseHeader(Sheet sheet) { protected abstract void headerCell(int num, String text); /** - * Analyse the table content. + * Analyse the table content. 
If no lines are skipped, the first row is the header and is not + * added to the content rows; otherwise all rows are added to the + * content. * * @param sheet the table sheet + * @param skipNlines number of lines to be skipped */ - private void analyseContent(Sheet sheet) { - // for each row starting from the second - for (int i = 1; i <= sheet.getLastRowNum(); i++) { + private void analyseContent(Sheet sheet, int skipNlines) { + int startingLine = 0; + if (skipNlines <= 0) { + startingLine = 1; + } + + // for each content row (row 0 is excluded when it was analysed as header) + for (int i = startingLine; i <= sheet.getLastRowNum(); i++) { Row row = sheet.getRow(i); - if (row != null) { - analyseRow(i, row, sheet); - } + analyseRow(i, row, sheet); } } diff --git a/io/plugins/eu.esdihumboldt.hale.io.xls/src/eu/esdihumboldt/hale/io/xls/AnalyseXLSSchemaTable.java b/io/plugins/eu.esdihumboldt.hale.io.xls/src/eu/esdihumboldt/hale/io/xls/AnalyseXLSSchemaTable.java index 31cd0feb72..29bfd30476 100644 --- a/io/plugins/eu.esdihumboldt.hale.io.xls/src/eu/esdihumboldt/hale/io/xls/AnalyseXLSSchemaTable.java +++ b/io/plugins/eu.esdihumboldt.hale.io.xls/src/eu/esdihumboldt/hale/io/xls/AnalyseXLSSchemaTable.java @@ -18,9 +18,8 @@ import java.io.InputStream; import java.util.ArrayList; import java.util.Collection; -import java.util.HashMap; +import java.util.LinkedHashMap; import java.util.List; -import java.util.Map; import org.apache.poi.ss.usermodel.Row; import org.apache.poi.ss.usermodel.Sheet; @@ -36,7 +35,7 @@ public class AnalyseXLSSchemaTable extends AbstractAnalyseTable { private final List header; - private final Map> rows; + private final LinkedHashMap> rows; /** * Default constructor @@ -47,12 +46,12 @@ public class AnalyseXLSSchemaTable extends AbstractAnalyseTable { * @throws Exception thrown if the analysis fails */ public AnalyseXLSSchemaTable(LocatableInputSupplier source, boolean xlsx, - int sheetNum) throws Exception { + int sheetNum, int skipNlines) throws Exception { header = new ArrayList(); - 
rows = new HashMap>(); + rows = new LinkedHashMap>(); - analyse(source, xlsx, sheetNum); + analyse(source, xlsx, sheetNum, skipNlines); } /** @@ -73,12 +72,17 @@ protected void headerCell(int num, String text) { */ @Override protected void analyseRow(int num, Row row, Sheet sheet) { - List rowContent = new ArrayList(); - for (int i = 0; i < row.getLastCellNum(); i++) { - rowContent.add(extractText(row.getCell(i), sheet)); + if (row != null) { + List rowContent = new ArrayList(); + for (int i = 0; i < row.getLastCellNum(); i++) { + rowContent.add(extractText(row.getCell(i), sheet)); + } + if (!rowContent.isEmpty() && rowContent.stream().anyMatch(text -> text != null)) { + rows.put(num, rowContent); + } } - if (!rowContent.isEmpty() && rowContent.stream().anyMatch(text -> text != null)) { - rows.put(num, rowContent); + else { + rows.put(num, null); } } diff --git a/io/plugins/eu.esdihumboldt.hale.io.xls/src/eu/esdihumboldt/hale/io/xls/XLSUtil.java b/io/plugins/eu.esdihumboldt.hale.io.xls/src/eu/esdihumboldt/hale/io/xls/XLSUtil.java index 3bc8a6e05c..43af849818 100644 --- a/io/plugins/eu.esdihumboldt.hale.io.xls/src/eu/esdihumboldt/hale/io/xls/XLSUtil.java +++ b/io/plugins/eu.esdihumboldt.hale.io.xls/src/eu/esdihumboldt/hale/io/xls/XLSUtil.java @@ -59,30 +59,23 @@ public static String extractText(Cell cell, FormulaEvaluator evaluator, Sheet sh CellValue value = evaluator.evaluate(cell); - if (CellType.BLANK.equals(value.getCellType())) { + switch (value.getCellType()) { + case BLANK: return null; - } - else if (CellType.BOOLEAN.equals(value.getCellType())) { + case BOOLEAN: return String.valueOf(value.getBooleanValue()); - } - else if (CellType.NUMERIC.equals(value.getCellType())) { - // number formatting + case NUMERIC: double number = value.getNumberValue(); if (number == Math.floor(number)) { - // it's an integer return String.valueOf((int) number); } return String.valueOf(value.getNumberValue()); - } - else if (CellType.STRING.equals(value.getCellType())) { + case 
STRING: return value.getStringValue(); - } - else { -// if (CellType.FORMULA.equals(value.getCellType())) - // if (CellType.ERROR.equals(value.getCellType())) - // fall through + default: return null; } + } private static boolean isCellPartOfMergedRegion(Cell cell, Sheet sheet) { diff --git a/io/plugins/eu.esdihumboldt.hale.io.xls/src/eu/esdihumboldt/hale/io/xls/reader/XLSInstanceReader.java b/io/plugins/eu.esdihumboldt.hale.io.xls/src/eu/esdihumboldt/hale/io/xls/reader/XLSInstanceReader.java index bb202f981d..bea2f1c7cf 100644 --- a/io/plugins/eu.esdihumboldt.hale.io.xls/src/eu/esdihumboldt/hale/io/xls/reader/XLSInstanceReader.java +++ b/io/plugins/eu.esdihumboldt.hale.io.xls/src/eu/esdihumboldt/hale/io/xls/reader/XLSInstanceReader.java @@ -139,8 +139,12 @@ public static TypeDefinition matchTypeByName(QName typeName, TypeIndex schema) { } private void loadSheet(SheetInfo sheet, IOReporter reporter) throws Exception { + int skipNlines = sheet.getSettings().getSkipLines() != null + ? sheet.getSettings().getSkipLines() + : 0; + AnalyseXLSSchemaTable analyser = new AnalyseXLSSchemaTable(getSource(), - ReaderSettings.isXlsxContentType(getContentType()), sheet.getIndex()); + ReaderSettings.isXlsxContentType(getContentType()), sheet.getIndex(), skipNlines); // get type definition of the schema QName typeName = sheet.getSettings().getTypeName(); @@ -165,25 +169,24 @@ private void loadSheet(SheetInfo sheet, IOReporter reporter) throws Exception { Collection> rows = analyser.getRows(); int line = 0; - int skipNlines = sheet.getSettings().getSkipLines() != null - ? 
sheet.getSettings().getSkipLines() - : 0; if (skipNlines <= 0) { // do not skip any lines - + line++; // the header row is added as instance addInstanceForRow(analyser.getHeader(), type, propAr, line, reporter); - line++; - // iterate over all rows to create the instances Iterator> allRows = rows.iterator(); while (allRows.hasNext()) { + line++; List row = allRows.next(); - addInstanceForRow(row, type, propAr, line, reporter); - line++; + boolean allNullOrEmpty = row == null || row.stream().allMatch(s -> s == null || s.isEmpty()); + if (!allNullOrEmpty) { + addInstanceForRow(row, type, propAr, line, reporter); + } + } } else { @@ -191,12 +194,13 @@ private void loadSheet(SheetInfo sheet, IOReporter reporter) throws Exception { Iterator> allRows = rows.iterator(); while (allRows.hasNext()) { List row = allRows.next(); - if (!(skipNlines - 1 > 0)) { - addInstanceForRow(row, type, propAr, line, reporter); + if (!(skipNlines > 0) && row != null) { + boolean allNullOrEmpty = row.stream().allMatch(s -> s == null || s.isEmpty()); + if (!allNullOrEmpty) { + addInstanceForRow(row, type, propAr, line, reporter); + } } - skipNlines--; - line++; } } @@ -215,6 +219,9 @@ private void loadSheet(SheetInfo sheet, IOReporter reporter) throws Exception { private void addInstanceForRow(List row, TypeDefinition type, PropertyDefinition[] propAr, int line, IOReporter reporter) { MutableInstance instance = new DefaultInstance(type, null); + if (row == null) { + return; + } // int propertyIndex = 0; for (int index = 0; index < propAr.length; index++) { diff --git a/io/plugins/eu.esdihumboldt.hale.io.xls/src/eu/esdihumboldt/hale/io/xls/reader/XLSSchemaReader.java b/io/plugins/eu.esdihumboldt.hale.io.xls/src/eu/esdihumboldt/hale/io/xls/reader/XLSSchemaReader.java index 16f279e154..3ae6c00622 100644 --- a/io/plugins/eu.esdihumboldt.hale.io.xls/src/eu/esdihumboldt/hale/io/xls/reader/XLSSchemaReader.java +++ b/io/plugins/eu.esdihumboldt.hale.io.xls/src/eu/esdihumboldt/hale/io/xls/reader/XLSSchemaReader.java 
@@ -81,7 +81,7 @@ protected Schema loadFromSource(ProgressIndicator progress, IOReporter reporter) try { analyser = new AnalyseXLSSchemaTable(getSource(), - ReaderSettings.isXlsxContentType(getContentType()), sheetNum); + ReaderSettings.isXlsxContentType(getContentType()), sheetNum, 0); header = analyser.getHeader(); // create type definition