Skip to content

Commit

Permalink
feat: add formatter for the date
Browse files Browse the repository at this point in the history
hale: add new Excel reader parameter that allows to specify import format for date cells
At the moment the formatter has the following format: yyyy-MM-dd, but should be customizable by the user with a new issue.

ING-4151

feat: add formatter for the date

hale: add new Excel reader parameter that allows to specify import format for date cells
At the moment the formatter has the following format: yyyy-MM-dd, but should be customizable by the user with a new issue.

ING-4151
  • Loading branch information
emanuelaepure10 committed Jan 5, 2024
1 parent ec50283 commit bdaf0f4
Show file tree
Hide file tree
Showing 13 changed files with 262 additions and 34 deletions.
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@

package eu.esdihumboldt.hale.io.xls.test

import java.text.SimpleDateFormat
import java.time.LocalDate

import eu.esdihumboldt.hale.common.instance.groovy.InstanceBuilder
import eu.esdihumboldt.hale.common.instance.model.InstanceCollection
import eu.esdihumboldt.hale.common.schema.groovy.SchemaBuilder
Expand All @@ -26,6 +29,11 @@ class XLSInstanceWriterTestExamples {

Schema schema = createSchema()

// Declare a date in the "dd/MM/yyyy" (day/month/year) format
def dateString1 = "25/12/2023"
def dateFormat1 = new SimpleDateFormat("dd/MM/yyyy")
def date1 = dateFormat1.parse(dateString1)

// create the instance collection
// concrete types are only strings, since the test is not able to choose the correct type in wizard
InstanceCollection instances = new InstanceBuilder(types: schema).createCollection {
Expand Down Expand Up @@ -59,6 +67,7 @@ class XLSInstanceWriterTestExamples {
name('other')
number('1')
description('other type')
date(date1)
}
}
}
Expand Down Expand Up @@ -91,9 +100,9 @@ class XLSInstanceWriterTestExamples {
name(String)
number(String)
description(String)
date(LocalDate)
}
}
return schema;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,14 @@
import static org.assertj.core.api.Assertions.assertThat;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;

import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.function.Consumer;

Expand Down Expand Up @@ -128,7 +133,6 @@ public void testReadSimple() throws Exception {
assertEquals(dataFirstColumn[i], value[0]);
assertTrue(value[0] instanceof String);
}

}

/**
Expand Down Expand Up @@ -615,4 +619,111 @@ private InstanceCollection readXLSInstances(String sourceLocation, Schema source
return instanceReader.getInstances();
}

/**
 * Test - read a sample xls schema and instances from the same file and sheet,
 * with a date format configured on the schema reader. Checks that at least
 * one instance is read and that the value of the <code>date</code> property
 * of every instance is a string matching the configured date format.
 *
 * @throws Exception if an error occurs
 */
@Test
public void testReadSimpleWithDate() throws Exception {
    // Define test data
    String typeName = "item";
    String dateProperty = "date";
    // "MM" is month of year; lowercase "mm" would mean minute of hour
    String dateFormatter = "dd.MM.yyyy";
    String sourceLocation = "/data/simpleOneSheetDate.xls";

    // Read Schema
    Schema schema = readXLSSchemaDate(sourceLocation, 0, typeName, dateFormatter,
            "java.lang.String,java.lang.String,java.lang.String,java.lang.String");

    // Read Instances (without header)
    InstanceCollection instances = readXLSInstances(sourceLocation, 0, typeName, 1, schema);

    Iterator<Instance> instanceIt = instances.iterator();

    // Without this check the loop below would pass vacuously on empty input
    assertTrue("No instances were read from " + sourceLocation, instanceIt.hasNext());

    // Check the value of the date property in each instance
    while (instanceIt.hasNext()) {
        Instance instance = instanceIt.next();
        Object[] value = instance.getProperty(QName.valueOf(dateProperty));

        // Ensure the value is not null
        assertNotNull("Date property value is null", value);

        // Ensure the value is an array with at least one element
        assertTrue("Date property value is not an array or is empty", value.length > 0);

        // Fail with a readable message instead of a ClassCastException
        assertTrue("Date property value is not a String: " + value[0],
                value[0] instanceof String);

        // Check the date string format
        String dateString = (String) value[0];
        assertTrue("Date string format is incorrect: " + dateString,
                isStringDate(dateString, dateFormatter));
    }
}

/**
 * Read a schema from an Excel file on the classpath, with a date format set
 * as reader parameter.
 *
 * @param sourceLocation the classpath location of the Excel file
 * @param sheetIndex the value for the sheet index reader parameter
 * @param typeName the value for the type name reader parameter
 * @param dateFormatter the value for the date format reader parameter
 * @param paramPropertyType the value for the property type reader parameter
 * @return the schema provided by the reader after execution
 * @throws Exception if an error occurs
 */
private Schema readXLSSchemaDate(String sourceLocation, int sheetIndex, String typeName,
        String dateFormatter, String paramPropertyType) throws Exception {

    // getResource returns null for a missing resource; fail with a clear
    // message instead of a NullPointerException on toURI()
    java.net.URL resource = getClass().getResource(sourceLocation);
    assertNotNull("Test resource not found: " + sourceLocation, resource);

    XLSSchemaReader schemaReader = new XLSSchemaReader();
    schemaReader.setSource(new DefaultInputSupplier(resource.toURI()));
    schemaReader.setParameter(InstanceTableIOConstants.SHEET_INDEX, Value.of(sheetIndex));
    schemaReader.setParameter(CommonSchemaConstants.PARAM_TYPENAME, Value.of(typeName));
    schemaReader.setParameter(AbstractTableSchemaReader.PARAM_PROPERTYTYPE,
            Value.of(paramPropertyType));
    schemaReader.setParameter(ReaderSettings.PARAMETER_DATE_FORMAT, Value.of(dateFormatter));

    IOReport report = schemaReader.execute(null);
    assertTrue("Schema import was not successfull.", report.isSuccess());

    return schemaReader.getSchema();
}

/**
 * Check whether a string is a valid date in the given format. The whole
 * string has to match the pattern; trailing characters after a valid date
 * are not accepted.
 *
 * @param input the string to check, may be <code>null</code>
 * @param dateFormatter the {@link SimpleDateFormat} pattern the input is
 *            expected to match
 * @return <code>true</code> if the input string is a valid date in the given
 *         format, <code>false</code> otherwise (also for <code>null</code>)
 */
public boolean isStringDate(String input, String dateFormatter) {
    if (input == null) {
        return false;
    }

    SimpleDateFormat dateFormat = new SimpleDateFormat(dateFormatter);
    dateFormat.setLenient(false); // reject impossible dates like 31.02.

    // parse(String) only needs a matching prefix, so track the parse position
    // and require that the complete input was consumed
    java.text.ParsePosition position = new java.text.ParsePosition(0);
    Date parsedDate = dateFormat.parse(input, position);

    return parsedDate != null && position.getIndex() == input.length();
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ private void update(int sheetNum) throws Exception {
// if the sheet is empty an Exception occurs
AnalyseXLSSchemaTable analyser = new AnalyseXLSSchemaTable(
getWizard().getProvider().getSource(),
ReaderSettings.isXlsxContentType(getWizard().getContentType()), sheetNum, 0);
ReaderSettings.isXlsxContentType(getWizard().getContentType()), sheetNum, 0, null);

setHeader(analyser.getHeader().toArray(new String[0]));

Expand Down
13 changes: 13 additions & 0 deletions io/plugins/eu.esdihumboldt.hale.io.xls/plugin.xml
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,19 @@
ref="list">
</parameterComplexValue>
</providerParameter>
<!-- Pattern letters are case sensitive: MM is month of year, mm would be minute of hour -->
<providerParameter
      description="Date Time Formatter specifying how values imported from Date cells should be imported"
      label="Date Time Formatter"
      name="dateTimeFormatterDefault"
      optional="true">
   <parameterBinding
         class="java.lang.String">
   </parameterBinding>
   <valueDescriptor
         default="dd.MM.yyyy"
         defaultDescription="Default to dd.MM.yyyy">
   </valueDescriptor>
</providerParameter>
</provider>
<provider
class="eu.esdihumboldt.hale.io.xls.writer.XLSInstanceWriter"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.time.format.DateTimeFormatter;

import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
Expand Down Expand Up @@ -46,36 +47,43 @@ public abstract class AbstractAnalyseTable {
* Load table to analyse from an Excel file (first sheet).
*
* @param source the source to load the file from
* @param xlsx if the file should be loaded as XLSX file
* @throws Exception if an error occurs loading the file
*/
protected void analyse(LocatableInputSupplier<InputStream> source, boolean xlsx)
throws Exception {
analyse(source, xlsx, 0, 0);
analyse(source, xlsx, 0, 0, null);
}

/**
* Load table to analyse from an Excel file.
*
* @param source the source to load the file from
* @param isXlsx if the file should be loaded as XLSX file
* @param sheetNum number of the sheet that should be loaded (0-based)
* @param skipNlines number of lines to skip
* @param dateTime the pattern to create the date formatter from, may be <code>null</code> to use no date formatter
* @throws Exception if an error occurs loading the file
*/
protected void analyse(LocatableInputSupplier<? extends InputStream> source, boolean xlsx,
int sheetNum, int skipNlines) throws Exception {
protected void analyse(LocatableInputSupplier<? extends InputStream> source, boolean isXlsx,
int sheetNum, int skipNlines, String dateTime) throws Exception {
try (InputStream inp = new BufferedInputStream(source.getInput());) {
// https://poi.apache.org/components/spreadsheet/quick-guide.html#FileInputStream
URI location = source.getLocation();
Workbook wb = loadWorkbook(inp, location, xlsx);
Workbook wb = loadWorkbook(inp, location, isXlsx);

Sheet sheet = wb.getSheetAt(sheetNum);
evaluator = wb.getCreationHelper().createFormulaEvaluator();

DateTimeFormatter dateFormatter = null;
if (dateTime != null) {
dateFormatter = DateTimeFormatter.ofPattern(dateTime);
}
// the first row might represent the header
analyseHeader(sheet);
analyseHeader(sheet, dateFormatter);

// load configuration entries
analyseContent(sheet, skipNlines);
analyseContent(sheet, skipNlines, dateFormatter);
} finally {
// reset evaluator reference
evaluator = null;
Expand All @@ -88,15 +96,15 @@ protected void analyse(LocatableInputSupplier<? extends InputStream> source, boo
* @param input the input stream to load
* @param location an optional location that can be used to determine the
* file type
* @param xlsx if the file should be loaded as XLSX file
* @param isXlsx if the file should be loaded as XLSX file
* @return the loaded workbook
* @throws IOException if an error occurs reading the file
* @throws InvalidFormatException if file has an invalid format when
* attempting to load as OpenXML file
*/
public static Workbook loadWorkbook(InputStream input, URI location, boolean xlsx)
public static Workbook loadWorkbook(InputStream input, URI location, boolean isXlsx)
throws IOException, InvalidFormatException {
if (location != null && !xlsx && location.getPath().toLowerCase().endsWith(".xls")) {
if (location != null && !isXlsx && location.getPath().toLowerCase().endsWith(".xls")) {
try (POIFSFileSystem fs = new POIFSFileSystem(input)) {
return new HSSFWorkbook(fs.getRoot(), true);
}
Expand All @@ -111,16 +119,17 @@ public static Workbook loadWorkbook(InputStream input, URI location, boolean xls
* Analyzes the table header.
*
* @param sheet the table sheet
* @param dateTimeFormatter the formatter passed on when extracting the text of the header cells, may be <code>null</code>
*/
protected void analyseHeader(Sheet sheet) {
protected void analyseHeader(Sheet sheet, DateTimeFormatter dateTimeFormatter) {
Row header = sheet.getRow(0);
if (header != null) {

// identify columns
int count = 0;
for (int i = header.getFirstCellNum(); i < header.getLastCellNum(); i++) {
Cell cell = header.getCell(i);
String text = extractText(cell, sheet);
String text = extractText(cell, sheet, dateTimeFormatter);
// cell cannot be empty to extract the text
if (text != null) {
headerCell(count, text);
Expand All @@ -142,11 +151,13 @@ protected void analyseHeader(Sheet sheet) {
* the skip line
*
* @param sheet the table sheet
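* @param skipNlines the index of the first row to analyse, rows before it are skipped
* @param dateTimeFormatter the formatter passed on to the analysis of each row, may be <code>null</code>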
*/
private void analyseContent(Sheet sheet, int skipNlines) {
private void analyseContent(Sheet sheet, int skipNlines, DateTimeFormatter dateTimeFormatter) {
for (int i = skipNlines; i <= sheet.getLastRowNum(); i++) {
Row row = sheet.getRow(i);
analyseRow(i, row, sheet);
analyseRow(i, row, sheet, dateTimeFormatter);
}
}

Expand All @@ -157,18 +168,22 @@ private void analyseContent(Sheet sheet, int skipNlines) {
* separately)
* @param row the table row
* @param sheet the sheet
* @param dateTimeFormatter the formatter to use when extracting cell text, may be <code>null</code>
*/
protected abstract void analyseRow(int num, Row row, Sheet sheet);
protected abstract void analyseRow(int num, Row row, Sheet sheet,
DateTimeFormatter dateTimeFormatter);

/**
* Extract the text from a given cell. Formulas are evaluated, for blank or
* error cells <code>null</code> is returned
*
* @param cell the cell
* @param sheet to extract text
* @param dateTimeFormatter to convert the date into
* @return the cell text
*/
protected String extractText(Cell cell, Sheet sheet) {
return XLSUtil.extractText(cell, evaluator, sheet);
protected String extractText(Cell cell, Sheet sheet, DateTimeFormatter dateTimeFormatter) {
return XLSUtil.extractText(cell, evaluator, sheet, dateTimeFormatter);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
package eu.esdihumboldt.hale.io.xls;

import java.io.InputStream;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.Collection;
import java.util.LinkedHashMap;
Expand All @@ -41,17 +42,20 @@ public class AnalyseXLSSchemaTable extends AbstractAnalyseTable {
* Default constructor
*
* @param source the source to load the file from
* @param xlsx if the file should be loaded as XLSX file
* @param sheetNum number of the sheet in Excel file (0-based)
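* @param skipNlines number of lines to skip
* @param dateTime the date format pattern passed on to the analysis, may be <code>null</code>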
*
* @throws Exception thrown if the analysis fails
*/
public AnalyseXLSSchemaTable(LocatableInputSupplier<? extends InputStream> source, boolean xlsx,
int sheetNum, int skipNlines) throws Exception {
int sheetNum, int skipNlines, String dateTime) throws Exception {

header = new ArrayList<String>();
rows = new LinkedHashMap<Integer, List<String>>();

analyse(source, xlsx, sheetNum, skipNlines);
analyse(source, xlsx, sheetNum, skipNlines, dateTime);
}

/**
Expand All @@ -71,11 +75,11 @@ protected void headerCell(int num, String text) {
* org.apache.poi.ss.usermodel.Row)
*/
@Override
protected void analyseRow(int num, Row row, Sheet sheet) {
protected void analyseRow(int num, Row row, Sheet sheet, DateTimeFormatter dateTimeFormatter) {
if (row != null) {
List<String> rowContent = new ArrayList<String>();
for (int i = 0; i < row.getLastCellNum(); i++) {
rowContent.add(extractText(row.getCell(i), sheet));
rowContent.add(extractText(row.getCell(i), sheet, dateTimeFormatter));
}
if (!rowContent.isEmpty()
&& !rowContent.stream().allMatch(s -> s == null || s.isEmpty())) {
Expand Down
Loading

0 comments on commit bdaf0f4

Please sign in to comment.