Skip to content

Commit 98f0093

Browse files
feat: add formatter for the date
hale: add a new Excel reader parameter that allows specifying the import format for date cells. At the moment the formatter has the following format: yyyy-MM-dd, but it should become customizable by the user in a new issue. ING-4151
1 parent ec50283 commit 98f0093

File tree

11 files changed

+154
-33
lines changed

11 files changed

+154
-33
lines changed

io/plugins/eu.esdihumboldt.hale.io.xls.test/src/eu/esdihumboldt/hale/io/xls/test/XLSInstanceWriterTestExamples.groovy

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@
1515

1616
package eu.esdihumboldt.hale.io.xls.test
1717

18+
import java.text.SimpleDateFormat
19+
import java.time.LocalDate
20+
1821
import eu.esdihumboldt.hale.common.instance.groovy.InstanceBuilder
1922
import eu.esdihumboldt.hale.common.instance.model.InstanceCollection
2023
import eu.esdihumboldt.hale.common.schema.groovy.SchemaBuilder
@@ -26,6 +29,11 @@ class XLSInstanceWriterTestExamples {
2629

2730
Schema schema = createSchema()
2831

32+
// Declare a date in the "dd/MM/yyyy" format
33+
def dateString1 = "25/12/2023"
34+
def dateFormat1 = new SimpleDateFormat("dd/MM/yyyy")
35+
def date1 = dateFormat1.parse(dateString1)
36+
2937
// create the instance collection
3038
// concrete types are only strings, since the test is not able to choose the correct type in wizard
3139
InstanceCollection instances = new InstanceBuilder(types: schema).createCollection {
@@ -59,6 +67,7 @@ class XLSInstanceWriterTestExamples {
5967
name('other')
6068
number('1')
6169
description('other type')
70+
date(date1)
6271
}
6372
}
6473
}
@@ -91,9 +100,9 @@ class XLSInstanceWriterTestExamples {
91100
name(String)
92101
number(String)
93102
description(String)
103+
date(LocalDate)
94104
}
95105
}
96106
return schema;
97107
}
98-
99108
}

io/plugins/eu.esdihumboldt.hale.io.xls.ui/src/eu/esdihumboldt/hale/io/xls/ui/XLSSchemaTypePage.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -241,7 +241,7 @@ private void update(int sheetNum) throws Exception {
241241
// if the sheet is empty an Exception occurs
242242
AnalyseXLSSchemaTable analyser = new AnalyseXLSSchemaTable(
243243
getWizard().getProvider().getSource(),
244-
ReaderSettings.isXlsxContentType(getWizard().getContentType()), sheetNum, 0);
244+
ReaderSettings.isXlsxContentType(getWizard().getContentType()), sheetNum, 0, null);
245245

246246
setHeader(analyser.getHeader().toArray(new String[0]));
247247

io/plugins/eu.esdihumboldt.hale.io.xls/plugin.xml

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,19 @@
150150
ref="list">
151151
</parameterComplexValue>
152152
</providerParameter>
153+
<providerParameter
154+
description="Date Time Formatter specifying how values imported from Date cells should be imported"
155+
label="Date Time Formatter"
156+
name="dateTimeFormatter"
157+
optional="true">
158+
<parameterBinding
159+
class="java.lang.String">
160+
</parameterBinding>
161+
<valueDescriptor
162+
default="dd.mm.yyyy"
163+
defaultDescription="Default to dd.mm.yyyy">
164+
</valueDescriptor>
165+
</providerParameter>
153166
</provider>
154167
<provider
155168
class="eu.esdihumboldt.hale.io.xls.writer.XLSInstanceWriter"

io/plugins/eu.esdihumboldt.hale.io.xls/src/eu/esdihumboldt/hale/io/xls/AbstractAnalyseTable.java

Lines changed: 31 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import java.io.IOException;
2020
import java.io.InputStream;
2121
import java.net.URI;
22+
import java.time.format.DateTimeFormatter;
2223

2324
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
2425
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
@@ -46,36 +47,43 @@ public abstract class AbstractAnalyseTable {
4647
* Load table to analyse from an Excel file (first sheet).
4748
*
4849
* @param source the source to load the file from
50+
* @param xlsx if the file should be loaded as XLSX file
4951
* @throws Exception if an error occurs loading the file
5052
*/
5153
protected void analyse(LocatableInputSupplier<InputStream> source, boolean xlsx)
5254
throws Exception {
53-
analyse(source, xlsx, 0, 0);
55+
analyse(source, xlsx, 0, 0, null);
5456
}
5557

5658
/**
5759
* Load table to analyse from an Excel file.
5860
*
5961
* @param source the source to load the file from
62+
* @param isXlsx if the file should be loaded as XLSX file
6063
* @param sheetNum number of the sheet that should be loaded (0-based)
6164
* @param skipNlines number of lines to skip
65+
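* @param dateTime the pattern used to create the formatter for date cells,
*            may be <code>null</code> if no formatter should be created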
6266
* @throws Exception if an error occurs loading the file
6367
*/
64-
protected void analyse(LocatableInputSupplier<? extends InputStream> source, boolean xlsx,
65-
int sheetNum, int skipNlines) throws Exception {
68+
protected void analyse(LocatableInputSupplier<? extends InputStream> source, boolean isXlsx,
69+
int sheetNum, int skipNlines, String dateTime) throws Exception {
6670
try (InputStream inp = new BufferedInputStream(source.getInput());) {
6771
// https://poi.apache.org/components/spreadsheet/quick-guide.html#FileInputStream
6872
URI location = source.getLocation();
69-
Workbook wb = loadWorkbook(inp, location, xlsx);
73+
Workbook wb = loadWorkbook(inp, location, isXlsx);
7074

7175
Sheet sheet = wb.getSheetAt(sheetNum);
7276
evaluator = wb.getCreationHelper().createFormulaEvaluator();
7377

78+
DateTimeFormatter dateFormatter = null;
79+
if (dateTime != null) {
80+
dateFormatter = DateTimeFormatter.ofPattern(dateTime);
81+
}
7482
// the first row might represent the header
75-
analyseHeader(sheet);
83+
analyseHeader(sheet, dateFormatter);
7684

7785
// load configuration entries
78-
analyseContent(sheet, skipNlines);
86+
analyseContent(sheet, skipNlines, dateFormatter);
7987
} finally {
8088
// reset evaluator reference
8189
evaluator = null;
@@ -88,15 +96,15 @@ protected void analyse(LocatableInputSupplier<? extends InputStream> source, boo
8896
* @param input the input stream to load
8997
* @param location an optional location that can be used to determine the
9098
* file type
91-
* @param xlsx if the file should be loaded as XLSX file
99+
* @param isXlsx if the file should be loaded as XLSX file
92100
* @return the loaded workbook
93101
* @throws IOException if an error occurs reading the file
94102
* @throws InvalidFormatException if file has an invalid format when
95103
* attempting to load as OpenXML file
96104
*/
97-
public static Workbook loadWorkbook(InputStream input, URI location, boolean xlsx)
105+
public static Workbook loadWorkbook(InputStream input, URI location, boolean isXlsx)
98106
throws IOException, InvalidFormatException {
99-
if (location != null && !xlsx && location.getPath().toLowerCase().endsWith(".xls")) {
107+
if (location != null && !isXlsx && location.getPath().toLowerCase().endsWith(".xls")) {
100108
try (POIFSFileSystem fs = new POIFSFileSystem(input)) {
101109
return new HSSFWorkbook(fs.getRoot(), true);
102110
}
@@ -111,16 +119,17 @@ public static Workbook loadWorkbook(InputStream input, URI location, boolean xls
111119
* Analyzes the table header.
112120
*
113121
* @param sheet the table sheet
122+
* @param dateTimeFormatter the formatter passed on when extracting the text
*            of the header cells, may be <code>null</code>
114123
*/
115-
protected void analyseHeader(Sheet sheet) {
124+
protected void analyseHeader(Sheet sheet, DateTimeFormatter dateTimeFormatter) {
116125
Row header = sheet.getRow(0);
117126
if (header != null) {
118127

119128
// identify columns
120129
int count = 0;
121130
for (int i = header.getFirstCellNum(); i < header.getLastCellNum(); i++) {
122131
Cell cell = header.getCell(i);
123-
String text = extractText(cell, sheet);
132+
String text = extractText(cell, sheet, dateTimeFormatter);
124133
// cell cannot be empty to extract the text
125134
if (text != null) {
126135
headerCell(count, text);
@@ -142,11 +151,13 @@ protected void analyseHeader(Sheet sheet) {
142151
* the skip line
143152
*
144153
* @param sheet the table sheet
154+
* @param skipNlines the index of the first row to analyse, rows before it
*            are skipped
155+
* @param dateTimeFormatter the formatter passed on to the analysis of each
*            row, may be <code>null</code>
145156
*/
146-
private void analyseContent(Sheet sheet, int skipNlines) {
157+
private void analyseContent(Sheet sheet, int skipNlines, DateTimeFormatter dateTimeFormatter) {
147158
for (int i = skipNlines; i <= sheet.getLastRowNum(); i++) {
148159
Row row = sheet.getRow(i);
149-
analyseRow(i, row, sheet);
160+
analyseRow(i, row, sheet, dateTimeFormatter);
150161
}
151162
}
152163

@@ -157,18 +168,22 @@ private void analyseContent(Sheet sheet, int skipNlines) {
157168
* separately)
158169
* @param row the table row
159170
* @param sheet the sheet
171+
* @param dateTimeFormatter the formatter to use for date cells, may be
*            <code>null</code>
160172
*/
161-
protected abstract void analyseRow(int num, Row row, Sheet sheet);
173+
protected abstract void analyseRow(int num, Row row, Sheet sheet,
174+
DateTimeFormatter dateTimeFormatter);
162175

163176
/**
164177
* Extract the text from a given cell. Formulas are evaluated, for blank or
165178
* error cells <code>null</code> is returned
166179
*
167180
* @param cell the cell
181+
* @param sheet the sheet
182+
* @param dateTimeFormatter the formatter used to convert date cell values to text, may be <code>null</code>
168183
* @return the cell text
169184
*/
170-
protected String extractText(Cell cell, Sheet sheet) {
171-
return XLSUtil.extractText(cell, evaluator, sheet);
185+
protected String extractText(Cell cell, Sheet sheet, DateTimeFormatter dateTimeFormatter) {
186+
return XLSUtil.extractText(cell, evaluator, sheet, dateTimeFormatter);
172187
}
173188

174189
}

io/plugins/eu.esdihumboldt.hale.io.xls/src/eu/esdihumboldt/hale/io/xls/AnalyseXLSSchemaTable.java

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
package eu.esdihumboldt.hale.io.xls;
1717

1818
import java.io.InputStream;
19+
import java.time.format.DateTimeFormatter;
1920
import java.util.ArrayList;
2021
import java.util.Collection;
2122
import java.util.LinkedHashMap;
@@ -41,17 +42,20 @@ public class AnalyseXLSSchemaTable extends AbstractAnalyseTable {
4142
* Default constructor
4243
*
4344
* @param source the source to load the file from
45+
* @param xlsx if the file should be loaded as XLSX file
4446
* @param sheetNum number of the sheet in Excel file (0-based)
47+
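* @param skipNlines number of lines to skip
48+
* @param dateTime the pattern used to create the formatter for date cells, may be <code>null</code>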
4549
*
4650
* @throws Exception thrown if the analysis fails
4751
*/
4852
public AnalyseXLSSchemaTable(LocatableInputSupplier<? extends InputStream> source, boolean xlsx,
49-
int sheetNum, int skipNlines) throws Exception {
53+
int sheetNum, int skipNlines, String dateTime) throws Exception {
5054

5155
header = new ArrayList<String>();
5256
rows = new LinkedHashMap<Integer, List<String>>();
5357

54-
analyse(source, xlsx, sheetNum, skipNlines);
58+
analyse(source, xlsx, sheetNum, skipNlines, dateTime);
5559
}
5660

5761
/**
@@ -71,11 +75,11 @@ protected void headerCell(int num, String text) {
7175
* org.apache.poi.ss.usermodel.Row)
7276
*/
7377
@Override
74-
protected void analyseRow(int num, Row row, Sheet sheet) {
78+
protected void analyseRow(int num, Row row, Sheet sheet, DateTimeFormatter dateTimeFormatter) {
7579
if (row != null) {
7680
List<String> rowContent = new ArrayList<String>();
7781
for (int i = 0; i < row.getLastCellNum(); i++) {
78-
rowContent.add(extractText(row.getCell(i), sheet));
82+
rowContent.add(extractText(row.getCell(i), sheet, dateTimeFormatter));
7983
}
8084
if (!rowContent.isEmpty()
8185
&& !rowContent.stream().allMatch(s -> s == null || s.isEmpty())) {

io/plugins/eu.esdihumboldt.hale.io.xls/src/eu/esdihumboldt/hale/io/xls/XLSUtil.java

Lines changed: 40 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,16 @@
1515

1616
package eu.esdihumboldt.hale.io.xls;
1717

18+
import java.time.LocalDateTime;
19+
import java.time.ZoneId;
20+
import java.time.format.DateTimeFormatter;
21+
import java.time.format.FormatStyle;
22+
import java.util.Date;
23+
1824
import org.apache.poi.ss.usermodel.Cell;
1925
import org.apache.poi.ss.usermodel.CellType;
2026
import org.apache.poi.ss.usermodel.CellValue;
27+
import org.apache.poi.ss.usermodel.DateUtil;
2128
import org.apache.poi.ss.usermodel.FormulaEvaluator;
2229
import org.apache.poi.ss.usermodel.Row;
2330
import org.apache.poi.ss.usermodel.Sheet;
@@ -30,15 +37,23 @@
3037
*/
3138
public class XLSUtil {
3239

40+
/**
41+
* Default date format pattern
42+
*/
43+
public static final String PARAMETER_DATE_FORMAT = "yyyy-MM-dd";
44+
3345
/**
3446
* Extract the text from a given cell. Formulas are evaluated, for blank or
3547
* error cells <code>null</code> is returned
3648
*
3749
* @param cell the cell
3850
* @param evaluator the formula evaluator
51+
* @param sheet the sheet
52+
* @param dateTimeFormatter the formatter used to convert date cell values to text; if <code>null</code>, a localized short date format is used
3953
* @return the cell text
4054
*/
41-
public static String extractText(Cell cell, FormulaEvaluator evaluator, Sheet sheet) {
55+
public static String extractText(Cell cell, FormulaEvaluator evaluator, Sheet sheet,
56+
DateTimeFormatter dateTimeFormatter) {
4257
if (cell == null)
4358
return null;
4459

@@ -65,11 +80,31 @@ public static String extractText(Cell cell, FormulaEvaluator evaluator, Sheet sh
6580
case BOOLEAN:
6681
return String.valueOf(value.getBooleanValue());
6782
case NUMERIC:
68-
double number = value.getNumberValue();
69-
if (number == Math.floor(number)) {
70-
return String.valueOf((int) number);
83+
if (DateUtil.isCellDateFormatted(cell)) {
84+
// Get the date value from the cell
85+
Date dateCellValue = cell.getDateCellValue();
86+
87+
// Convert java.util.Date to java.time.LocalDateTime
88+
LocalDateTime localDateTime = dateCellValue.toInstant()
89+
.atZone(ZoneId.systemDefault()).toLocalDateTime();
90+
91+
// Define a DateTimeFormatter with a specific pattern
92+
if (dateTimeFormatter == null) {
93+
dateTimeFormatter = DateTimeFormatter.ofLocalizedDate(FormatStyle.SHORT);
94+
}
95+
// Format LocalDateTime using DateTimeFormatter
96+
String formattedDate = localDateTime.format(dateTimeFormatter);
97+
98+
return formattedDate;
99+
}
100+
else {
101+
double number = value.getNumberValue();
102+
if (number == Math.floor(number)) {
103+
return String.valueOf((int) number);
104+
}
105+
106+
return String.valueOf(value.getNumberValue());
71107
}
72-
return String.valueOf(value.getNumberValue());
73108
case STRING:
74109
return value.getStringValue();
75110
default:

io/plugins/eu.esdihumboldt.hale.io.xls/src/eu/esdihumboldt/hale/io/xls/reader/DefaultXLSLookupTableReader.java

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -58,11 +58,10 @@ public Map<Value, Value> read(Workbook workbook, boolean skipFirst, int keyColum
5858
Row currentRow = sheet.getRow(row);
5959
if (currentRow != null) {
6060
String value = XLSUtil.extractText(currentRow.getCell(valueColumn), evaluator,
61-
sheet);
61+
sheet, null);
6262
if (value != null && (!ignoreEmptyStrings || !value.isEmpty())) {
63-
map.put(Value.of(
64-
XLSUtil.extractText(currentRow.getCell(keyColumn), evaluator, sheet)),
65-
Value.of(value));
63+
map.put(Value.of(XLSUtil.extractText(currentRow.getCell(keyColumn), evaluator,
64+
sheet, null)), Value.of(value));
6665
}
6766
}
6867
}

io/plugins/eu.esdihumboldt.hale.io.xls/src/eu/esdihumboldt/hale/io/xls/reader/ReaderSettings.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,12 @@ public class ReaderSettings {
5353
*/
5454
public static final String PARAMETER_SHEET_SETTINGS = "sheetSettings";
5555

56+
/**
57+
* Parameter for the reader specifying how values imported from Date cells
58+
* should be formatted.
59+
*/
60+
public static final String PARAMETER_DATE_FORMAT = "dateTimeFormatter";
61+
5662
/**
5763
* Collect information and settings on a single sheet.
5864
*/
@@ -97,6 +103,9 @@ public void applySettings(SheetSettings settings) {
97103
if (settings.getSkipLines() != null) {
98104
this.settings.setSkipLines(settings.getSkipLines());
99105
}
106+
if (settings.getDateTime() != null) {
107+
this.settings.setDateTime(settings.getDateTime());
108+
}
100109
}
101110

102111
/**
@@ -190,9 +199,14 @@ else if (skipType) {
190199
else {
191200
skipNlines = 0;
192201
}
202+
203+
// read dateFormat
204+
String dateFormatString = reader.getParameter(PARAMETER_DATE_FORMAT).as(String.class);
205+
193206
// apply to all sheets as default
194207
for (SheetInfo sheet : sheets) {
195208
sheet.getSettings().setSkipLines(skipNlines);
209+
sheet.getSettings().setDateTime(dateFormatString);
196210
}
197211

198212
// determine if multi sheet mode, defaults to false for backwards

0 commit comments

Comments
 (0)