Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

NIFI-14106 Fixed bug which led to loss of dates and times precision when using SplitExcel. #9718

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion NOTICE
Original file line number Diff line number Diff line change
Expand Up @@ -102,4 +102,13 @@ This includes derived works from Spring Framework available under Apache Softwar
The derived work is adapted from
https://github.com/spring-projects/spring-framework/blob/main/spring-web/src/main/java/org/springframework/web/util/UriComponentsBuilder.java
and can be found in
nifi-api/src/main/java/org/apache/nifi/processor/util/URLValidator.java
nifi-api/src/main/java/org/apache/nifi/processor/util/URLValidator.java

This includes derived works from Apache POI available under Apache Software License V2
Copyright 2003-2025 The Apache Software Foundation
The derived work is adapted from
https://svn.apache.org/repos/asf/poi/trunk/poi-ooxml/src/main/java/org/apache/poi/xssf/usermodel/XSSFSheet (copyRows method)
https://svn.apache.org/repos/asf/poi/trunk/poi-ooxml/src/main/java/org/apache/poi/xssf/usermodel/XSSFRow (copyRowFrom method)
https://svn.apache.org/repos/asf/poi/trunk/poi/src/main/java/org/apache/poi/ss/util/CellUtil.java (copyCell method).
and can be found in
nifi-extension-bundles/nifi-poi-bundle/nifi-poi-services/src/main/java/org/apache/nifi/processors/excel/SplitExcel.java.SheetCopyStrategy
Original file line number Diff line number Diff line change
Expand Up @@ -35,16 +35,29 @@
import org.apache.nifi.processor.Relationship;
import org.apache.nifi.processor.exception.ProcessException;
import org.apache.nifi.processor.util.StandardValidators;
import org.apache.poi.common.Duplicatable;
import org.apache.poi.ss.SpreadsheetVersion;
import org.apache.poi.ss.formula.FormulaShifter;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.CellCopyContext;
import org.apache.poi.ss.usermodel.CellCopyPolicy;
import org.apache.poi.ss.usermodel.CellStyle;
import org.apache.poi.ss.usermodel.CellType;
import org.apache.poi.ss.usermodel.Hyperlink;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.util.CellRangeAddress;
import org.apache.poi.xssf.usermodel.XSSFRow;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.poi.xssf.usermodel.helpers.XSSFRowShifter;

import java.io.OutputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
Expand Down Expand Up @@ -122,7 +135,7 @@ public class SplitExcel extends AbstractProcessor {

private static final CellCopyPolicy CELL_COPY_POLICY = new CellCopyPolicy.Builder()
.cellFormula(false) // NOTE: setting to false allows for copying the evaluated formula value.
.cellStyle(false) // NOTE: setting to false avoids exceeding the maximum number of cell styles (64000) in a .xlsx Workbook.
.cellStyle(CellCopyPolicy.DEFAULT_COPY_CELL_STYLE_POLICY)
.cellValue(CellCopyPolicy.DEFAULT_COPY_CELL_VALUE_POLICY)
.condenseRows(CellCopyPolicy.DEFAULT_CONDENSE_ROWS_POLICY)
.copyHyperlink(CellCopyPolicy.DEFAULT_COPY_HYPERLINK_POLICY)
Expand Down Expand Up @@ -173,7 +186,10 @@ public void onTrigger(ProcessContext context, ProcessSession session) throws Pro
}

if (!originalRows.isEmpty()) {
newSheet.copyRows(originalRows, originalSheet.getFirstRowNum(), CELL_COPY_POLICY);
// TODO after upgrading to next release of POI, the code in the line below can be replaced with
// newSheet.copyRows(originalRows, originalSheet.getFirstRowNum(), CELL_COPY_POLICY);
// and the SheetCopyStrategy class can be removed entirely.
SheetCopyStrategy.copyRows(newSheet, originalRows, originalSheet.getFirstRowNum(), CELL_COPY_POLICY);
}

FlowFile newFlowFile = session.create(originalFlowFile);
Expand Down Expand Up @@ -230,4 +246,213 @@ public void onTrigger(ProcessContext context, ProcessSession session) throws Pro

private record WorkbookSplit(int index, FlowFile content, String sheetName, int numRows) {
}

/*
The purpose of this class is to encapsulate the code necessary to copy cells correctly without losing
dates and times precision and avoid exceeding the maximum number of cell styles (64000) in a .xlsx Workbook.
This is a stop gap until Apache POI's next release which has incorporated this change in the XSSFSheet copyRows method.
*/
private static class SheetCopyStrategy {
/*org.apache.poi.xssf.usermodel.XSSFSheet copyRows method refactored with Intellij suggestions
and instantiates a CellCopyContext to avoid exceeding the maximum number of cell styles (64000) in a .xlsx Workbook.
This instance of CellCopyContext is eventually passed to copyCell.
*/
static void copyRows(XSSFSheet destSheet, List<? extends Row> srcRows, int destStartRow, CellCopyPolicy cellCopyPolicy) {
if (srcRows != null && !srcRows.isEmpty()) {
Row srcStartRow = srcRows.getFirst();
Row srcEndRow = srcRows.getLast();

if (srcStartRow == null) {
throw new IllegalArgumentException("copyRows: First row cannot be null");
} else {
int srcStartRowNum = srcStartRow.getRowNum();
int srcEndRowNum = srcEndRow.getRowNum();

for (int index = 1; index < srcRows.size(); ++index) {
Row curRow = srcRows.get(index);
if (curRow == null) {
throw new IllegalArgumentException("srcRows may not contain null rows. Found null row at index " + index + ".");
}

if (srcStartRow.getSheet().getWorkbook() != curRow.getSheet().getWorkbook()) {
throw new IllegalArgumentException("All rows in srcRows must belong to the same sheet in the same workbook. Expected all rows from same workbook ("
+ srcStartRow.getSheet().getWorkbook() + "). Got srcRows[" + index + "] from different workbook (" + curRow.getSheet().getWorkbook() + ").");
}

if (srcStartRow.getSheet() != curRow.getSheet()) {
throw new IllegalArgumentException("All rows in srcRows must belong to the same sheet. Expected all rows from "
+ srcStartRow.getSheet().getSheetName() + ". Got srcRows[" + index + "] from " + curRow.getSheet().getSheetName());
}
}

CellCopyPolicy options = new CellCopyPolicy(cellCopyPolicy);
options.setCopyMergedRegions(false);
int r = destStartRow;
CellCopyContext context = new CellCopyContext();

for (Row srcRow : srcRows) {
int destRowNum;
if (SplitExcel.CELL_COPY_POLICY.isCondenseRows()) {
destRowNum = r++;
} else {
int shift = srcRow.getRowNum() - srcStartRowNum;
destRowNum = destStartRow + shift;
}

XSSFRow destRow = destSheet.createRow(destRowNum);
copyRowFrom(destRow, srcRow, options, context);
}

if (SplitExcel.CELL_COPY_POLICY.isCopyMergedRegions()) {
int shift = destStartRow - srcStartRowNum;

for (CellRangeAddress srcRegion : srcStartRow.getSheet().getMergedRegions()) {
if (srcStartRowNum <= srcRegion.getFirstRow() && srcRegion.getLastRow() <= srcEndRowNum) {
CellRangeAddress destRegion = srcRegion.copy();
destRegion.setFirstRow(destRegion.getFirstRow() + shift);
destRegion.setLastRow(destRegion.getLastRow() + shift);
destSheet.addMergedRegion(destRegion);
}
}
}
}
} else {
throw new IllegalArgumentException("No rows to copy");
}
}

/*org.apache.poi.xssf.usermodel.XSSFRow copyRowFrom method refactored with Intellij suggestions*/
static void copyRowFrom(Row destRow, Row srcRow, CellCopyPolicy policy, CellCopyContext context) {
if (srcRow == null) {
for (Cell cell : destRow) {
copyCell(null, cell, policy, context);
}

if (policy.isCopyMergedRegions()) {
int destRowNum = destRow.getRowNum();
int index = 0;
Set<Integer> indices = new HashSet<>();

for (Iterator<CellRangeAddress> cellRangeAddressIterator = destRow.getSheet().getMergedRegions().iterator(); cellRangeAddressIterator.hasNext(); ++index) {
CellRangeAddress destRegion = cellRangeAddressIterator.next();
if (destRowNum == destRegion.getFirstRow() && destRowNum == destRegion.getLastRow()) {
indices.add(index);
}
}

destRow.getSheet().removeMergedRegions(indices);
}

if (policy.isCopyRowHeight()) {
destRow.setHeight((short) -1);
}
} else {
for (Cell srcCell : srcRow) {
Cell destCell = destRow.createCell(srcCell.getColumnIndex());
copyCell(srcCell, destCell, policy, context);
}

int destRowNum = destRow.getSheet().getWorkbook().getSheetIndex(destRow.getSheet());
String sheetName = destRow.getSheet().getWorkbook().getSheetName(destRowNum);
int srcRowNum = srcRow.getRowNum();
destRowNum = destRow.getRowNum();
int rowDifference = destRowNum - srcRowNum;
FormulaShifter formulaShifter = FormulaShifter.createForRowCopy(destRowNum, sheetName, srcRowNum, srcRowNum, rowDifference, SpreadsheetVersion.EXCEL2007);
XSSFRowShifter rowShifter = new XSSFRowShifter((XSSFSheet) destRow.getSheet());
rowShifter.updateRowFormulas((XSSFRow) destRow, formulaShifter);

if (policy.isCopyMergedRegions()) {
for (CellRangeAddress srcRegion : srcRow.getSheet().getMergedRegions()) {
if (srcRowNum == srcRegion.getFirstRow() && srcRowNum == srcRegion.getLastRow()) {
CellRangeAddress destRegion = srcRegion.copy();
destRegion.setFirstRow(destRowNum);
destRegion.setLastRow(destRowNum);
destRow.getSheet().addMergedRegion(destRegion);
}
}
}

if (policy.isCopyRowHeight()) {
destRow.setHeight(srcRow.getHeight());
}
}
}

/*Taken from org.apache.poi.ss.util.CellUtil refactored with Intellij suggestions and to only
use the numeric setter for numeric values even though the cell may represent a date, otherwise
dates which represent times are not set correctly.
*/
static void copyCell(Cell srcCell, Cell destCell, CellCopyPolicy policy, CellCopyContext context) {
if (policy.isCopyCellValue()) {
if (srcCell != null) {
CellType copyCellType = srcCell.getCellType();
if (copyCellType == CellType.FORMULA && !policy.isCopyCellFormula()) {
copyCellType = srcCell.getCachedFormulaResultType();
}

switch (copyCellType) {
case NUMERIC ->
/*NOTE: The original CellUtil code determined whether the cell was date formatted and if so
used the setter method which takes a java.util.Date. That proved problematic for cells which
had formatted times with an illegal Excel date of 12/31/1899. To bypass that issue, only
the setter for the numeric value is used.*/
destCell.setCellValue(srcCell.getNumericCellValue());
case STRING -> destCell.setCellValue(srcCell.getRichStringCellValue());
case FORMULA -> destCell.setCellFormula(srcCell.getCellFormula());
case BLANK -> destCell.setBlank();
case BOOLEAN -> destCell.setCellValue(srcCell.getBooleanCellValue());
case ERROR -> destCell.setCellErrorValue(srcCell.getErrorCellValue());
default -> throw new IllegalArgumentException("Invalid cell type " + srcCell.getCellType());
}
} else {
destCell.setBlank();
}
}

if (policy.isCopyCellStyle() && srcCell != null) {
if (srcCell.getSheet() != null && destCell.getSheet() != null && destCell.getSheet().getWorkbook() == srcCell.getSheet().getWorkbook()) {
destCell.setCellStyle(srcCell.getCellStyle());
} else {
CellStyle srcStyle = srcCell.getCellStyle();
CellStyle destStyle = context == null ? null : context.getMappedStyle(srcStyle);
if (destStyle == null) {
destStyle = destCell.getSheet().getWorkbook().createCellStyle();
destStyle.cloneStyleFrom(srcStyle);
if (context != null) {
context.putMappedStyle(srcStyle, destStyle);
}
}

destCell.setCellStyle(destStyle);
}
}

Hyperlink srcHyperlink = srcCell == null ? null : srcCell.getHyperlink();
Hyperlink newHyperlink;
if (policy.isMergeHyperlink()) {
if (srcHyperlink != null) {
if (!(srcHyperlink instanceof Duplicatable)) {
throw new IllegalStateException("srcCell hyperlink is not an instance of Duplicatable");
}

newHyperlink = (Hyperlink) ((Duplicatable) srcHyperlink).copy();
destCell.setHyperlink(newHyperlink);
}
} else if (policy.isCopyHyperlink()) {
if (srcHyperlink == null) {
destCell.setHyperlink(null);
} else {
if (!(srcHyperlink instanceof Duplicatable)) {
throw new IllegalStateException("srcCell hyperlink is not an instance of Duplicatable");
}

newHyperlink = (Hyperlink) ((Duplicatable) srcHyperlink).copy();
destCell.setHyperlink(newHyperlink);
}
}
}
}

}


Loading