From 1f7a882d7d11e6c1391aefc8340ba04c95d4facf Mon Sep 17 00:00:00 2001 From: Trevor Gerhardt Date: Fri, 27 Oct 2023 13:53:14 +0800 Subject: [PATCH] Convert FileItems to Files before using We use Apache FileUpload to assist with handling incoming files. We also use a DiskFileItemFactory that persists each incoming file to disk immediately. This means that we could represent each incoming file as a java.io.File instead of a FileItem. java.io.File is a much more common way of handling a file and easier to work with. --- .../OpportunityDatasetController.java | 62 ++++++++++--------- .../datasource/DataSourceUploadAction.java | 32 ++++++---- .../analysis/datasource/DataSourceUtil.java | 22 +++---- .../conveyal/r5/analyst/FreeFormPointSet.java | 9 +-- .../java/com/conveyal/r5/analyst/Grid.java | 8 +-- 5 files changed, 69 insertions(+), 64 deletions(-) diff --git a/src/main/java/com/conveyal/analysis/controllers/OpportunityDatasetController.java b/src/main/java/com/conveyal/analysis/controllers/OpportunityDatasetController.java index b1afcfc05..5941cfeec 100644 --- a/src/main/java/com/conveyal/analysis/controllers/OpportunityDatasetController.java +++ b/src/main/java/com/conveyal/analysis/controllers/OpportunityDatasetController.java @@ -11,7 +11,6 @@ import com.conveyal.analysis.persistence.AnalysisCollection; import com.conveyal.analysis.persistence.AnalysisDB; import com.conveyal.analysis.persistence.Persistence; -import com.conveyal.analysis.util.FileItemInputStreamProvider; import com.conveyal.analysis.util.HttpUtils; import com.conveyal.analysis.util.JsonUtil; import com.conveyal.file.FileStorage; @@ -25,12 +24,12 @@ import com.conveyal.r5.analyst.progress.Task; import com.conveyal.r5.analyst.progress.WorkProduct; import com.conveyal.r5.util.ExceptionUtils; -import com.conveyal.r5.util.InputStreamProvider; import com.conveyal.r5.util.ProgressListener; import com.fasterxml.jackson.databind.node.ObjectNode; import com.google.common.io.Files; import 
com.mongodb.QueryBuilder; import org.apache.commons.fileupload.FileItem; +import org.apache.commons.fileupload.disk.DiskFileItem; import org.apache.commons.io.FilenameUtils; import org.bson.types.ObjectId; import org.slf4j.Logger; @@ -275,7 +274,7 @@ private static FileStorageFormat getFormatCode (PointSet pointSet) { * This method executes in a blocking (synchronous) manner, but it can take a while so should be called within an * non-blocking asynchronous task. */ - private List createFreeFormPointSetsFromCsv(FileItem csvFileItem, Map params) { + private List createFreeFormPointSetsFromCsv(File csvFile, Map params) { String latField = params.get("latField"); String lonField = params.get("lonField"); @@ -296,12 +295,11 @@ private List createFreeFormPointSetsFromCsv(FileItem csvFileIt try { List pointSets = new ArrayList<>(); - InputStreamProvider csvStreamProvider = new FileItemInputStreamProvider(csvFileItem); - pointSets.add(FreeFormPointSet.fromCsv(csvStreamProvider, latField, lonField, idField, countField)); + pointSets.add(FreeFormPointSet.fromCsv(csvFile, latField, lonField, idField, countField)); // The second pair of lat and lon fields allow creating two matched pointsets from the same CSV. // This is used for one-to-one travel times between specific origins/destinations. 
if (latField2 != null && lonField2 != null) { - pointSets.add(FreeFormPointSet.fromCsv(csvStreamProvider, latField2, lonField2, idField, countField)); + pointSets.add(FreeFormPointSet.fromCsv(csvFile, latField2, lonField2, idField, countField)); } return pointSets; } catch (Exception e) { @@ -331,6 +329,7 @@ private OpportunityDatasetUploadStatus createOpportunityDataset(Request req, Res OpportunityDatasetUploadStatus status = new OpportunityDatasetUploadStatus(regionId, sourceName); addStatusAndRemoveOldStatuses(status); + final List files = new ArrayList<>(); final List fileItems; final FileStorageFormat uploadFormat; final Map parameters; @@ -338,7 +337,11 @@ private OpportunityDatasetUploadStatus createOpportunityDataset(Request req, Res // Validate inputs and parameters, which will throw an exception if there's anything wrong with them. // Call remove() rather than get() so that subsequent code will see only string parameters, not the files. fileItems = formFields.remove("files"); - uploadFormat = detectUploadFormatAndValidate(fileItems); + for (var fi : fileItems) { + var dfi = (DiskFileItem) fi; + files.add(dfi.getStoreLocation()); + } + uploadFormat = detectUploadFormatAndValidate(files); parameters = extractStringParameters(formFields); } catch (Exception e) { status.completeWithError(e); @@ -354,35 +357,35 @@ private OpportunityDatasetUploadStatus createOpportunityDataset(Request req, Res List pointsets = new ArrayList<>(); if (uploadFormat == FileStorageFormat.GRID) { LOG.info("Detected opportunity dataset stored in Conveyal binary format."); - pointsets.addAll(createGridsFromBinaryGridFiles(fileItems, status)); + pointsets.addAll(createGridsFromBinaryGridFiles(files, status)); } else if (uploadFormat == FileStorageFormat.SHP) { LOG.info("Detected opportunity dataset stored as ESRI shapefile."); - pointsets.addAll(createGridsFromShapefile(fileItems, zoom, status)); + pointsets.addAll(createGridsFromShapefile(files, zoom, status)); } else if (uploadFormat 
== FileStorageFormat.CSV) { LOG.info("Detected opportunity dataset stored as CSV"); // Create a grid even when user has requested a freeform pointset so we have something to visualize. - FileItem csvFileItem = fileItems.get(0); + File csvFile = files.get(0); // FIXME why were we uploading to S3 using the file path not the UUID? // writeFileToS3(csvFile); // TODO report progress / status as with grids. That involves pre-scanning the CSV which would be // facilitated by retaining the CSV server side and later converting to pointset. boolean requestedFreeForm = Boolean.parseBoolean(parameters.get("freeform")); // Hack to enable freeform pointset building without exposing a UI element, via file name. - if (csvFileItem.getName().contains("FREEFORM_PS.")) { + if (csvFile.getName().contains("FREEFORM_PS.")) { requestedFreeForm = true; } if (requestedFreeForm) { LOG.info("Processing CSV as freeform (rather than gridded) pointset as requested."); // This newer process creates a FreeFormPointSet only for the specified count fields, // as well as a Grid to assist in visualization of the uploaded data. - for (FreeFormPointSet freeForm : createFreeFormPointSetsFromCsv(csvFileItem, parameters)) { + for (FreeFormPointSet freeForm : createFreeFormPointSetsFromCsv(csvFile, parameters)) { Grid gridFromFreeForm = Grid.fromFreeForm(freeForm, zoom); pointsets.add(freeForm); pointsets.add(gridFromFreeForm); } } else { // This is the common default process: create a grid for every non-ignored field in the CSV. - pointsets.addAll(createGridsFromCsv(csvFileItem, formFields, zoom, status)); + pointsets.addAll(createGridsFromCsv(csvFile, formFields, zoom, status)); } } if (pointsets.isEmpty()) { @@ -473,7 +476,7 @@ private OpportunityDataset deleteDataset(String id, UserPermissions userPermissi * TODO explain latField2 usage * @return one or two Grids for each numeric column in the CSV input. 
*/ - private List createGridsFromCsv(FileItem csvFileItem, + private List createGridsFromCsv(File csvFile, Map> query, int zoom, OpportunityDatasetUploadStatus status) throws Exception { @@ -488,12 +491,11 @@ private List createGridsFromCsv(FileItem csvFileItem, String lonField2 = HttpUtils.getFormField(query, "lonField2", false); List ignoreFields = Arrays.asList(idField, latField2, lonField2); - InputStreamProvider csvStreamProvider = new FileItemInputStreamProvider(csvFileItem); - List grids = Grid.fromCsv(csvStreamProvider, latField, lonField, ignoreFields, zoom, status); + List grids = Grid.fromCsv(csvFile, latField, lonField, ignoreFields, zoom, status); // TODO verify correctness of this second pass if (latField2 != null && lonField2 != null) { ignoreFields = Arrays.asList(idField, latField, lonField); - grids.addAll(Grid.fromCsv(csvStreamProvider, latField2, lonField2, ignoreFields, zoom, status)); + grids.addAll(Grid.fromCsv(csvFile, latField2, lonField2, ignoreFields, zoom, status)); } return grids; @@ -503,14 +505,14 @@ private List createGridsFromCsv(FileItem csvFileItem, * Create a grid from an input stream containing a binary grid file. * For those in the know, we can upload manually created binary grid files. 
*/ - private List createGridsFromBinaryGridFiles(List uploadedFiles, + private List createGridsFromBinaryGridFiles(List uploadedFiles, OpportunityDatasetUploadStatus status) throws Exception { List grids = new ArrayList<>(); status.totalFeatures = uploadedFiles.size(); - for (FileItem fileItem : uploadedFiles) { - Grid grid = Grid.read(fileItem.getInputStream()); - String name = fileItem.getName(); + for (File file : uploadedFiles) { + Grid grid = Grid.read(FileUtils.getInputStream(file)); + String name = file.getName(); // Remove ".grid" from the name if (name.contains(".grid")) name = name.split(".grid")[0]; grid.name = name; @@ -522,37 +524,37 @@ private List createGridsFromBinaryGridFiles(List uploadedFiles, } /** - * Preconditions: fileItems must contain SHP, DBF, and PRJ files, and optionally SHX. All files should have the + * Preconditions: files must contain SHP, DBF, and PRJ files, and optionally SHX. All files should have the * same base name, and should not contain any other files but these three or four. */ - private List createGridsFromShapefile(List fileItems, + private List createGridsFromShapefile(List files, int zoom, OpportunityDatasetUploadStatus status) throws Exception { // In the caller, we should have already verified that all files have the same base name and have an extension. // Extract the relevant files: .shp, .prj, .dbf, and .shx. // We need the SHX even though we're looping over every feature as they might be sparse. - Map filesByExtension = new HashMap<>(); - for (FileItem fileItem : fileItems) { - filesByExtension.put(FilenameUtils.getExtension(fileItem.getName()).toUpperCase(), fileItem); + Map filesByExtension = new HashMap<>(); + for (File file : files) { + filesByExtension.put(FilenameUtils.getExtension(file.getName()).toUpperCase(), file); } // Copy the shapefile component files into a temporary directory with a fixed base name. 
File tempDir = Files.createTempDir(); File shpFile = new File(tempDir, "grid.shp"); - filesByExtension.get("SHP").write(shpFile); + Files.copy(filesByExtension.get("SHP"), shpFile); File prjFile = new File(tempDir, "grid.prj"); - filesByExtension.get("PRJ").write(prjFile); + Files.copy(filesByExtension.get("PRJ"), prjFile); File dbfFile = new File(tempDir, "grid.dbf"); - filesByExtension.get("DBF").write(dbfFile); + Files.copy(filesByExtension.get("DBF"), dbfFile); // The .shx file is an index. It is optional, and not needed for dense shapefiles. if (filesByExtension.containsKey("SHX")) { File shxFile = new File(tempDir, "grid.shx"); - filesByExtension.get("SHX").write(shxFile); + Files.copy(filesByExtension.get("SHX"), shxFile); } List grids = Grid.fromShapefile(shpFile, zoom, status); diff --git a/src/main/java/com/conveyal/analysis/datasource/DataSourceUploadAction.java b/src/main/java/com/conveyal/analysis/datasource/DataSourceUploadAction.java index 39f6740ab..2adc53197 100644 --- a/src/main/java/com/conveyal/analysis/datasource/DataSourceUploadAction.java +++ b/src/main/java/com/conveyal/analysis/datasource/DataSourceUploadAction.java @@ -15,13 +15,14 @@ import org.slf4j.LoggerFactory; import java.io.File; +import java.util.ArrayList; import java.util.List; import java.util.Locale; import java.util.Map; import java.util.stream.Collectors; -import static com.conveyal.analysis.util.HttpUtils.getFormField; import static com.conveyal.analysis.datasource.DataSourceUtil.detectUploadFormatAndValidate; +import static com.conveyal.analysis.util.HttpUtils.getFormField; import static com.conveyal.file.FileCategory.DATASOURCES; import static com.conveyal.file.FileStorageFormat.SHP; import static com.google.common.base.Preconditions.checkNotNull; @@ -42,7 +43,7 @@ public class DataSourceUploadAction implements TaskAction { private final AnalysisCollection dataSourceCollection; /** The files provided in the HTTP post form. These will be moved into storage. 
*/ - private final List fileItems; + private final List files; /** * This DataSourceIngester provides encapsulated loading and validation logic for a single format, by composition @@ -64,12 +65,12 @@ public String getDataSourceName () { public DataSourceUploadAction ( FileStorage fileStorage, AnalysisCollection dataSourceCollection, - List fileItems, + List files, DataSourceIngester ingester ) { this.fileStorage = fileStorage; this.dataSourceCollection = dataSourceCollection; - this.fileItems = fileItems; + this.files = files; this.ingester = ingester; } @@ -94,18 +95,17 @@ private final void moveFilesIntoStorage (ProgressListener progressListener) { // (with filenames that correspond to the source id) progressListener.beginTask("Moving files into storage...", 1); final String dataSourceId = ingester.dataSource()._id.toString(); - for (FileItem fileItem : fileItems) { - DiskFileItem dfi = (DiskFileItem) fileItem; + for (File file : files) { // Use canonical extension from file type - files may be uploaded with e.g. tif instead of tiff or geotiff. String extension = ingester.dataSource().fileFormat.extension; - if (fileItems.size() > 1) { + if (files.size() > 1) { // If we have multiple files, as with Shapefile, just keep the original extension for each file. // This could lead to orphaned files after a deletion, we might want to implement wildcard deletion. 
- extension = FilenameUtils.getExtension(fileItem.getName()).toLowerCase(Locale.ROOT); + extension = FilenameUtils.getExtension(file.getName()).toLowerCase(Locale.ROOT); } FileStorageKey key = new FileStorageKey(DATASOURCES, dataSourceId, extension); - fileStorage.moveIntoStorage(key, dfi.getStoreLocation()); - if (fileItems.size() == 1 || extension.equalsIgnoreCase(SHP.extension)) { + fileStorage.moveIntoStorage(key, file); + if (files.size() == 1 || extension.equalsIgnoreCase(SHP.extension)) { file = fileStorage.getFile(key); } } @@ -128,14 +128,20 @@ public static DataSourceUploadAction forFormFields ( final String sourceName = getFormField(formFields, "sourceName", true); final String regionId = getFormField(formFields, "regionId", true); final List fileItems = formFields.get("sourceFiles"); + final List files = new ArrayList<>(); + + for (var fi : fileItems) { + var dfi = (DiskFileItem) fi; + files.add(dfi.getStoreLocation()); + } - FileStorageFormat format = detectUploadFormatAndValidate(fileItems); + FileStorageFormat format = detectUploadFormatAndValidate(files); DataSourceIngester ingester = DataSourceIngester.forFormat(format); - String originalFileNames = fileItems.stream().map(FileItem::getName).collect(Collectors.joining(", ")); + String originalFileNames = files.stream().map(File::getName).collect(Collectors.joining(", ")); ingester.initializeDataSource(sourceName, originalFileNames, regionId, userPermissions); DataSourceUploadAction dataSourceUploadAction = - new DataSourceUploadAction(fileStorage, dataSourceCollection, fileItems, ingester); + new DataSourceUploadAction(fileStorage, dataSourceCollection, files, ingester); return dataSourceUploadAction; } diff --git a/src/main/java/com/conveyal/analysis/datasource/DataSourceUtil.java b/src/main/java/com/conveyal/analysis/datasource/DataSourceUtil.java index d85b883e9..13b158fe1 100644 --- a/src/main/java/com/conveyal/analysis/datasource/DataSourceUtil.java +++ 
b/src/main/java/com/conveyal/analysis/datasource/DataSourceUtil.java @@ -2,8 +2,6 @@ import com.conveyal.file.FileStorageFormat; import com.google.common.collect.Sets; -import org.apache.commons.fileupload.FileItem; -import org.apache.commons.fileupload.disk.DiskFileItem; import org.apache.commons.io.FilenameUtils; import java.io.File; @@ -29,7 +27,7 @@ public abstract class DataSourceUtil { * @throws DataSourceException if the type of the upload can't be detected or preconditions are violated. * @return the expected type of the uploaded file or files, never null. */ - public static FileStorageFormat detectUploadFormatAndValidate (List fileItems) { + public static FileStorageFormat detectUploadFormatAndValidate (List fileItems) { if (isNullOrEmpty(fileItems)) { throw new DataSourceException("You must select some files to upload."); } @@ -78,10 +76,8 @@ public static FileStorageFormat detectUploadFormatAndValidate (List fi * Check that all FileItems supplied are stored in disk files (not memory), that they are all readable and all * have nonzero size. */ - private static void checkFileCharacteristics (List fileItems) { - for (FileItem fileItem : fileItems) { - checkState(fileItem instanceof DiskFileItem, "Uploaded file was not stored to disk."); - File diskFile = ((DiskFileItem)fileItem).getStoreLocation(); + private static void checkFileCharacteristics (List files) { + for (File diskFile : files) { checkState(diskFile.exists(), "Uploaded file does not exist on filesystem as expected."); checkState(diskFile.canRead(), "Read permissions were not granted on uploaded file."); checkState(diskFile.length() > 0, "Uploaded file was empty (contained no data)."); @@ -92,10 +88,10 @@ private static void checkFileCharacteristics (List fileItems) { * Given a list of FileItems, return a set of all unique file extensions present, normalized to lower case. * Always returns a set instance which may be empty, but never null. 
*/ - private static Set extractFileExtensions (List fileItems) { + private static Set extractFileExtensions (List files) { Set fileExtensions = new HashSet<>(); - for (FileItem fileItem : fileItems) { - String fileName = fileItem.getName(); + for (File file : files) { + String fileName = file.getName(); String extension = FilenameUtils.getExtension(fileName); if (extension.isEmpty()) { throw new DataSourceException("Filename has no extension: " + fileName); @@ -106,10 +102,10 @@ private static Set extractFileExtensions (List fileItems) { } /** In uploads containing more than one file, all files are expected to have the same name before the extension. */ - private static void verifyBaseNamesSame (List fileItems) { + private static void verifyBaseNamesSame (List files) { String firstBaseName = null; - for (FileItem fileItem : fileItems) { - String baseName = FilenameUtils.getBaseName(fileItem.getName()); + for (File file : files) { + String baseName = FilenameUtils.getBaseName(file.getName()); if (firstBaseName == null) { firstBaseName = baseName; } else if (!firstBaseName.equals(baseName)) { diff --git a/src/main/java/com/conveyal/r5/analyst/FreeFormPointSet.java b/src/main/java/com/conveyal/r5/analyst/FreeFormPointSet.java index d0deaed13..a6d3a6011 100644 --- a/src/main/java/com/conveyal/r5/analyst/FreeFormPointSet.java +++ b/src/main/java/com/conveyal/r5/analyst/FreeFormPointSet.java @@ -1,7 +1,7 @@ package com.conveyal.r5.analyst; import com.beust.jcommander.ParameterException; -import com.conveyal.r5.util.InputStreamProvider; +import com.conveyal.file.FileUtils; import com.csvreader.CsvReader; import gnu.trove.list.TIntList; import gnu.trove.list.array.TIntArrayList; @@ -12,6 +12,7 @@ import java.io.DataInputStream; import java.io.DataOutputStream; +import java.io.File; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; @@ -49,7 +50,7 @@ public class FreeFormPointSet extends PointSet { * for the points; if not, row numbers will 
be used as the ids. */ public static FreeFormPointSet fromCsv ( - InputStreamProvider csvInputStreamProvider, + File csvFile, String latField, String lonField, String idField, @@ -61,7 +62,7 @@ public static FreeFormPointSet fromCsv ( int lonCol = -1; int idCol = -1; int countCol = -1; - try (InputStream csvInputStream = csvInputStreamProvider.getInputStream()) { + try (InputStream csvInputStream = FileUtils.getInputStream(csvFile)) { CsvReader reader = new CsvReader(csvInputStream, ',', StandardCharsets.UTF_8); reader.readHeaders(); int nCols = reader.getHeaderCount(); @@ -105,7 +106,7 @@ public static FreeFormPointSet fromCsv ( /* If we reached here, the file is entirely readable. Re-read it from the beginning and record values. */ // Note that we're doing two passes just so we know the array size. We could just use TIntLists. int rec = -1; - try (InputStream csvInputStream = csvInputStreamProvider.getInputStream()) { + try (InputStream csvInputStream = FileUtils.getInputStream(csvFile)) { CsvReader reader = new CsvReader(csvInputStream, ',', StandardCharsets.UTF_8); FreeFormPointSet ret = new FreeFormPointSet(nRecs); ret.name = countField != null ? 
countField : "[COUNT]"; diff --git a/src/main/java/com/conveyal/r5/analyst/Grid.java b/src/main/java/com/conveyal/r5/analyst/Grid.java index ad9faf1ed..0ec13b52f 100644 --- a/src/main/java/com/conveyal/r5/analyst/Grid.java +++ b/src/main/java/com/conveyal/r5/analyst/Grid.java @@ -1,8 +1,8 @@ package com.conveyal.r5.analyst; import com.conveyal.analysis.datasource.DataSourceException; +import com.conveyal.file.FileUtils; import com.conveyal.r5.common.GeometryUtils; -import com.conveyal.r5.util.InputStreamProvider; import com.conveyal.r5.util.ProgressListener; import com.conveyal.r5.util.ShapefileReader; import com.csvreader.CsvReader; @@ -554,7 +554,7 @@ public static Polygon getPixelGeometry (int localX, int localY, WebMercatorExten * @param ignoreFields if this is non-null, the fields with these names will not be considered when looking for * numeric opportunity count fields. Null strings in the collection are ignored. */ - public static List fromCsv(InputStreamProvider csvInputStreamProvider, + public static List fromCsv(File csvFile, String latField, String lonField, Collection ignoreFields, @@ -563,7 +563,7 @@ public static List fromCsv(InputStreamProvider csvInputStreamProvider, // Read through the CSV file once to establish its structure (which fields are numeric). // TODO factor out this logic for all CSV loading, reuse for freeform and grids, set progress properly. - CsvReader reader = new CsvReader(csvInputStreamProvider.getInputStream(), StandardCharsets.UTF_8); + CsvReader reader = new CsvReader(FileUtils.getInputStream(csvFile), StandardCharsets.UTF_8); reader.readHeaders(); List headers = Arrays.asList(reader.getHeaders()); if (!headers.contains(latField)) { @@ -646,7 +646,7 @@ public static List fromCsv(InputStreamProvider csvInputStreamProvider, // The first read through the CSV just established its structure (e.g. which fields were numeric). // Now, re-read the CSV from the beginning to load the values and populate the grids. 
- reader = new CsvReader(csvInputStreamProvider.getInputStream(), StandardCharsets.UTF_8); + reader = new CsvReader(FileUtils.getInputStream(csvFile), StandardCharsets.UTF_8); reader.readHeaders(); int i = 0;