Skip to content

Commit

Permalink
BXC-4651 export_objects command (#103)
Browse files Browse the repository at this point in the history
* export_objects cmd and tests

* fix javadoc

* add exception name to terminal log
  • Loading branch information
krwong authored Jul 31, 2024
1 parent 1932f99 commit afe7b91
Show file tree
Hide file tree
Showing 7 changed files with 426 additions and 1 deletion.
3 changes: 2 additions & 1 deletion src/main/java/edu/unc/lib/boxc/migration/cdm/CLIMain.java
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@
MigrationTypeReportCommand.class,
FilterIndexCommand.class,
AggregateFilesCommand.class,
PermissionsCommand.class
PermissionsCommand.class,
ExportObjectsCommand.class
})
public class CLIMain implements Callable<Integer> {
@Option(names = { "-w", "--work-dir" },
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
package edu.unc.lib.boxc.migration.cdm;

import edu.unc.lib.boxc.migration.cdm.exceptions.InvalidProjectStateException;
import edu.unc.lib.boxc.migration.cdm.model.MigrationProject;
import edu.unc.lib.boxc.migration.cdm.services.ExportObjectsService;
import edu.unc.lib.boxc.migration.cdm.services.MigrationProjectFactory;
import org.slf4j.Logger;
import picocli.CommandLine.Command;
import picocli.CommandLine.ParentCommand;

import java.nio.file.NoSuchFileException;
import java.nio.file.Path;
import java.util.concurrent.Callable;

import static edu.unc.lib.boxc.migration.cdm.util.CLIConstants.outputLogger;
import static org.slf4j.LoggerFactory.getLogger;

/**
 * CLI command which exports the record ids, file paths and filenames listed in a
 * project's source files mapping.
 *
 * @author krwong
 */
@Command(name = "export_objects",
        description = "Export record ids and filenames from a source_files.csv mapping.")
public class ExportObjectsCommand implements Callable<Integer> {
    private static final Logger log = getLogger(ExportObjectsCommand.class);

    @ParentCommand
    private CLIMain parentCommand;

    private MigrationProject project;
    private ExportObjectsService exportObjectsService;

    /**
     * Loads the migration project from the parent command's working directory and
     * wires it into a new {@link ExportObjectsService}.
     *
     * @throws Exception propagated from loading the migration project
     */
    public void init() throws Exception {
        Path currentPath = parentCommand.getWorkingDirectory();
        project = MigrationProjectFactory.loadMigrationProject(currentPath);
        exportObjectsService = new ExportObjectsService();
        exportObjectsService.setProject(project);
    }

    /**
     * Runs the export and reports the outcome to the user.
     *
     * @return 0 if the export completed, 1 if any exception was raised
     */
    @Override
    public Integer call() {
        long start = System.nanoTime();
        try {
            init();
            exportObjectsService.exportFilesystemObjects();
            outputLogger.info("Export objects in project {} in {}s", project.getProjectName(),
                    (System.nanoTime() - start) / 1e9);
            return 0;
        } catch (Exception e) {
            // init() can fail before the project is assigned, so the project must not be
            // dereferenced unconditionally here or the real error is masked by an NPE.
            String projectLabel = describeProject();
            log.error("Failed to export objects in {}", projectLabel, e);
            outputLogger.info("Failed to export objects in {}: {}: {}", projectLabel,
                    e.getClass().getSimpleName(), e.getMessage());
            return 1;
        }
    }

    /**
     * @return the project name if the project was loaded, otherwise a placeholder label
     */
    private String describeProject() {
        if (project == null) {
            return "(project not loaded)";
        }
        return project.getProjectName();
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
package edu.unc.lib.boxc.migration.cdm.model;

import java.util.ArrayList;
import java.util.List;

/**
 * Holds the objects exported for a project, plus the CSV header names
 * used for the exported objects file.
 *
 * @author krwong
 */
public class ExportObjectsInfo {
    public static final String RECORD_ID = "record_id";
    public static final String FILE_PATH = "file_path";
    public static final String FILENAME = "filename";
    public static final String[] CSV_HEADERS = {RECORD_ID, FILE_PATH, FILENAME};

    // Starts out as an empty, mutable list
    private List<ExportedObject> objects = new ArrayList<>();

    public ExportObjectsInfo() {
    }

    /**
     * @return the list of exported objects held by this instance
     */
    public List<ExportedObject> getObjects() {
        return this.objects;
    }

    /**
     * @param objects list which replaces the currently held exported objects
     */
    public void setObjects(List<ExportedObject> objects) {
        this.objects = objects;
    }

    /**
     * A single exported object: its record id, file path and filename.
     */
    public static class ExportedObject {
        private String recordId;
        private String filePath;
        private String filename;

        public void setRecordId(String recordId) {
            this.recordId = recordId;
        }

        public String getRecordId() {
            return this.recordId;
        }

        public void setFilePath(String filePath) {
            this.filePath = filePath;
        }

        public String getFilePath() {
            return this.filePath;
        }

        public void setFilename(String filename) {
            this.filename = filename;
        }

        public String getFilename() {
            return this.filename;
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ public class MigrationProject {
public static final String PERMISSIONS_FILENAME = "patron_permissions.csv";
public static final String PROJECT_SOURCE_CDM = "cdm";
public static final String PROJECT_SOURCE_FILES = "files";
public static final String EXPORT_OBJECTS_FILENAME = "exported_objects.csv";

private Path projectPath;
private MigrationProjectProperties properties;
Expand Down Expand Up @@ -172,4 +173,11 @@ public Path getPostMigrationReportPath() {
public Path getPermissionsPath() {
    // The permissions file is resolved directly under the project directory
    Path permissionsPath = projectPath.resolve(PERMISSIONS_FILENAME);
    return permissionsPath;
}

/**
 * Resolves the exported objects file against the project directory.
 *
 * @return Path of the exported objects file
 */
public Path getExportObjectsPath() {
    Path exportObjectsPath = projectPath.resolve(EXPORT_OBJECTS_FILENAME);
    return exportObjectsPath;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
package edu.unc.lib.boxc.migration.cdm.services;

import edu.unc.lib.boxc.migration.cdm.exceptions.InvalidProjectStateException;
import edu.unc.lib.boxc.migration.cdm.model.ExportObjectsInfo;
import edu.unc.lib.boxc.migration.cdm.model.MigrationProject;
import edu.unc.lib.boxc.migration.cdm.model.MigrationProjectProperties;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVPrinter;
import org.apache.commons.csv.CSVRecord;
import org.apache.commons.io.FilenameUtils;
import org.slf4j.Logger;

import java.io.BufferedWriter;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;

import static org.slf4j.LoggerFactory.getLogger;

/**
 * Service for generating exported_objects.csv from a project's source files mapping.
 *
 * @author krwong
 */
public class ExportObjectsService {
    private static final Logger log = getLogger(ExportObjectsService.class);

    // Positions read from each source files mapping row. The integration test for this
    // command writes rows as "id,,path,", i.e. id first and source path third.
    // NOTE(review): confirm these positions against SourceFilesInfo.CSV_HEADERS.
    private static final int ID_COLUMN = 0;
    private static final int SOURCE_PATH_COLUMN = 2;

    private MigrationProject project;

    /**
     * Export objects from filesystem source_files.csv mapping. One row of
     * (record id, file path, filename) is written per source files mapping record.
     *
     * @throws InvalidProjectStateException if the project has no source files updated date
     * @throws Exception if the mapping cannot be read or the export cannot be written
     */
    public void exportFilesystemObjects() throws Exception {
        validateProjectState();
        var sourcePath = project.getSourceFilesMappingPath();
        var exportObjectPath = getExportedObjectsPath();

        // Simultaneously read from the source_files mapping and write to the exported_objects.csv
        try (
            var sourceFilesParser = SourceFileService.openMappingsParser(sourcePath);
            var exportObjectsPrinter = openMappingsPrinter(exportObjectPath);
        ) {
            for (CSVRecord sourceFileRecord : sourceFilesParser) {
                String id = sourceFileRecord.get(ID_COLUMN);
                String filePath = sourceFileRecord.get(SOURCE_PATH_COLUMN);
                // The filename is the final segment of the source path
                String filename = FilenameUtils.getName(filePath);
                exportObjectsPrinter.printRecord(id, filePath, filename);
            }
        }
    }

    /**
     * Ensures the source files mapping has been populated before exporting.
     */
    private void validateProjectState() {
        MigrationProjectProperties props = project.getProjectProperties();
        if (props.getSourceFilesUpdatedDate() == null) {
            throw new InvalidProjectStateException("Source files must be mapped");
        }
    }

    /**
     * Opens a printer for the exported objects CSV, configured with the
     * {@link ExportObjectsInfo#CSV_HEADERS} header.
     *
     * @param mappingPath Path CSV will output to
     * @return CSVPrinter for writing to specified destination
     * @throws IOException if the file cannot be opened or the printer cannot be created
     */
    public static CSVPrinter openMappingsPrinter(Path mappingPath) throws IOException {
        BufferedWriter writer = Files.newBufferedWriter(mappingPath);
        try {
            return new CSVPrinter(writer, CSVFormat.DEFAULT.withHeader(ExportObjectsInfo.CSV_HEADERS));
        } catch (IOException | RuntimeException e) {
            // Creating the printer can fail after the file has been opened; close the
            // writer here since the caller never receives anything it could close.
            try {
                writer.close();
            } catch (IOException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
    }

    /**
     * @return Path of the exported objects file for the configured project
     */
    public Path getExportedObjectsPath() {
        return project.getExportObjectsPath();
    }

    public void setProject(MigrationProject project) {
        this.project = project;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
package edu.unc.lib.boxc.migration.cdm;

import edu.unc.lib.boxc.migration.cdm.model.ExportObjectsInfo;
import edu.unc.lib.boxc.migration.cdm.model.SourceFilesInfo;
import edu.unc.lib.boxc.migration.cdm.services.ExportObjectsService;
import edu.unc.lib.boxc.migration.cdm.services.MigrationProjectFactory;
import edu.unc.lib.boxc.migration.cdm.test.BxcEnvironmentHelper;
import edu.unc.lib.boxc.migration.cdm.util.ProjectPropertiesSerialization;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.apache.commons.io.FileUtils;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

import java.io.IOException;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.time.Instant;
import java.util.Arrays;
import java.util.List;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertIterableEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;

/**
 * Integration tests for the export_objects command.
 */
public class ExportObjectsCommandIT extends AbstractCommandIT {
    private static final String PROJECT_NAME = "proj";
    private ExportObjectsService exportObjectsService;

    @BeforeEach
    public void setup() throws Exception {
        project = MigrationProjectFactory.createCdmMigrationProject(
                tmpFolder, PROJECT_NAME, null, "user",
                null, BxcEnvironmentHelper.DEFAULT_ENV_ID);
        exportObjectsService = new ExportObjectsService();
    }

    @Test
    public void exportObjectsNoSourceFileTest() throws Exception {
        String[] args = new String[] {
                "-w", project.getProjectPath().toString(),
                "export_objects"
        };

        executeExpectFailure(args);
        assertOutputContains("Failed to export objects in proj: InvalidProjectStateException: " +
                "Source files must be mapped");
    }

    @Test
    public void exportObjectsTest() throws Exception {
        writeSourceCsv(sourceMappingBody("testid,," + filesystemSourceFile("IMG_2377.jpeg") + ",",
                "test-00001,," + filesystemSourceFile("D2_035_Varners_DrugStore_interior.tif") + ",",
                "test-00002,," + filesystemSourceFile("MJM_7_016_LumberMills_IndianCreekTrestle.tif") + ","));
        project.getProjectProperties().setSourceFilesUpdatedDate(Instant.now());
        ProjectPropertiesSerialization.write(project);

        String[] args = new String[] {
                "-w", project.getProjectPath().toString(),
                "export_objects"
        };
        executeExpectSuccess(args);

        Path exportedObjectsPath = project.getExportObjectsPath();
        assertTrue(Files.exists(exportedObjectsPath));
        List<CSVRecord> rows = listCsvRecords(exportedObjectsPath);
        assertEquals(3, rows.size());
        // Expected paths are derived the same way as the input rows, so the comparison
        // does not depend on the platform's name separator.
        assertIterableEquals(Arrays.asList("testid",
                filesystemSourceFile("IMG_2377.jpeg").toString(),
                "IMG_2377.jpeg"), rows.get(0));
        assertIterableEquals(Arrays.asList("test-00001",
                filesystemSourceFile("D2_035_Varners_DrugStore_interior.tif").toString(),
                "D2_035_Varners_DrugStore_interior.tif"), rows.get(1));
        assertIterableEquals(Arrays.asList("test-00002",
                filesystemSourceFile("MJM_7_016_LumberMills_IndianCreekTrestle.tif").toString(),
                "MJM_7_016_LumberMills_IndianCreekTrestle.tif"), rows.get(2));
    }

    /**
     * Builds the text of a source files mapping: the header line followed by the given rows.
     */
    private String sourceMappingBody(String... rows) {
        return String.join(",", SourceFilesInfo.CSV_HEADERS) + "\n"
                + String.join("\n", rows);
    }

    /**
     * Writes the given text to the project's source files mapping path.
     */
    private void writeSourceCsv(String mappingBody) throws IOException {
        FileUtils.write(project.getSourceFilesMappingPath().toFile(),
                mappingBody, StandardCharsets.UTF_8);
    }

    /**
     * @return path of the named file under the test resources files directory
     */
    private Path filesystemSourceFile(String relPath) {
        Path basePath = Path.of("src/test/resources/files");
        return basePath.resolve(relPath);
    }

    /**
     * Reads all records from the exported objects CSV.
     */
    private List<CSVRecord> listCsvRecords(Path exportedObjectsPath) throws Exception {
        List<CSVRecord> rows;
        try (
            Reader reader = Files.newBufferedReader(exportedObjectsPath);
            CSVParser csvParser = new CSVParser(reader, CSVFormat.DEFAULT
                    .withFirstRecordAsHeader()
                    .withHeader(ExportObjectsInfo.CSV_HEADERS)
                    .withTrim());
        ) {
            rows = csvParser.getRecords();
        }
        return rows;
    }
}
Loading

0 comments on commit afe7b91

Please sign in to comment.