Skip to content

Commit

Permalink
BXC-4596 add sourcefile fields to post_migration_report (#94)
Browse files Browse the repository at this point in the history
* add matchingValue and sourceFile to post_migration_report

* cache sourceFilesInfo, delete unused code, remove getMatchingValue/getSourceFile from addWorkRow

* add getMatchingValue/getSourceFile to addWorkRow, fix tests
  • Loading branch information
krwong authored Jun 5, 2024
1 parent 13f9a68 commit 6dc2fad
Show file tree
Hide file tree
Showing 8 changed files with 162 additions and 27 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import edu.unc.lib.boxc.migration.cdm.exceptions.MigrationException;
import edu.unc.lib.boxc.migration.cdm.model.GroupMappingInfo;
import edu.unc.lib.boxc.migration.cdm.model.MigrationProject;
import edu.unc.lib.boxc.migration.cdm.model.SourceFilesInfo;
import edu.unc.lib.boxc.migration.cdm.services.ChompbConfigService.ChompbConfig;
import edu.unc.lib.boxc.migration.cdm.util.PostMigrationReportConstants;
import edu.unc.lib.boxc.model.api.ResourceType;
Expand Down Expand Up @@ -35,13 +36,15 @@ public class PostMigrationReportService {
private MigrationProject project;
private ChompbConfig chompbConfig;
private DescriptionsService descriptionsService;
private SourceFileService sourceFileService;
private CSVPrinter csvPrinter;
private SAXBuilder saxBuilder;
private String singleBaseUrl;
private String compoundBaseUrl;
private String bxcBaseUrl;
private static final int CACHE_SIZE = 16;
private Map<String, String> parentTitleCache;
private SourceFilesInfo sourceFilesInfo;

/**
* Initialize the service
Expand Down Expand Up @@ -99,10 +102,19 @@ public void addWorkRow(String cdmObjectId, String boxcWorkId, int childCount, bo
String cdmUrl = buildCdmUrl(cdmObjectId, true, isSingleItem);
String boxcTitle = getParentTitle(cdmObjectId);
String boxcUrl = this.bxcBaseUrl + boxcWorkId;
String matchingValue = null;
String parentUrl = null;
String parentTitle = null;
String objType = ResourceType.Work.name();
addRow(cdmObjectId, cdmUrl, objType, boxcUrl, boxcTitle, null, parentUrl, parentTitle, childCount);
String sourceFile = null;

if (isSingleItem) {
matchingValue = getMatchingValue(cdmObjectId);
sourceFile = getSourceFile(cdmObjectId);
}

addRow(cdmObjectId, cdmUrl, objType, boxcUrl, boxcTitle, matchingValue, sourceFile,
null, parentUrl, parentTitle, childCount);
}

/**
Expand All @@ -120,15 +132,28 @@ public void addFileRow(String fileCdmId, String parentCdmId, String boxcWorkId,
String cdmUrl = buildCdmUrl(fileCdmId, false, isSingleItem);
String boxcTitle = extractTitle(fileCdmId);
String boxcUrl = this.bxcBaseUrl + boxcFileId;
String matchingValue;
String parentUrl = this.bxcBaseUrl + boxcWorkId;
String parentTitle = getParentTitle(parentCdmId);
String objType = ResourceType.File.name();
addRow(fileCdmId, cdmUrl, objType, boxcUrl, boxcTitle, null, parentUrl, parentTitle, null);
String sourceFile;

if (isSingleItem) {
matchingValue = getMatchingValue(parentCdmId);
sourceFile = getSourceFile(parentCdmId);
} else {
matchingValue = getMatchingValue(fileCdmId);
sourceFile = getSourceFile(fileCdmId);
}

addRow(fileCdmId, cdmUrl, objType, boxcUrl, boxcTitle, matchingValue, sourceFile,
null, parentUrl, parentTitle, null);
}

/**
 * Writes a single record to the report CSV via the CSV printer.
 * Values are printed in the order they are passed, which lines up with
 * PostMigrationReportConstants.CSV_HEADERS: cdm id, cdm url, object type, boxc url,
 * boxc title, matching value, source file, verified, parent work url,
 * parent work title, children count.
 *
 * @throws IOException propagated from the CSV printer
 */
protected void addRow(String cdmId, String cdmUrl, String objType, String boxcUrl, String boxcTitle,
String verified, String parentUrl, String parentTitle, Integer childCount) throws IOException {
csvPrinter.printRecord(cdmId, cdmUrl, objType, boxcUrl, boxcTitle,
String matchingValue, String sourceFile, String verified, String parentUrl,
String parentTitle, Integer childCount) throws IOException {
csvPrinter.printRecord(cdmId, cdmUrl, objType, boxcUrl, boxcTitle, matchingValue, sourceFile,
verified, parentUrl, parentTitle, childCount);
}

Expand Down Expand Up @@ -175,6 +200,25 @@ private String extractTitle(String cdmId) {
return null;
}

/**
 * Looks up the matching value recorded in the source file mappings for a CDM object.
 *
 * @param cdmId CDM id of the object to look up
 * @return the matching value of the mapping for the id, or null when the lookup yields no mapping
 * @throws IOException propagated from retrieving the source file mappings
 */
private String getMatchingValue(String cdmId) throws IOException {
    // NOTE(review): assumes getMappingByCdmId returns null for an id with no mapping entry -
    // confirm. Guarding here keeps one unmapped object from aborting the whole report with
    // a NullPointerException; the row is written with a null value for this column instead.
    var mapping = getSourceFilesInfo().getMappingByCdmId(cdmId);
    return mapping == null ? null : mapping.getMatchingValue();
}

/**
 * Looks up the source file path recorded in the source file mappings for a CDM object.
 *
 * @param cdmId CDM id of the object to look up
 * @return the source path string of the mapping for the id, or null when the lookup yields no mapping
 * @throws IOException propagated from retrieving the source file mappings
 */
private String getSourceFile(String cdmId) throws IOException {
    // NOTE(review): assumes getMappingByCdmId returns null for an id with no mapping entry -
    // confirm. Guarding here keeps one unmapped object from aborting the whole report with
    // a NullPointerException; the row is written with a null value for this column instead.
    var mapping = getSourceFilesInfo().getMappingByCdmId(cdmId);
    return mapping == null ? null : mapping.getSourcePathString();
}

/**
 * Returns the source file mappings, loading them through the source file service
 * the first time they are requested and reusing the held instance afterwards.
 *
 * @return the held source files info
 * @throws IOException propagated from loading the mappings
 */
private SourceFilesInfo getSourceFilesInfo() throws IOException {
    if (sourceFilesInfo != null) {
        return sourceFilesInfo;
    }
    // Nothing held yet: load once and keep the result in the field for later calls
    sourceFilesInfo = sourceFileService.loadMappings();
    return sourceFilesInfo;
}

/**
 * Sets the migration project used by this service.
 *
 * @param project the migration project
 */
public void setProject(MigrationProject project) {
this.project = project;
}
Expand All @@ -186,4 +230,8 @@ public void setChompbConfig(ChompbConfig chompbConfig) {
/**
 * Sets the descriptions service used by this service.
 *
 * @param descriptionsService the descriptions service
 */
public void setDescriptionsService(DescriptionsService descriptionsService) {
this.descriptionsService = descriptionsService;
}

/**
 * Sets the source file service from which the source file mappings are loaded.
 *
 * @param sourceFileService the source file service
 */
public void setSourceFileService(SourceFileService sourceFileService) {
this.sourceFileService = sourceFileService;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ private void initDependencies(SipGenerationOptions options, Connection conn) thr
postMigrationReportService.setDescriptionsService(descriptionsService);
postMigrationReportService.setProject(project);
postMigrationReportService.setChompbConfig(chompbConfig);
postMigrationReportService.setSourceFileService(sourceFileService);
postMigrationReportService.init();

workGeneratorFactory = new WorkGeneratorFactory();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@
public class PostMigrationReportConstants {
public static final String BXC_URL_HEADER = "boxc_url";
public static final String VERIFIED_HEADER = "verified";
public static final int VERIFIED_INDEX = 5;
public static final int VERIFIED_INDEX = 7;
public static final String[] CSV_HEADERS = new String[] {
"cdm_id", "cdm_url", "boxc_obj_type", "boxc_url", "boxc_title", VERIFIED_HEADER,
"boxc_parent_work_url", "boxc_parent_work_title", "children_count" };
"cdm_id", "cdm_url", "boxc_obj_type", "boxc_url", "boxc_title", "matching_value", "source_file",
VERIFIED_HEADER, "boxc_parent_work_url", "boxc_parent_work_title", "children_count" };
public static final CSVFormat CSV_OUTPUT_FORMAT = CSVFormat.Builder.create()
.setHeader(CSV_HEADERS)
.build();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,9 @@ public void setup() throws Exception {
public void reportCountWorksTest() throws Exception {
reportGenerator.init();
reportGenerator.addRow("25", CDM_URL_1, "Work", BOXC_URL_1, "Redoubt C",
null, "", "", 1);
null, null, null, "", "", 1);
reportGenerator.addRow("26", CDM_URL_2, "File", BOXC_URL_2, "A file",
null, BOXC_URL_1, "Redoubt C", null);
null, null, null, BOXC_URL_1, "Redoubt C", null);
reportGenerator.closeCsv();

long numWorks = service.countWorks();
Expand All @@ -64,11 +64,11 @@ public void reportCountWorksTest() throws Exception {
public void reportCountFilesTest() throws Exception {
reportGenerator.init();
reportGenerator.addRow("25", CDM_URL_1, "Work", BOXC_URL_1, "Redoubt C",
null, "", "", 1);
null, null, null, "", "", 1);
reportGenerator.addRow("26", CDM_URL_2, "File", BOXC_URL_2, "A file",
null, BOXC_URL_1, "Redoubt C", null);
null, null, null, BOXC_URL_1, "Redoubt C", null);
reportGenerator.addRow("27", CDM_URL_3, "File", BOXC_URL_3, "A file",
null, BOXC_URL_1, "Redoubt C", null);
null, null, null, BOXC_URL_1, "Redoubt C", null);
reportGenerator.closeCsv();

long numFiles = service.countFiles();
Expand Down
Original file line number Diff line number Diff line change
@@ -1,16 +1,22 @@
package edu.unc.lib.boxc.migration.cdm.services;

import edu.unc.lib.boxc.migration.cdm.model.MigrationProject;
import edu.unc.lib.boxc.migration.cdm.model.SourceFilesInfo;
import edu.unc.lib.boxc.migration.cdm.test.BxcEnvironmentHelper;
import edu.unc.lib.boxc.migration.cdm.test.CdmEnvironmentHelper;
import edu.unc.lib.boxc.migration.cdm.test.SipServiceHelper;
import edu.unc.lib.boxc.migration.cdm.util.ProjectPropertiesSerialization;
import org.apache.commons.io.FileUtils;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.time.Instant;

import static edu.unc.lib.boxc.migration.cdm.test.PostMigrationReportTestHelper.assertContainsRow;
import static edu.unc.lib.boxc.migration.cdm.test.PostMigrationReportTestHelper.parseReport;
Expand All @@ -34,6 +40,7 @@ public class PostMigrationReportServiceTest {
private MigrationProject project;
private SipServiceHelper testHelper;
private DescriptionsService descriptionsService;
private SourceFileService sourceFileService;
private PostMigrationReportService service;

@BeforeEach
Expand All @@ -44,11 +51,13 @@ public void setup() throws Exception {
CdmEnvironmentHelper.DEFAULT_ENV_ID, BxcEnvironmentHelper.DEFAULT_ENV_ID);
testHelper = new SipServiceHelper(project, tmpFolder);
descriptionsService = testHelper.getDescriptionsService();
sourceFileService = testHelper.getSourceFileService();

service = new PostMigrationReportService();
service.setProject(project);
service.setChompbConfig(testHelper.getChompbConfig());
service.setDescriptionsService(testHelper.getDescriptionsService());
service.setSourceFileService(testHelper.getSourceFileService());
service.init();
}

Expand All @@ -59,6 +68,9 @@ void closeService() throws Exception {

@Test
public void addSingleItemTest() throws Exception {
testHelper.indexExportData("mini_gilmer");
Path srcPath1 = testHelper.addSourceFile("25.txt");
writeSourceFileCsv(mappingBody("25,," + srcPath1 +","));
testHelper.populateDescriptions("gilmer_mods1.xml");

service.addWorkRow("25", BOXC_ID_1, 1, true);
Expand All @@ -72,6 +84,8 @@ public void addSingleItemTest() throws Exception {
BOXC_URL_1,
"Redoubt C",
"",
srcPath1.toString(),
"",
"",
"",
"1");
Expand All @@ -81,13 +95,18 @@ public void addSingleItemTest() throws Exception {
BOXC_URL_2,
"",
"",
srcPath1.toString(),
"",
BOXC_URL_1,
"Redoubt C",
"");
}

@Test
public void addSingleItemWithFileDescTest() throws Exception {
testHelper.indexExportData("mini_gilmer");
Path srcPath1 = testHelper.addSourceFile("25.txt");
writeSourceFileCsv(mappingBody("25,," + srcPath1 +","));
testHelper.populateDescriptions("gilmer_mods1.xml", "gilmer_mods_children.xml");

service.addWorkRow("25", BOXC_ID_1, 1, true);
Expand All @@ -101,6 +120,8 @@ public void addSingleItemWithFileDescTest() throws Exception {
BOXC_URL_1,
"Redoubt C",
"",
srcPath1.toString(),
"",
"",
"",
"1");
Expand All @@ -110,13 +131,19 @@ public void addSingleItemWithFileDescTest() throws Exception {
BOXC_URL_2,
"Redoubt C Scan File",
"",
srcPath1.toString(),
"",
BOXC_URL_1,
"Redoubt C",
"");
}

@Test
public void addGroupedTest() throws Exception {
testHelper.indexExportData("grouped_gilmer");
Path srcPath1 = testHelper.addSourceFile("26.txt");
Path srcPath2 = testHelper.addSourceFile("27.txt");
writeSourceFileCsv(mappingBody("26,," + srcPath1 +",", "27,," + srcPath2 +","));
testHelper.populateDescriptions("grouped_mods.xml");

service.addWorkRow("grp:groupa:group1", BOXC_ID_1, 2, false);
Expand All @@ -133,13 +160,17 @@ public void addGroupedTest() throws Exception {
"",
"",
"",
"",
"",
"2");
assertContainsRow(rows, "26",
"http://localhost/cdm/singleitem/collection/proj/id/26",
"File",
BOXC_URL_2,
"Plan of Battery McIntosh",
"",
srcPath1.toString(),
"",
BOXC_URL_1,
"Folder Group 1",
"");
Expand All @@ -149,6 +180,8 @@ public void addGroupedTest() throws Exception {
BOXC_URL_3,
"Fort DeRussy on Red River, Louisiana",
"",
srcPath2.toString(),
"",
BOXC_URL_1,
"Folder Group 1",
"");
Expand All @@ -157,6 +190,9 @@ public void addGroupedTest() throws Exception {
@Test
public void addCompoundTest() throws Exception {
testHelper.indexExportData(Paths.get("src/test/resources/keepsakes_fields.csv"), "mini_keepsakes");
Path srcPath1 = testHelper.addSourceFile("nccg_ck_1042-22_v1.tif");
Path srcPath2 = testHelper.addSourceFile("nccg_ck_1042-22_v2.tif");
writeSourceFileCsv(mappingBody("602,," + srcPath1 +",", "603,," + srcPath2 +","));
descriptionsService.generateDocuments(true);
descriptionsService.expandDescriptions();

Expand All @@ -174,13 +210,17 @@ public void addCompoundTest() throws Exception {
"",
"",
"",
"",
"",
"2");
assertContainsRow(rows, "602",
"http://localhost/cdm/singleitem/collection/proj/id/602",
"File",
BOXC_URL_2,
"World War II ration book",
"",
srcPath1.toString(),
"",
BOXC_URL_1,
"Tiffany's pillbox commemorating UNC's bicentennial (closed, in box)",
"");
Expand All @@ -190,8 +230,22 @@ public void addCompoundTest() throws Exception {
BOXC_URL_3,
"World War II ration book (instructions)",
"",
srcPath2.toString(),
"",
BOXC_URL_1,
"Tiffany's pillbox commemorating UNC's bicentennial (closed, in box)",
"");
}

/**
 * Builds the text of a source file mapping CSV: the comma-joined
 * SourceFilesInfo.CSV_HEADERS line followed by the given rows, one per line.
 *
 * @param rows already-formatted CSV data rows
 * @return header line and rows separated by newlines, with no trailing newline
 */
private String mappingBody(String... rows) {
    String headerLine = String.join(",", SourceFilesInfo.CSV_HEADERS);
    String dataLines = String.join("\n", rows);
    return headerLine + "\n" + dataLines;
}

/**
 * Writes the given content to the project's source files mapping path as UTF-8,
 * then sets the project's source-files-updated date to the current instant and
 * writes the project properties.
 *
 * @param mappingBody full text of the mapping CSV
 * @throws IOException propagated from writing the mapping file or the project properties
 */
private void writeSourceFileCsv(String mappingBody) throws IOException {
    var mappingFile = project.getSourceFilesMappingPath().toFile();
    FileUtils.write(mappingFile, mappingBody, StandardCharsets.UTF_8);

    var properties = project.getProjectProperties();
    properties.setSourceFilesUpdatedDate(Instant.now());
    ProjectPropertiesSerialization.write(project);
}
}
Loading

0 comments on commit 6dc2fad

Please sign in to comment.