Skip to content

Commit

Permalink
Add option 'gbskip' in HashStoreClient when testing in knbvm to contr…
Browse files Browse the repository at this point in the history
…ol what size of files to skip
  • Loading branch information
doulikecookiedough committed Jan 17, 2024
1 parent 48a26eb commit a431e72
Showing 1 changed file with 48 additions and 26 deletions.
74 changes: 48 additions & 26 deletions src/main/java/org/dataone/hashstore/HashStoreClient.java
Original file line number Diff line number Diff line change
Expand Up @@ -124,13 +124,14 @@ public static void main(String[] args) throws Exception {
String objType = cmd.getOptionValue("stype");
String originDirectory = cmd.getOptionValue("sdir");
String numObjects = cmd.getOptionValue("nobj");
String sizeOfFilesToSkip = cmd.getOptionValue("gbskip");
FileHashStoreUtility.ensureNotNull(objType, "-stype", "HashStoreClient");
FileHashStoreUtility.ensureNotNull(originDirectory, "-sdir", "HashStoreClient");
FileHashStoreUtility.ensureNotNull(
action, "-sts, -rav, -dfs", "HashStoreClient"
);

testWithKnbvm(action, objType, originDirectory, numObjects);
testWithKnbvm(action, objType, originDirectory, numObjects, sizeOfFilesToSkip);

} else if (cmd.hasOption("getchecksum")) {
String pid = cmd.getOptionValue("pid");
Expand Down Expand Up @@ -330,6 +331,9 @@ private static Options addHashStoreClientOptions() {
"nobj", "numberofobj", true,
"(knbvm) Option to specify number of objects to retrieve from a Metacat db."
);
options.addOption(
"gbskip", "gbsizetoskip", true, "(knbvm) Option to specify the size of objects to skip."
);
options.addOption(
"sdir", "storedirectory", true,
"(knbvm) Option to specify the directory of objects to convert."
Expand Down Expand Up @@ -446,14 +450,17 @@ private static void initializeHashStore(Path storePath) throws HashStoreFactoryE
/**
* Entry point for working with test data found in knbvm (test.arcticdata.io)
*
* @param actionFlag String representing a knbvm test-related method to call.
* @param objType "data" (objects) or "documents" (metadata).
* @param numObjects Number of rows to retrieve from metacat db,
* if null, will retrieve all rows.
* @param actionFlag String representing a knbvm test-related method to call.
* @param objType "data" (objects) or "documents" (metadata).
* @param originDir Directory path of given objType
* @param numObjects Number of rows to retrieve from metacat db,
* if null, will retrieve all rows.
* @param sizeOfFilesToSkip Size of files in GB to skip
* @throws IOException Related to accessing config files or objects
*/
private static void testWithKnbvm(
String actionFlag, String objType, String originDir, String numObjects
String actionFlag, String objType, String originDir, String numObjects,
String sizeOfFilesToSkip
) throws IOException {
// Load metacat db yaml
// Note: In order to test with knbvm, you must manually create a `pgdb.yaml` file with the
Expand All @@ -475,15 +482,22 @@ private static void testWithKnbvm(

try {
System.out.println("Connecting to metacat db.");
if (!objType.equals("object")) {
if (!objType.equals("metadata")) {
String errMsg = "HashStoreClient - objType must be 'object' or 'metadata'";
throw new IllegalArgumentException(errMsg);
}
}

// Setup metacat db access
Class.forName("org.postgresql.Driver"); // Force driver to register itself
Connection connection = DriverManager.getConnection(url, user, password);
Statement statement = connection.createStatement();
String sqlQuery = "SELECT identifier.guid, identifier.docid, identifier.rev,"
+ " systemmetadata.object_format, systemmetadata.checksum,"
+ " systemmetadata.checksum_algorithm FROM identifier INNER JOIN systemmetadata"
+ " ON identifier.guid = systemmetadata.guid ORDER BY identifier.guid"
+ sqlLimitQuery + ";";
+ " systemmetadata.checksum_algorithm, systemmetadata.size FROM identifier"
+ " INNER JOIN systemmetadata ON identifier.guid = systemmetadata.guid"
+ " ORDER BY identifier.guid" + sqlLimitQuery + ";";
ResultSet resultSet = statement.executeQuery(sqlQuery);

// For each row, get guid, docid, rev, checksum and checksum_algorithm
Expand All @@ -497,26 +511,34 @@ private static void testWithKnbvm(
String checksumAlgorithm = resultSet.getString("checksum_algorithm");
String formattedChecksumAlgo = formatAlgo(checksumAlgorithm);
String formatId = resultSet.getString("object_format");

if (!objType.equals("object")) {
if (!objType.equals("metadata")) {
String errMsg = "HashStoreClient - objType must be 'object' or 'metadata'";
throw new IllegalArgumentException(errMsg);
long setItemSize = resultSet.getLong("size");

boolean skipFile = false;
if (sizeOfFilesToSkip != null) {
// Calculate the size of requested gb to skip in bytes
long gbFilesToSkip = Integer.getInteger(sizeOfFilesToSkip) * (1024L * 1024
* 1024);
if (setItemSize > gbFilesToSkip) {
skipFile = true;
}
}
Path setItemFilePath = Paths.get(originDir + "/" + docid + "." + rev);

if (Files.exists(setItemFilePath)) {
System.out.println(
"File exists (" + setItemFilePath + ")! Adding to resultObjList."
);
Map<String, String> resultObj = new HashMap<>();
resultObj.put("pid", guid);
resultObj.put("algorithm", formattedChecksumAlgo);
resultObj.put("checksum", checksum);
resultObj.put("path", setItemFilePath.toString());
resultObj.put("namespace", formatId);
resultObjList.add(resultObj);
if (skipFile) {
continue;
} else {
Path setItemFilePath = Paths.get(originDir + "/" + docid + "." + rev);
if (Files.exists(setItemFilePath)) {
System.out.println(
"File exists (" + setItemFilePath + ")! Adding to resultObjList."
);
Map<String, String> resultObj = new HashMap<>();
resultObj.put("pid", guid);
resultObj.put("algorithm", formattedChecksumAlgo);
resultObj.put("checksum", checksum);
resultObj.put("path", setItemFilePath.toString());
resultObj.put("namespace", formatId);
resultObjList.add(resultObj);
}
}
}

Expand Down

0 comments on commit a431e72

Please sign in to comment.